In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# 1. Load movies dataset (make sure movies.csv is in the same folder)
movies = pd.read_csv("movies.csv")

# 2. Create TF-IDF matrix from genres
# Genres are stored as a pipe-delimited list of labels
# (e.g. "Adventure|Animation|Children"), so every label is treated as exactly
# one token. The default word tokenizer would split a hyphenated label into
# several terms (over-weighting it), and an English stop-word list could
# silently drop a label that happens to be a common word.
tfidf = TfidfVectorizer(token_pattern=r"[^|]+")
tfidf_matrix = tfidf.fit_transform(movies["genres"].fillna(""))

# 3. Compute similarity between all movies
# Dense square matrix: row i holds the similarity of movie i to every movie.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# 4. Create index mapping (title -> index)
# Series.drop_duplicates() de-duplicates the *values* (the row labels, which
# are already unique), so repeated titles have to be removed through the index
# instead. Keeping the first occurrence guarantees that indices[title] is a
# single integer rather than a Series.
indices = pd.Series(movies.index, index=movies["title"])
indices = indices[~indices.index.duplicated(keep="first")]

# 5. Recommendation function
def recommend_movies(title, k=5):
    """Return the ``k`` movies whose genres are most similar to ``title``.

    Uses the module-level ``movies`` frame, the ``indices`` title -> row
    mapping and the ``cosine_sim`` similarity matrix.

    Args:
        title: Exact movie title as it appears in ``movies["title"]``.
        k: Maximum number of recommendations to return (negative values are
            treated as 0).

    Returns:
        A DataFrame with the ``title`` and ``genres`` columns of the
        recommended movies, best match first, or an explanatory string when
        ``title`` is not in the dataset.
    """
    if title not in indices:
        return f"Movie '{title}' not found in dataset."

    idx = indices[title]
    # A title that occurs more than once makes the lookup return a Series;
    # fall back to its first occurrence so a single similarity row is used.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    scores = cosine_sim[idx]
    # Exclude the query movie by position instead of assuming it sorts first:
    # movies with identical genres tie at the top score, and the stable sort
    # would otherwise keep the query movie and drop a genuine match.
    candidates = (i for i in range(len(scores)) if i != idx)
    top = sorted(candidates, key=lambda i: scores[i], reverse=True)[:max(k, 0)]
    return movies.iloc[top][["title", "genres"]]

# Demo: print the default top-5 genre-based matches for "Toy Story (1995)"
print(recommend_movies(title="Toy Story (1995)"))

                                               title  \
1706                                     Antz (1998)   
2355                              Toy Story 2 (1999)   
2809  Adventures of Rocky and Bullwinkle, The (2000)   
3000                Emperor's New Groove, The (2000)   
3568                           Monsters, Inc. (2001)   

                                           genres  
1706  Adventure|Animation|Children|Comedy|Fantasy  
2355  Adventure|Animation|Children|Comedy|Fantasy  
2809  Adventure|Animation|Children|Comedy|Fantasy  
3000  Adventure|Animation|Children|Comedy|Fantasy  
3568  Adventure|Animation|Children|Comedy|Fantasy  
