In [2]:
!pip install pandas scikit-learn



In [4]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ast

# Location of the movie metadata CSV on the local machine.
file_path = "C:/Users/hp5cd/Downloads/movies_metadata.csv"

# Load the CSV, keep only the two columns used by the recommender, and drop
# every row in which either of them is missing.
df = (
    pd.read_csv(file_path, low_memory=False)
    .loc[:, ['title', 'genres']]
    .dropna()
)

def parse_genres(genres_str):
    """Turn a stringified list of genre dicts into a space-separated name string.

    Args:
        genres_str: Text holding a Python literal such as
            "[{'id': 16, 'name': 'Animation'}, ...]".

    Returns:
        The ``'name'`` values joined with single spaces, or ``""`` when the
        value cannot be parsed or does not have the expected structure.
    """
    try:
        genres_list = ast.literal_eval(genres_str)
        return " ".join([g['name'] for g in genres_list])
    except (ValueError, SyntaxError, TypeError, KeyError,
            MemoryError, RecursionError):
        # Best-effort parsing: malformed or unexpectedly shaped input yields an
        # empty string. Only the errors that literal_eval and the name
        # extraction can raise are caught, so KeyboardInterrupt / SystemExit
        # still propagate instead of being silently swallowed.
        return ""

# Flatten every raw genres value into a space-separated string of genre names.
df['parsed_genres'] = df['genres'].apply(parse_genres)
# Discard rows for which no genre name could be extracted.
df = df[df['parsed_genres'].str.strip() != '']
# Keep only the first row for each title.
df = df.drop_duplicates(subset='title')
df = df[df['title'].notnull()]

# Work on the first 1000 remaining rows, renumbered 0..n-1. The filters above
# leave gaps in the index labels; resetting them keeps each label equal to the
# row's position, which is what a row number of a matrix built from this frame
# refers to.
df = df.head(1000).reset_index(drop=True)

# Encode each movie's genre string as a vector of token counts
# (one row per row of df, one column per token in the fitted vocabulary).
cv = CountVectorizer()
genre_matrix = cv.fit_transform(df['parsed_genres'])

# Pairwise cosine similarity of the rows of genre_matrix with themselves;
# entry [i, j] compares the i-th and j-th rows.
similarity = cosine_similarity(genre_matrix, genre_matrix)

def recommend_movie(title, df, similarity_matrix, top_n=5):
    """Return the titles most similar to ``title``.

    Args:
        title: Exact title to look up in ``df['title']``.
        df: DataFrame with a ``'title'`` column. Its rows must be in the same
            order as the rows/columns of ``similarity_matrix``.
        similarity_matrix: Square matrix where entry ``[i, j]`` is the
            similarity between the i-th and j-th rows of ``df``.
        top_n: Maximum number of titles to return (default 5).

    Returns:
        A list of up to ``top_n`` titles ordered from most to least similar,
        never including the queried movie itself. If ``title`` is not present,
        a warning string is returned instead of a list.
    """
    # Locate the movie by row *position*, not by index label: the similarity
    # matrix is positional, and the frame's labels need not run 0..n-1.
    match_positions = (df['title'] == title).to_numpy().nonzero()[0]
    if len(match_positions) == 0:
        return f"⚠ Movie '{title}' not found in subset."

    position = int(match_positions[0])

    # Exclude the queried movie explicitly. Dropping "the first sorted entry"
    # is wrong when another movie ties with it at the top score.
    scores = [
        (other, score)
        for other, score in enumerate(similarity_matrix[position])
        if other != position
    ]
    # The sort is stable, so equally similar movies keep their row order.
    scores.sort(key=lambda pair: pair[1], reverse=True)

    return [df.iloc[other]['title'] for other, _ in scores[:top_n]]

example_title = "Toy Story"
recommendations = recommend_movie(example_title, df, similarity)

if isinstance(recommendations, str):
    # recommend_movie returns a warning string (not a list) when the title is
    # missing; print it whole rather than iterating over its characters.
    print(recommendations)
else:
    print(f"\n✅ Recommendations for '{example_title}':")
    for rec in recommendations:
        print(f"- {rec}")


✅ Recommendations for 'Toy Story':
- Oliver & Company
- A Close Shave
- The Aristocats
- Pete's Dragon
- Big Bully
