In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# 1. Load Data
# Read the two CSV inputs into DataFrames. Later steps in this script use
# the 'title', 'genres' and 'movieId' columns of `movies`, and the
# 'userId', 'movieId' and 'rating' columns of `ratings`.
movies = pd.read_csv(
    filepath_or_buffer=r'D:\Elevate Labs\Movie Recommendation Project\movies.csv',
)
ratings = pd.read_csv(
    filepath_or_buffer=r'D:\Elevate Labs\Movie Recommendation Project\ratings.csv',
)

# 2. Content-Based Filtering (Genre Similarity)
# Each movie's genre string is turned into a TF-IDF vector; the cosine
# similarity between those vectors gives a movie-by-movie similarity matrix
# whose row/column positions follow the row order of `movies`.
tfidf = TfidfVectorizer(stop_words='english')
# Replace the '|' separators with spaces so the vectorizer sees one token per
# word. regex=False is required: '|' is the regex alternation operator, and
# the default of `regex` differs between pandas versions, so the pipe must be
# matched as a literal character explicitly.
movies['genres'] = movies['genres'].str.replace('|', ' ', regex=False)
genre_matrix = tfidf.fit_transform(movies['genres'])
genre_sim = cosine_similarity(genre_matrix, genre_matrix)

# 3. Collaborative Filtering (Simplified User-Item)
# Build a user x movie table of ratings (one row per userId, one column per
# movieId); cells with no rating become 0 so every movie column is a complete
# numeric vector.
user_movie_matrix = (
    ratings
    .pivot_table(values='rating', index='userId', columns='movieId')
    .fillna(0)
)
# Transpose so each row is a movie, then compare movies by how users rated them.
movie_sim = cosine_similarity(user_movie_matrix.transpose())
# Label both axes with movieId so similarities can be looked up by id.
movie_sim_df = pd.DataFrame(
    data=movie_sim,
    index=user_movie_matrix.columns,
    columns=user_movie_matrix.columns,
)

def get_recommendations(movie_title, top_n=5):
    """Return the titles of the movies whose genres are most similar to a movie.

    Similarity scores are read from the module-level ``genre_sim`` matrix,
    whose row/column positions correspond to the row positions of ``movies``.

    Args:
        movie_title: Exact title to look up in ``movies['title']``. If several
            rows share the title, the first one is used.
        top_n: Maximum number of titles to return. Values below 1 yield an
            empty list.

    Returns:
        A list of at most ``top_n`` titles, most similar first; ties keep the
        row order of ``movies``. The queried movie itself is never included.
        If the title is not present, the single-element list
        ``["Movie not found in database."]`` is returned instead.
    """
    # Locate the movie by row *position*, not index label: genre_sim is
    # indexed positionally, so a label would point at the wrong row whenever
    # `movies` does not carry a default 0..n-1 index.
    matches = (movies['title'] == movie_title).to_numpy().nonzero()[0]
    if matches.size == 0:
        return ["Movie not found in database."]
    idx = int(matches[0])

    # Logic: Blend Content (Genre) and Collaborative (User Ratings)
    # For simplicity in this UI, we'll use Genre-based similarity here.
    # The queried movie is excluded by position rather than by assuming it
    # sorts first: other movies with identical genres tie with it at the top
    # score, and a stable sort may place one of them ahead of it.
    candidates = (
        (pos, score)
        for pos, score in enumerate(genre_sim[idx])
        if pos != idx
    )
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)

    # Clamp top_n so a negative value cannot turn into a "drop from the end"
    # slice.
    movie_indices = [pos for pos, _ in ranked[:max(top_n, 0)]]
    return movies['title'].iloc[movie_indices].tolist()