# Hybrid Movie Recommender System
  

In [None]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

## Load Data

In [None]:

# Source tables and their columns:
#   movies.csv  -> movieId, title, genres
#   ratings.csv -> userId, movieId, rating, timestamp
movies = pd.read_csv("movies.csv")
ratings = pd.read_csv("ratings.csv")

# Show the first rows of each table as a quick sanity check.
for _preview in (movies, ratings):
    display(_preview.head())

## Content-Based Filtering

In [None]:
# Normalise the genre text before vectorising: missing values and the
# "(no genres listed)" placeholder both become an empty string, so such
# movies contribute no tokens to the TF-IDF vocabulary.
movies['genres'] = movies['genres'].fillna('').replace('(no genres listed)', '')

# Genres are pipe-separated, so every run of non-"|" characters is one token.
tfidf = TfidfVectorizer(token_pattern=r'[^|]+')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Pairwise genre similarity between every pair of rows in `movies`.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Title -> row lookup. Series.drop_duplicates() would de-duplicate the
# *values* (row numbers, which are already unique) and leave repeated titles
# in the index, making indices[title] return a Series instead of a scalar.
# De-duplicate the index instead, keeping the first row for each title.
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]

def get_content_recommendations(title, num_recommendations=10):
    """Return the movies whose genre vectors are most similar to *title*.

    Args:
        title: Exact movie title to look up in the module-level ``indices``.
        num_recommendations: Maximum number of rows to return.

    Returns:
        A DataFrame with columns ``movieId`` and ``title``, ordered from most
        to least similar. It is empty when *title* is unknown, and never
        contains the query movie itself.
    """
    if title not in indices:
        return pd.DataFrame(columns=['movieId', 'title'])

    idx = indices[title]
    # A title shared by several rows makes the lookup return a Series;
    # fall back to the first matching row rather than failing below.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    scores = np.asarray(cosine_sim[idx], dtype=float)
    # Stable descending order: ties keep their original row order.
    order = np.argsort(-scores, kind='stable')
    # Drop the query row explicitly. Slicing off the first element is not
    # enough because other movies can tie with the query (identical genres),
    # or the query can score 0 against itself (no genres), so it is not
    # guaranteed to sort first.
    order = order[order != idx][:max(int(num_recommendations), 0)]

    return movies[['movieId', 'title']].iloc[order].reset_index(drop=True)

# Example: request 5 content-based recommendations for "Toy Story (1995)".
# Left as a bare expression so the notebook renders the returned DataFrame.
get_content_recommendations("Toy Story (1995)", 5)

## Collaborative Filtering (SVD)

In [None]:
# Take the rating scale from the data instead of hard-coding (1, 5):
# predictions are clipped to the declared scale, so a scale narrower than
# the observed ratings (e.g. half-star ratings starting at 0.5) would
# distort both the estimates and the reported RMSE.
rating_min = float(ratings['rating'].min())
rating_max = float(ratings['rating'].max())
reader = Reader(rating_scale=(rating_min, rating_max))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Seed the factor initialisation as well, so the whole cell is reproducible
# (the train/test split above is already seeded).
algo = SVD(random_state=42)
algo.fit(trainset)

predictions = algo.test(testset)
# verbose=False: the value is printed once, by the explicit print below.
rmse = accuracy.rmse(predictions, verbose=False)
print("RMSE:", rmse)

In [None]:
def get_collab_recommendations(user_id, num_recommendations=10):
    """Return the unrated movies with the highest predicted rating for a user.

    Every movie in ``movies`` that has no row for *user_id* in ``ratings`` is
    scored with ``algo.predict``; the best-scoring ones are returned.

    Args:
        user_id: Raw user id as it appears in ``ratings['userId']``.
        num_recommendations: Maximum number of rows to return.

    Returns:
        A DataFrame with columns ``movieId`` and ``title``, ordered from the
        highest to the lowest predicted rating.
    """
    # A set gives O(1) membership tests; scanning an array for every movie
    # would make this step quadratic.
    rated_movies = set(ratings.loc[ratings['userId'] == user_id, 'movieId'].unique())
    unrated_movies = [mid for mid in movies['movieId'].unique() if mid not in rated_movies]

    preds = [(mid, algo.predict(user_id, mid).est) for mid in unrated_movies]
    # list.sort is stable, so movies with equal estimates keep catalogue order.
    preds.sort(key=lambda pair: pair[1], reverse=True)
    top_ids = [mid for mid, _ in preds[:max(int(num_recommendations), 0)]]

    # Build the result from the ranked ids: a left merge keeps the order of
    # the left frame, whereas filtering `movies` with isin() would return the
    # rows in catalogue order and discard the ranking.
    ranked = pd.DataFrame({'movieId': pd.Series(top_ids, dtype=movies['movieId'].dtype)})
    result = ranked.merge(movies[['movieId', 'title']], on='movieId', how='left')
    return result.reset_index(drop=True)

# Example: request 5 collaborative-filtering recommendations for user 1.
# Left as a bare expression so the notebook renders the returned DataFrame.
get_collab_recommendations(user_id=1, num_recommendations=5)

## Simple Hybrid Recommender

In [None]:
def hybrid_recommendations(user_id, liked_movie_title, num_recommendations=10, alpha=0.6):
    """Blend collaborative-filtering rank with genre similarity to a liked movie.

    A pool of collaborative-filtering candidates is ranked by predicted
    rating and given a rank-based score from 1 (best) down to 0 (worst).
    That score is mixed with each candidate's genre similarity to
    *liked_movie_title*.

    Args:
        user_id: Raw user id as it appears in ``ratings['userId']``.
        liked_movie_title: Exact title of a movie the user likes. If it is
            not found in ``indices``, the ranked collaborative-filtering
            candidates are returned unchanged.
        num_recommendations: Maximum number of rows to return.
        alpha: Weight of the collaborative-filtering score; the similarity
            score is weighted ``1 - alpha``.

    Returns:
        A DataFrame with columns ``movieId`` and ``title``, best match first.
        The liked movie itself is never included.
    """
    # The pool must be larger than the requested output, otherwise requests
    # above the default pool size of 50 would silently return fewer rows.
    pool_size = max(50, num_recommendations + 1)
    candidates_cf = get_collab_recommendations(user_id, num_recommendations=pool_size).copy()

    # Re-score and rank the candidates here, so the rank-based cf_score below
    # does not depend on the row order of the candidate frame.
    candidates_cf['cf_est'] = [algo.predict(user_id, mid).est for mid in candidates_cf['movieId']]
    candidates_cf = (
        candidates_cf
        .sort_values('cf_est', ascending=False, kind='stable')
        .reset_index(drop=True)
    )

    if liked_movie_title not in indices:
        return candidates_cf[['movieId', 'title']].head(num_recommendations)

    liked_idx = indices[liked_movie_title]
    # A title shared by several rows makes the lookup return a Series;
    # use the first matching row.
    if isinstance(liked_idx, pd.Series):
        liked_idx = liked_idx.iloc[0]
    liked_idx = int(liked_idx)
    liked_movie_id = movies['movieId'].iloc[liked_idx]

    # Similarity of every movie to the liked one, keyed by movieId.
    sim_df = pd.DataFrame({
        'movieId': movies['movieId'].values,
        'sim': cosine_sim[liked_idx],
    })

    merged = candidates_cf.merge(sim_df, on='movieId', how='left')
    merged['sim'] = merged['sim'].fillna(0.0)
    # Rank-based CF score: 1 for the top candidate, 0 for the last one.
    merged['cf_score'] = np.linspace(1, 0, len(merged))
    merged['final_score'] = alpha * merged['cf_score'] + (1 - alpha) * merged['sim']

    # Do not recommend the liked movie back to the user.
    merged = merged[merged['movieId'] != liked_movie_id]
    # Stable sort keeps CF order between candidates with equal final scores.
    merged = merged.sort_values('final_score', ascending=False, kind='stable')

    return merged[['movieId', 'title']].head(num_recommendations).reset_index(drop=True)

# Example: 10 hybrid recommendations for user 1, using "Toy Story (1995)"
# as the liked movie. Left as a bare expression so the notebook renders the
# returned DataFrame.
hybrid_recommendations(user_id=1, liked_movie_title="Toy Story (1995)", num_recommendations=10)