# Load libraries and datasets

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Source CSVs. Later cells read 'movieId', 'title' and 'genres' from `movies`
# and 'userId', 'movieId' and 'rating' from `ratings`.
MOVIES_CSV = "/content/movies.csv"
RATINGS_CSV = "/content/ratings.csv"

movies = pd.read_csv(MOVIES_CSV)
ratings = pd.read_csv(RATINGS_CSV)

# Popularity-based recommender

In [2]:
def popularity_based_recommender(genre, min_reviews, top_n):
    """Rank the movies of one genre by their mean rating.

    Args:
        genre: Genre name; compared case-insensitively against every
            pipe-separated entry of ``movies['genres']``.
        min_reviews: Minimum number of ratings a movie needs to be kept.
        top_n: Maximum number of rows to return.

    Returns:
        DataFrame with columns ``title``, ``avg_rating`` and ``num_reviews``,
        ordered by ``avg_rating`` descending, with a fresh 0-based index.
    """
    wanted = genre.lower()

    # One row per (movie, genre) pair so a single genre can be matched exactly.
    per_genre = movies.assign(genres=movies['genres'].str.split('|')).explode('genres')
    in_genre = per_genre.loc[per_genre['genres'].str.lower() == wanted]

    # Attach every rating of the matching movies, then summarise per movie.
    stats = (
        in_genre.merge(ratings, on='movieId')
        .groupby(['movieId', 'title'])
        .agg(avg_rating=('rating', 'mean'), num_reviews=('rating', 'count'))
        .reset_index()
    )

    enough_reviews = stats.loc[stats['num_reviews'] >= min_reviews]
    ranked = enough_reviews.sort_values(by='avg_rating', ascending=False).head(top_n)
    return ranked[['title', 'avg_rating', 'num_reviews']].reset_index(drop=True)

# Demo: the five best-rated comedies that have at least 100 ratings.
results = popularity_based_recommender(genre="Comedy", min_reviews=100, top_n=5)
print(results)

                                    title  avg_rating  num_reviews
0  Monty Python and the Holy Grail (1975)    4.301948          154
1                            Fargo (1996)    4.271144          201
2              Princess Bride, The (1987)    4.163743          171
3                     Pulp Fiction (1994)    4.160000          325
4                     Forrest Gump (1994)    4.138264          311


# Content-based recommender

In [3]:
def content_based_recommender(movie_title, top_n):
    """Recommend the movies whose genre profile is closest to a given title.

    Every movie's pipe-separated ``genres`` string is turned into a
    bag-of-genres count vector; candidates are ranked by cosine similarity
    to the vector of ``movie_title``. The queried movie itself is never
    part of the result.

    Args:
        movie_title: Title to look up in ``movies['title']``
            (case-insensitive exact match). If several rows match, the
            first one is used.
        top_n: Maximum number of recommendations to return.

    Returns:
        DataFrame with columns ``title`` and ``genres``, most similar first
        (ties keep catalogue order), or the string
        ``"Movie '<movie_title>' not found."`` when no title matches.
    """
    # Positional index so that row labels, matrix rows and .iloc all agree,
    # whatever index `movies` happens to carry.
    df = movies.reset_index(drop=True)

    matches = np.flatnonzero((df['title'].str.lower() == movie_title.lower()).to_numpy())
    if matches.size == 0:
        return f"Movie '{movie_title}' not found."
    pos = int(matches[0])

    # token_pattern=None silences scikit-learn's "token_pattern will not be
    # used" warning, which is emitted whenever a custom tokenizer is supplied.
    count_vect = CountVectorizer(tokenizer=lambda x: x.split('|'), token_pattern=None)
    genre_matrix = count_vect.fit_transform(df['genres'])

    # Only the query row is needed; the full N x N matrix costs quadratic
    # memory for no benefit.
    sims = cosine_similarity(genre_matrix[[pos]], genre_matrix).ravel()

    # Many movies share an identical genre vector (similarity 1.0), so the
    # query is not guaranteed to sort first. Remove it by position instead of
    # blindly skipping the first entry.
    order = np.argsort(-sims, kind='stable')
    order = order[order != pos][:max(top_n, 0)]
    return df.iloc[order][['title', 'genres']].reset_index(drop=True)

# Demo: the five movies whose genres are closest to "Toy Story (1995)".
results = content_based_recommender(movie_title="Toy Story (1995)", top_n=5)
print(results)



                                            title  \
0                                     Antz (1998)   
1                              Toy Story 2 (1999)   
2  Adventures of Rocky and Bullwinkle, The (2000)   
3                Emperor's New Groove, The (2000)   
4                           Monsters, Inc. (2001)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1  Adventure|Animation|Children|Comedy|Fantasy  
2  Adventure|Animation|Children|Comedy|Fantasy  
3  Adventure|Animation|Children|Comedy|Fantasy  
4  Adventure|Animation|Children|Comedy|Fantasy  


# Collaborative recommender

In [4]:
def collaborative_recommender(user_id, k_similar=100, top_n=5):
    """Recommend movies to a user from the ratings of similar users.

    Users are compared by cosine similarity of their rating vectors (one
    column per movie, unrated movies filled with 0). Ratings given by the
    ``k_similar`` nearest users to movies the target user has not rated are
    averaged per movie and ranked.

    Args:
        user_id: Value of ``ratings['userId']`` to recommend for.
        k_similar: Number of most-similar other users to draw ratings from.
        top_n: Maximum number of movies to return.

    Returns:
        DataFrame with columns ``title``, ``avg_rating`` and ``num_ratings``,
        ordered by ``avg_rating`` then ``num_ratings`` descending, or the
        string ``"User ID <user_id> not found."`` when the user has no
        ratings.
    """
    user_item_matrix = ratings.pivot_table(index='userId', columns='movieId', values='rating').fillna(0)
    if user_id not in user_item_matrix.index:
        return f"User ID {user_id} not found."

    # Translate the userId label into its row position; ids are not
    # guaranteed to be the contiguous range 1..N.
    user_ids = user_item_matrix.index
    user_pos = int(np.flatnonzero(user_ids == user_id)[0])

    # Only the target user's similarity row is needed, not the N x N matrix.
    matrix = user_item_matrix.to_numpy()
    sims = cosine_similarity(matrix[[user_pos]], matrix).ravel()

    # Drop the target user by position (another user may tie at 1.0), then
    # map the remaining positions back to real user ids.
    order = np.argsort(-sims, kind='stable')
    order = order[order != user_pos][:max(k_similar, 0)]
    similar_user_ids = user_ids[order]

    similar_users_data = ratings[ratings['userId'].isin(similar_user_ids)]
    watched_by_user = ratings.loc[ratings['userId'] == user_id, 'movieId'].unique()
    recs = similar_users_data[~similar_users_data['movieId'].isin(watched_by_user)]

    top_recs = (
        recs.groupby('movieId')
        .agg(avg_rating=('rating', 'mean'), num_ratings=('rating', 'count'))
        .sort_values(by=['avg_rating', 'num_ratings'], ascending=False)
        .head(top_n)
        .reset_index()
    )
    # Inner merge keeps the ranking order of the left frame.
    return top_recs.merge(movies, on='movieId')[['title', 'avg_rating', 'num_ratings']].reset_index(drop=True)

# Demo: five recommendations for user 1, drawn from the 100 most similar users.
results = collaborative_recommender(user_id=1, k_similar=100, top_n=5)
print(results)

                                 title  avg_rating  num_ratings
0     Grand Budapest Hotel, The (2014)         5.0            3
1  20,000 Leagues Under the Sea (1954)         5.0            2
2                      Bulworth (1998)         5.0            2
3            Lady Vanishes, The (1938)         5.0            2
4                  Billy Elliot (2000)         5.0            2
