In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# Load datasets
# The dataset directory is kept in one constant so the notebook can be pointed
# at another copy of the data by editing a single line instead of every path.
DATA_DIR = 'C:/Users/joshi/OneDrive/Desktop/archive'
ratings = pd.read_csv(f'{DATA_DIR}/ratings.csv')
movies = pd.read_csv(f'{DATA_DIR}/movies.csv')

In [7]:
# Attach movie metadata to every rating row by joining on the shared
# "movieId" column. The join is an inner join (the pandas default), so only
# ratings whose movieId also appears in `movies` are kept.
data = ratings.merge(movies, how="inner", on="movieId")

In [9]:
# Reshape the long ratings table into a wide user x movie grid:
# one row per userId, one column per title, cells hold the rating.
# pivot_table aggregates with the mean by default, so several ratings that
# fall into the same (userId, title) cell are averaged.
user_movie_matrix = pd.pivot_table(
    data,
    values="rating",
    index="userId",
    columns="title",
)

In [11]:
# Replace every missing (user, movie) cell with 0 so the matrix contains no
# NaN values; later cells treat a 0 entry as "this user has not rated it".
user_movie_matrix = user_movie_matrix.fillna(value=0)

In [13]:
# Pairwise cosine similarity between the rows (users) of the rating matrix.
user_similarity = cosine_similarity(user_movie_matrix)

# Wrap the raw array in a DataFrame labelled by userId on both axes so that
# similarities can be looked up by user id rather than by position.
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_movie_matrix.index,
    columns=user_movie_matrix.index,
)

In [15]:
# Function to get top N similar users
def get_similar_users(user_id, n=5):
    """Return the labels of the ``n`` users most similar to ``user_id``.

    Reads the ``user_id`` column of the module-level ``user_similarity_df``,
    orders it from the highest to the lowest similarity score, removes the
    entry for ``user_id`` itself and returns the index labels of the first
    ``n`` remaining rows (fewer if there are not that many other users).
    """
    ranked = user_similarity_df[user_id].sort_values(ascending=False)
    # The user's own entry is not a useful neighbour, so it is removed
    # before the top ``n`` are taken.
    neighbours = ranked.drop(user_id).head(n)
    return neighbours.index

In [17]:
# Function to recommend movies for a user
def recommend_movies(user_id, n=5, n_neighbors=5):
    """Recommend up to ``n`` movies that ``user_id`` has not rated yet.

    Each candidate movie is scored with the mean rating it received from the
    ``n_neighbors`` users returned by ``get_similar_users``.

    Parameters
    ----------
    user_id
        Label of the target user in ``user_movie_matrix.index``.
    n : int, default 5
        Maximum number of recommendations to return.
    n_neighbors : int, default 5
        Number of similar users whose ratings are averaged. The default
        matches the neighbourhood size that was previously fixed.

    Returns
    -------
    pandas.Series
        Mean neighbour rating indexed by movie title, highest first. It holds
        at most ``n`` entries and fewer when not enough unseen movies were
        rated by any neighbour.

    Raises
    ------
    KeyError
        Raised by the pandas label lookups when ``user_id`` is not present.
    """
    similar_users = get_similar_users(user_id, n=n_neighbors)
    similar_users_ratings = user_movie_matrix.loc[similar_users]
    # Unrated cells are stored as 0 in user_movie_matrix, so this mean runs
    # over every neighbour and movies rated by few neighbours score lower.
    avg_ratings = similar_users_ratings.mean(axis=0)
    user_seen_movies = user_movie_matrix.loc[user_id]
    avg_ratings = avg_ratings[user_seen_movies == 0]  # recommend unseen movies
    top_rated = avg_ratings.sort_values(ascending=False).head(n)
    # A score of 0 means none of the neighbours rated the movie (assuming real
    # ratings are strictly positive), so it is not a recommendation at all.
    # Filtering after the sort keeps the order of the remaining entries.
    return top_rated[top_rated > 0]

In [19]:
# Demo: show the five highest-scoring unseen movies for the user with id 1.
print("Top Recommendations for User 1:")
print(recommend_movies(user_id=1, n=5))

Top Recommendations for User 1:
title
Aliens (1986)                        4.8
Hunt for Red October, The (1990)     4.3
Blade Runner (1982)                  4.0
Godfather, The (1972)                4.0
Terminator 2: Judgment Day (1991)    4.0
dtype: float64
