In [1]:
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Read the ratings table from the CSV at /content/ratings.csv; the columns
# used later in this notebook are userId, movieId, title and rating.
ratings = pd.read_csv(filepath_or_buffer='/content/ratings.csv')
# Leave the preview as the cell's last expression so it is displayed.
ratings.head()

Unnamed: 0,userId,movieId,title,rating
0,1,8,Fight Club,3.7
1,1,1,The Shawshank Redemption,4.6
2,1,24,The Departed,4.3
3,1,9,Forrest Gump,4.4
4,1,32,The Pianist,3.5


In [4]:
from scipy.sparse import csr_matrix

In [5]:
def create_matrix(df):
    """Build a sparse movie-by-user rating matrix from a long-format ratings table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``userId``, ``movieId`` and ``rating``.

    Returns
    -------
    X : scipy.sparse.csr_matrix
        Shape ``(n_movies, n_users)``; entry ``[m, u]`` is the rating user ``u``
        gave movie ``m`` (0 where no rating exists). If the same user rated the
        same movie several times, the entry is the mean of those ratings.
    movie_mapper : dict
        Maps each ``movieId`` to its row index in ``X``.
    movie_inv_mapper : dict
        Maps each row index of ``X`` back to its ``movieId``.
    """
    # Index ids in order of first appearance in the original frame.
    user_mapper = {uid: i for i, uid in enumerate(df['userId'].unique())}
    movie_mapper = {mid: i for i, mid in enumerate(df['movieId'].unique())}
    movie_inv_mapper = {i: mid for mid, i in movie_mapper.items()}

    # csr_matrix sums values that share the same (row, col) coordinate, which
    # would turn two ratings of 4.0 and 2.0 into a single "6.0" entry.
    # Collapse duplicate (movie, user) pairs to their mean first.
    pair_ratings = df.groupby(
        ['movieId', 'userId'], sort=False, as_index=False)['rating'].mean()

    user_index = pair_ratings['userId'].map(user_mapper).to_numpy()
    movie_index = pair_ratings['movieId'].map(movie_mapper).to_numpy()

    X = csr_matrix((pair_ratings['rating'].to_numpy(), (movie_index, user_index)),
                   shape=(len(movie_mapper), len(user_mapper)))
    return X, movie_mapper, movie_inv_mapper


# Sparse rating matrix plus the movieId <-> row-index lookups returned by
# create_matrix.
X, movie_mapper, movie_inv_mapper = create_matrix(ratings)

# Dense view for eyeballing the data: one row per title, one column per userId
# (so, despite the variable name, movies are on the rows). pivot_table averages
# ratings that land in the same cell and leaves NaN where there is none.
user_item_matrix = ratings.pivot_table(
    values="rating", index="title", columns="userId", aggfunc="mean")
# Show only the top-left 10 x 5 corner.
print(user_item_matrix.head(10).iloc[:, :5])

userId               1    2    3    4    5
title                                     
12 Angry Men       NaN  NaN  3.6  NaN  NaN
Avengers: Endgame  NaN  3.5  NaN  NaN  4.5
Django Unchained   NaN  NaN  NaN  NaN  NaN
Fight Club         3.7  3.7  NaN  3.7  NaN
Forrest Gump       4.4  NaN  NaN  NaN  3.7
Gladiator          NaN  NaN  NaN  NaN  NaN
Goodfellas         NaN  NaN  3.6  NaN  NaN
Inception          NaN  NaN  NaN  NaN  NaN
Interstellar       NaN  4.9  NaN  NaN  NaN
Joker              3.6  NaN  NaN  NaN  NaN


In [6]:
def recommend_similar(movie_title, df, X, movie_mapper, movie_inv_mapper, k=5):
    """Print up to ``k`` movies whose rating vectors are closest to ``movie_title``.

    Similarity is cosine distance between rows of ``X``, found with a
    brute-force ``NearestNeighbors`` search. Recommendations are printed from
    most to least similar.

    Parameters
    ----------
    movie_title : str
        Title to look up in ``df['title']``; the first matching row supplies
        the ``movieId``.
    df : pandas.DataFrame
        Ratings table with at least ``title`` and ``movieId`` columns.
    X : sparse matrix
        One row per movie, indexed according to ``movie_mapper``.
    movie_mapper : dict
        ``movieId`` -> row index in ``X``.
    movie_inv_mapper : dict
        Row index in ``X`` -> ``movieId``.
    k : int, default 5
        Maximum number of recommendations to print.

    Raises
    ------
    IndexError
        If ``movie_title`` does not occur in ``df['title']``.
    """
    matching_ids = df.loc[df['title'] == movie_title, 'movieId']
    if matching_ids.empty:
        raise IndexError(
            f"No movie titled {movie_title!r} found in the ratings data")
    movie_id = matching_ids.iloc[0]
    movie_idx = movie_mapper[movie_id]
    movie_vec = X[movie_idx]

    # Request one extra neighbour because the query movie is normally its own
    # nearest neighbour, but never more than the number of fitted rows
    # (kneighbors raises if n_neighbors exceeds it).
    n_neighbors = min(k + 1, X.shape[0])

    model = NearestNeighbors(metric='cosine', algorithm='brute')
    model.fit(X)
    _, indices = model.kneighbors(movie_vec, n_neighbors=n_neighbors)

    # Drop the query movie by index rather than by position: with tied
    # distances (identical rating vectors) it is not guaranteed to come first.
    neighbor_ids = [movie_inv_mapper[i]
                    for i in indices.flatten() if i != movie_idx][:k]

    # Look titles up one neighbour at a time so the similarity ranking is kept;
    # filtering df with isin() would return them in DataFrame row order.
    id_to_title = df.drop_duplicates('movieId').set_index('movieId')['title']
    recommendations = list(dict.fromkeys(
        id_to_title.loc[mid] for mid in neighbor_ids))

    print(f"\nBecause you liked **{movie_title}**, you might also enjoy:")
    for rec in recommendations:
        print(f"- {rec}")

In [8]:
pip install scikit-learn



In [10]:
from sklearn.neighbors import NearestNeighbors

In [11]:
# Demo: print five recommendations for one title from the ratings data.
recommend_similar(
    "The Dark Knight",
    df=ratings,
    X=X,
    movie_mapper=movie_mapper,
    movie_inv_mapper=movie_inv_mapper,
    k=5,
)


Because you liked **The Dark Knight**, you might also enjoy:
- Fight Club
- The Pianist
- Parasite
- 12 Angry Men
- The Lord of the Rings: The Return of the King
