In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity

from scipy.sparse.linalg import svds


In [4]:
df_links    = pd.read_csv('./datasets/links.csv')
df_movies   = pd.read_csv('./datasets/movies.csv')
df_ratings  = pd.read_csv('./datasets/ratings.csv')
df_tags     = pd.read_csv('./datasets/tags.csv')

In [5]:
user_item_matrix = df_ratings.pivot_table(index='userId', columns='movieId', values='rating').fillna(0)
user_item_matrix.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
item_similarity = cosine_similarity(user_item_matrix.T)  # Transpose to get item-item matrix
item_similarity

array([[1.        , 0.41056206, 0.2969169 , ..., 0.        , 0.        ,
        0.        ],
       [0.41056206, 1.        , 0.28243799, ..., 0.        , 0.        ,
        0.        ],
       [0.2969169 , 0.28243799, 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [7]:
item_similarity_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)

In [8]:
user_item_matrix_shape = user_item_matrix.shape
item_similarity_df_sample = item_similarity_df.iloc[:5, :5]

In [9]:
(user_item_matrix_shape, item_similarity_df_sample)

((610, 9724),
 movieId         1         2         3         4         5
 movieId                                                  
 1        1.000000  0.410562  0.296917  0.035573  0.308762
 2        0.410562  1.000000  0.282438  0.106415  0.287795
 3        0.296917  0.282438  1.000000  0.092406  0.417802
 4        0.035573  0.106415  0.092406  1.000000  0.188376
 5        0.308762  0.287795  0.417802  0.188376  1.000000)

In [10]:
def recommend_movies(user_id, user_item_matrix, item_similarity_df, top_n=10):
    rated_movies = user_item_matrix.loc[user_id]
    rated_movies = rated_movies[rated_movies > 0].index.tolist()
    
    sim_scores = item_similarity_df[rated_movies].sum(axis=1)
    
    sim_scores = sim_scores.drop(index=rated_movies)
    
    recommended_movie_ids = sim_scores.sort_values(ascending=False).head(top_n).index
    recommended_movies = df_movies[df_movies['movieId'].isin(recommended_movie_ids)]
    
    return recommended_movies

In [11]:
user_id_example = 2
recommended_movies_example = recommend_movies(user_id_example, user_item_matrix, item_similarity_df, top_n=10)
recommended_movies_example

Unnamed: 0,movieId,title,genres
2226,2959,Fight Club (1999),Action|Crime|Drama|Thriller
4935,7438,Kill Bill: Vol. 2 (2004),Action|Drama|Thriller
5917,33794,Batman Begins (2005),Action|Crime|IMAX
6331,48780,"Prestige, The (2006)",Drama|Mystery|Sci-Fi|Thriller
6743,59315,Iron Man (2008),Action|Adventure|Sci-Fi
7043,69122,"Hangover, The (2009)",Comedy|Crime
7212,72998,Avatar (2009),Action|Adventure|Sci-Fi|IMAX
7214,73017,Sherlock Holmes (2009),Action|Crime|Mystery|Thriller
7395,79702,Scott Pilgrim vs. the World (2010),Action|Comedy|Fantasy|Musical|Romance
7413,80463,"Social Network, The (2010)",Drama
