In [18]:
import pandas as pd
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity

from scipy.sparse.linalg import svds
from scipy.sparse import csr_matrix



In [19]:
df_links    = pd.read_csv('./links.csv')
df_movies   = pd.read_csv('./movies.csv')
df_ratings  = pd.read_csv('./ratings.csv')
df_tags     = pd.read_csv('./tags.csv')

In [20]:
(df_links.head())

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [21]:
df_links.size

29226

In [22]:
df_movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [23]:
df_movies.size

29226

In [24]:
df_ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [25]:
df_ratings.size

403344

In [26]:
df_tags.head()

Unnamed: 0,userId,movieId,tag,timestamp
0,2,60756,funny,1445714994
1,2,60756,Highly quotable,1445714996
2,2,60756,will ferrell,1445714992
3,2,89774,Boxing story,1445715207
4,2,89774,MMA,1445715200


In [27]:
df_tags.size

14732

In [28]:
# Transforming the ratings data into a user-item matrix
user_item_matrix = df_ratings.pivot_table(index='userId', columns='movieId', values='rating').fillna(0)
user_item_matrix.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
# If your user_item_matrix is a dense numpy array with integers or any non-floating point type
user_item_matrix = user_item_matrix.astype(float)  # Convert to float if it's not already

# If it's dense but should be sparse for efficiency
user_item_matrix_sparse = csr_matrix(user_item_matrix)

# Applying SVD on the correctly formatted sparse matrix
U, sigma, Vt = svds(user_item_matrix_sparse, k=50)
sigma = np.diag(sigma)

In [30]:
# Predict ratings
predicted_ratings = np.dot(np.dot(U, sigma), Vt)

In [31]:
# Convert the predicted ratings to a DataFrame for easier interpretation
predicted_ratings_df = pd.DataFrame(predicted_ratings, index=user_item_matrix.index, columns=user_item_matrix.columns)

In [32]:
def recommend_movies_svd(user_id, user_item_matrix, predicted_ratings_df, movies_df, top_n=10):
    # Get the user's ratings
    user_ratings = user_item_matrix.loc[user_id]
    
    # Get the predicted ratings for the user and remove movies the user has already rated
    user_predicted_ratings = predicted_ratings_df.loc[user_id].drop(user_ratings[user_ratings > 0].index)
    
    # Get the top N movie recommendations
    top_n_movie_ids = user_predicted_ratings.sort_values(ascending=False).head(top_n).index
    recommended_movies = movies_df[movies_df['movieId'].isin(top_n_movie_ids)]
    
    return recommended_movies

In [33]:
user_id_example = 2
recommended_movies_svd_example = recommend_movies_svd(user_id_example, user_item_matrix, predicted_ratings_df, df_movies, top_n=10)
recommended_movies_svd_example

Unnamed: 0,movieId,title,genres
314,356,Forrest Gump (1994),Comedy|Drama|Romance|War
510,593,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller
1939,2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller
2226,2959,Fight Club (1999),Action|Crime|Drama|Thriller
3638,4993,"Lord of the Rings: The Fellowship of the Ring,...",Adventure|Fantasy
4800,7153,"Lord of the Rings: The Return of the King, The...",Action|Adventure|Drama|Fantasy
6331,48780,"Prestige, The (2006)",Drama|Mystery|Sci-Fi|Thriller
7043,69122,"Hangover, The (2009)",Comedy|Crime
7413,80463,"Social Network, The (2010)",Drama
8879,134130,The Martian (2015),Adventure|Drama|Sci-Fi
