# Singular Value Decomposition (SVD)

In [39]:
import pandas as pd
import numpy as np
from numpy.linalg import svd

In [40]:
# Load the cleaned ratings table (one row per user/movie rating).
ratings_original = pd.read_csv(r"../data/process/clean_rating.csv")

In [41]:
# Preview the loaded ratings; pandas truncates the rendered table to its head and tail.
ratings_original

Unnamed: 0,userid,movieid,rating,timestamp
0,31,1,3.0,2015-02-23 23:18:07
1,31,110,5.0,2015-02-23 23:17:53
2,31,260,5.0,2015-02-23 23:17:13
3,31,364,3.0,2015-02-25 06:13:27
4,31,527,0.5,2015-02-23 23:19:58
...,...,...,...,...
283881,138414,100163,3.0,2015-03-03 21:49:43
283882,138414,102720,3.5,2015-01-18 07:38:37
283883,138414,112183,5.0,2015-02-21 17:23:08
283884,138414,115617,4.0,2015-01-15 09:19:49


In [42]:
# (rows, columns) of the ratings table.
ratings_original.shape

(283886, 4)

In [43]:
# Load the per-movie profile table. Every column is read (no `usecols`
# restriction), so the full profile is available to later merges.
movies = pd.read_csv(r"../data/process/movie_perfil_contenido.csv")
movies.shape

(27278, 48)

In [44]:
# Attach the movie profile to every rating.
# - how="inner" (the pandas default, spelled out): ratings whose movieid is
#   missing from `movies` are dropped.
# - validate="many_to_one": raise a MergeError if `movies` contains a repeated
#   movieid, instead of silently duplicating rating rows (which the later
#   pivot's mean aggregation would hide).
df = pd.merge(
    ratings_original,
    movies,
    on='movieid',
    how='inner',
    validate='many_to_one',
)

In [45]:
# Peek at the first rows of the merged ratings + movie-profile table.
df.head()

Unnamed: 0,userid,movieid,rating,timestamp,title,genres,no_genres_listed,Action,Adventure,Animation,...,2008,2009,2010,2011,2012,2013,2014,2015,tag,tmdbid
0,31,1,3.0,2015-02-23 23:18:07,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,0,1,1,...,3.740781,3.766777,3.856772,3.861689,3.931421,3.946274,3.954945,3.860412,"Watched, computeranimation, Disneyanimatedfeat...",862.0
1,31,110,5.0,2015-02-23 23:17:53,Braveheart (1995),Action|Drama|War,0,1,0,0,...,3.890887,3.85164,3.847527,3.849012,3.869754,3.869778,3.820526,3.809346,"drama, historical, Medieval, melgibson, Nudity...",197.0
2,31,260,5.0,2015-02-23 23:17:13,Star Wars Episode IV A New Hope (1977),Action|Adventure|Sci-Fi,0,1,1,0,...,4.050584,4.041898,4.084775,4.097885,4.096226,4.149308,4.052098,3.897613,"1970s, fantasy, scifi, epic, exciting, galacti...",11.0
3,31,364,3.0,2015-02-25 06:13:27,"Lion King, The (1994)",Adventure|Animation|Children|Drama|Musical|IMAX,0,0,1,1,...,3.729811,3.819801,3.89123,3.91709,3.959759,3.975145,3.930285,3.913725,"comingofage, Disneyanimatedfeature, hereditary...",8587.0
4,31,527,0.5,2015-02-23 23:19:58,Schindler's List (1993),Drama|War,0,0,0,0,...,4.160721,4.121274,4.159131,4.116312,4.136686,4.143459,4.108263,3.963446,"blackandwhite, hitler, WorldWarII, Holocaust, ...",424.0


In [46]:
# Distinct users and distinct movies present in the merged ratings.
n_users, n_movies = (df[column].nunique() for column in ('userid', 'movieid'))

In [47]:
# Report the dataset dimensions, one figure per line.
print(
    f"Numero de users: {n_users}",
    f"Numero de Peliculas: {n_movies}",
    f"Total ratings: {len(df)}",
    sep="\n",
)

Numero de users: 3903
Numero de Peliculas: 14639
Total ratings: 283886


In [48]:
# Reshape the long ratings table into a dense grid: one row per user, one
# column per movie. pivot_table's default aggregation (mean) is applied if a
# (user, movie) pair occurs more than once. Pairs with no rating are filled
# with 0, so a 0 in this matrix means "not rated".
user_item_matrix = (
    df
    .pivot_table(values='rating', index='userid', columns='movieid')
    .fillna(0)
)

In [49]:
# Shape is (rows = users, columns = movies), per the pivot's index/columns.
print("Matrix:", user_item_matrix.shape)

Matrix: (3903, 14639)


In [50]:
# Convert the user-item DataFrame to a NumPy array, which is what the SVD call consumes.
R = user_item_matrix.to_numpy()

In [51]:
# Reduced SVD of the rating matrix: R = U @ diag(sigma) @ Vt.
# full_matrices=False keeps only min(n_rows, n_cols) singular vectors, so U
# and Vt are not padded out to square matrices.
U, sigma, Vt = np.linalg.svd(R, full_matrices=False)
print(f"Shapes -> U: {U.shape}, Sigma: {sigma.shape}, Vt: {Vt.shape}")

Shapes -> U: (3903, 3903), Sigma: (3903,), Vt: (3903, 14639)


In [52]:
# Number of leading singular values/vectors kept for the low-rank model.
k = 50
sigma_k = np.diag(sigma[:k])

# Rank-k reconstruction: (U_k . Sigma_k) . Vt_k, multiplied left to right.
R_approx = U[:, :k].dot(sigma_k).dot(Vt[:k, :])

# Re-attach the user / movie labels so scores can be looked up by id.
R_pred_df = pd.DataFrame(
    data=R_approx,
    index=user_item_matrix.index,
    columns=user_item_matrix.columns,
)

In [53]:
# First rows of the rank-k reconstruction. Values are raw reconstructed scores:
# nothing here clips them to the rating scale.
R_pred_df.head()

movieid,1,2,3,4,5,6,7,8,9,10,...,131241,131243,131248,131250,131252,131254,131256,131258,131260,131262
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
31,2.393364,1.019678,-0.137438,-0.06403,-0.101425,-0.72124,-0.255042,-0.030826,0.004921,0.084256,...,0.07612,0.07612,0.07612,0.07612,0.07612,0.07612,0.07612,-0.003037,-0.00172,-0.004253
96,0.299233,0.069756,-0.021975,-0.019778,-0.032301,0.14812,-0.062171,-0.004031,-0.004191,0.047577,...,0.008369,0.008369,0.008369,0.008369,0.008369,0.008369,0.008369,-0.000932,-0.001973,0.000581
176,0.714557,0.199338,0.023719,0.01666,0.082528,0.212068,0.03257,0.016874,-0.016514,0.0725,...,-0.00227,-0.00227,-0.00227,-0.00227,-0.00227,-0.00227,-0.00227,-5.6e-05,-0.001319,-0.000709
215,0.054778,0.05133,0.010721,-0.003024,-0.002939,0.030169,0.009479,0.002904,-0.002172,0.012541,...,0.005297,0.005297,0.005297,0.005297,0.005297,0.005297,0.005297,0.000189,-0.000181,-2.6e-05
260,1.469242,0.560003,0.064146,0.019282,0.102312,0.017921,0.146009,-0.010838,-0.011901,0.154965,...,-0.008552,-0.008552,-0.008552,-0.008552,-0.008552,-0.008552,-0.008552,-0.001949,0.004382,0.0043


In [54]:
# Target user and how many titles to recommend.
user_id = 31
top_n = 10

# Observed ratings (0 = not rated, from the zero-filled user-item matrix) and
# the reconstructed scores for this user. Both raise KeyError if `user_id`
# is not a row label of the matrices.
user_ratings = user_item_matrix.loc[user_id]
user_predictions = R_pred_df.loc[user_id]

# Restrict to movies the user has not rated, then rank by predicted score.
unrated = user_ratings[user_ratings == 0]
recommendations = (
    user_predictions[unrated.index]
    .sort_values(ascending=False)
    .head(top_n)
)

# reset_index() returns a NEW frame, so its result must be captured: it turns
# the `movieid` index into a real column to join on. The score column keeps
# the Series name, i.e. the value of `user_id`.
recommendations_df = recommendations.reset_index()

# Left join keeps every recommended movie, in ranked order, even if it has no
# row in `movies`.
rdf = pd.merge(recommendations_df, movies, on="movieid", how="left")
rdf.head()

Unnamed: 0,movieid,31,title,genres,no_genres_listed,Action,Adventure,Animation,Children,Comedy,...,2008,2009,2010,2011,2012,2013,2014,2015,tag,tmdbid
0,78499,3.244044,Toy Story 3 (2010),Adventure|Animation|Children|Comedy|Fantasy|IMAX,0,0,1,1,1,1,...,0.0,0.0,4.142544,4.012645,3.993615,3.965144,3.925974,3.889976,"tense, Alivetoys, adventure, animation, bitter...",10193.0
1,111362,3.199614,XMen Days of Future Past (2014),Action|Adventure|Sci-Fi,0,1,1,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.846614,3.780189,"JamesMcAvoy, JenniferLawrence, Marvel, Patrick...",127585.0
2,91542,2.785006,Sherlock Holmes A Game of Shadows (2011),Action|Adventure|Comedy|Crime|Mystery|Thriller,0,1,1,0,0,1,...,0.0,0.0,0.0,3.590909,3.771689,3.874034,3.83463,3.655039,"adaptedfrombookseries, animaldog, animaldonkey...",58574.0
3,56174,2.656042,I Am Legend (2007),Action|Horror|Sci-Fi|Thriller|IMAX,0,1,0,0,0,0,...,3.476411,3.442442,3.490771,3.556999,3.553996,3.605178,3.422892,3.475884,"adaptedfrombook, anotheradaptationofIamLegend,...",6479.0
4,8644,2.623568,"I, Robot (2004)",Action|Adventure|Sci-Fi|Thriller,0,1,1,0,0,0,...,3.471572,3.407767,3.438816,3.555469,3.541331,3.558201,3.462585,3.373938,"artificialintelligence, futuristic, scifi, and...",2048.0
