#### 1. Library Import

In [2]:
from sklearn.decomposition import TruncatedSVD
from scipy.sparse.linalg import svds

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

#### 2. Data Import

In [3]:
ratings = pd.read_csv('../datasets/movie/ratings_small.csv')

ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [4]:
movies = pd.read_csv('../datasets/movie/movies_small.csv')

movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
# ratings 와 movies merge

df = pd.merge(ratings, movies, on='movieId', how='inner')

df.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [6]:
matrix = df.pivot_table(index='userId', columns='title', values='rating').fillna(0)

matrix.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
matrix.describe()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
count,610.0,610.0,610.0,610.0,610.0,610.0,610.0,610.0,610.0,610.0,...,610.0,610.0,610.0,610.0,610.0,610.0,610.0,610.0,610.0,610.0
mean,0.006557,0.006557,0.011475,0.008197,0.013115,0.002459,0.088525,0.004918,0.252459,0.037705,...,0.002459,0.053279,0.018033,0.009836,0.004918,0.139344,0.109016,0.016393,0.133607,0.001639
std,0.161955,0.161955,0.200245,0.202444,0.235917,0.060733,0.560348,0.121466,0.968825,0.353571,...,0.060733,0.470856,0.26042,0.171638,0.121466,0.736293,0.570916,0.184969,0.661963,0.040489
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,4.0,4.0,3.5,5.0,5.0,1.5,5.0,3.0,5.0,4.0,...,1.5,5.0,4.5,3.0,3.0,5.0,4.0,2.5,5.0,1.0


In [8]:
movie_user_rating = matrix.values.T
movie_user_rating

array([[0. , 0. , 0. , ..., 0. , 0. , 4. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [0. , 0. , 0. , ..., 0. , 0. , 1.5],
       [4. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ]])

In [9]:
movie_user_rating.shape

(9719, 610)

#### 4. sklearn.decomposition.TruncatedSVD

* class sklearn.decomposition.TruncatedSVD(n_components=2, *, algorithm='randomized', n_iter=5, n_oversamples=10, power_iteration_normalizer='auto', random_state=None, tol=0.0)

In [10]:
SVD = TruncatedSVD(n_components=12, n_iter=7, random_state=42)
svd_matrix = SVD.fit_transform(movie_user_rating)
svd_matrix.shape

(9719, 12)

In [11]:
item_similarity = np.corrcoef(svd_matrix)

In [12]:
item_similarity.shape

(9719, 9719)

In [40]:
# sns.heatmap(item_similarity)

In [13]:
movie_title = matrix.columns
movie_title_list = list(movie_title)

In [14]:
coffey_hands = movie_title_list.index('Guardians of the Galaxy (2014)')
coffey_hands

3667

In [15]:
corr_coffey_hands = item_similarity[coffey_hands]
list(movie_title[corr_coffey_hands >= 0.9])[:10]

['Adjustment Bureau, The (2011)',
 'Amazing Spider-Man, The (2012)',
 'Ant-Man (2015)',
 'Avatar (2009)',
 'Avengers, The (2012)',
 'Avengers: Age of Ultron (2015)',
 'Big Hero 6 (2014)',
 'Brave (2012)',
 'Captain America: Civil War (2016)',
 'Captain America: The First Avenger (2011)']

In [16]:
corr_coffey_hands > 0.9

array([False, False, False, ..., False, False, False])

#### 5. 특정 사용자에게 추천해주기

In [17]:
matrix_1 = matrix.to_numpy()

In [18]:
matrix_1_mean = np.mean(matrix_1, axis=1)

In [19]:
matrix_norm = matrix_1 - matrix_1_mean.reshape(-1, 1)

In [20]:
matrix_norm.shape

(610, 9719)

---

In [22]:
pd.DataFrame(matrix_norm, columns=matrix.columns).head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
0,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,...,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,3.895771,-0.104229
1,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,...,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781
2,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,...,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775
3,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,...,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902
4,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,...,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463


In [23]:
U, sigma, Vt = svds(matrix_norm, k=12)

In [24]:
U.shape

(610, 12)

In [25]:
sigma.shape

(12,)

In [26]:
Vt.shape

(12, 9719)

In [27]:
# 정사각행렬 만들기
sigma = np.diag(sigma)

In [None]:

svd_user_predict_rating = np.dot(np.dot(U, sigma), Vt) + matrix_1.mean.reshape(-1, 1)

In [None]:
df_svd_user_predict_rating = pd.DataFrame(svd_user_predict_rating, columns=matrix.columns)

In [None]:
df_svd_user_predict_rating.shape