# 차원축소: SVD

* 데이터: [MovieLens 1M dataset](https://grouplens.org/datasets/movielens/1m/)
    - `.dat` 파일 형식
    - `ratings.dat`
        - `UserID::MovieID::Rating::Timestamp`
        - UserIDs range between 1 and 6040 
        - MovieIDs range between 1 and 3952
        - Ratings are made on a 5-star scale (whole-star ratings only)
        - Timestamp is represented in seconds since the epoch as returned by time(2)
        - Each user has at least 20 ratings


* 이슈
    - [MovieLens 25M dataset](https://grouplens.org/datasets/movielens/25m/): 데이터 용량 및 메모리
    

In [1]:
# module import
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## 1. 데이터 준비

 데이터 로드 후 user-item matrix로 변환

In [2]:
# load ratings data
ratings = pd.read_csv('./data/movielens/ml-1m/ratings.dat',
                      sep='::', 
                      header=None, 
                      engine='python')
ratings

Unnamed: 0,0,1,2,3
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291
...,...,...,...,...
1000204,6040,1091,1,956716541
1000205,6040,1094,5,956704887
1000206,6040,562,5,956704746
1000207,6040,1096,4,956715648


In [3]:
# load movies data
movies = pd.read_csv('./data/movielens/ml-1m/movies.dat',
                     sep='::', 
                     header=None, 
                     engine='python')
movies

Unnamed: 0,0,1,2
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
3878,3948,Meet the Parents (2000),Comedy
3879,3949,Requiem for a Dream (2000),Drama
3880,3950,Tigerland (2000),Drama
3881,3951,Two Family House (2000),Drama


In [4]:
# rename columns
ratings = ratings.rename(columns={0: 'UserID', 1: 'MovieID', 2: 'Rating'})
movies = movies.rename(columns={0: 'MovieID', 1: 'Title', 2: 'Genre'})
ratings.head(2), movies.head(2)

(   UserID  MovieID  Rating          3
 0       1     1193       5  978300760
 1       1      661       3  978302109,
    MovieID             Title                         Genre
 0        1  Toy Story (1995)   Animation|Children's|Comedy
 1        2    Jumanji (1995)  Adventure|Children's|Fantasy)

In [5]:
# ratings to user-item matrix
user_item_df = ratings.pivot(index='UserID', columns='MovieID', values='Rating')
user_item_df

MovieID,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
UserID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,2.0,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,,,,2.0,,3.0,,,,,...,,,,,,,,,,
6037,,,,,,,,,,,...,,,,,,,,,,
6038,,,,,,,,,,,...,,,,,,,,,,
6039,,,,,,,,,,,...,,,,,,,,,,


## 2. SVD

- imputation: NaN 데이터
- SVD 수행 방법론
    - mean 빼줄지 말지
    - 

In [6]:
# impute NaNs with 0
matrix = pd.DataFrame(user_item_df.fillna(0), columns=user_item_df.columns)
matrix

MovieID,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
UserID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6038,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6039,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# matrix factorization with SVD
u, s, vt = np.linalg.svd(matrix)
u.shape, s.shape, vt.shape

((6040, 6040), (3706,), (3706, 3706))

In [9]:
# result matrix
display(pd.DataFrame(u))
display(pd.DataFrame(vt))
display(pd.DataFrame(u[:3706, :3706] @ np.diag(s) @ vt)) # 원본 행렬 완벽히 복원되지는 않음.

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,6030,6031,6032,6033,6034,6035,6036,6037,6038,6039
0,-0.004718,0.001646,0.002671,0.001373,0.018726,-0.008714,0.005197,-0.004361,-0.018942,-0.013255,...,0.002279,-0.001612,0.003070,0.008289,0.004419,-0.003064,0.015710,0.020898,-0.011442,-0.017460
1,-0.009289,-0.002698,0.000382,-0.007095,-0.006765,-0.030726,0.002807,0.007203,0.001452,0.000866,...,-0.017050,0.020850,-0.001641,-0.017866,0.003027,-0.009158,-0.008867,0.000360,-0.020691,-0.021406
2,-0.005010,-0.003343,-0.003344,-0.003035,0.011767,-0.005247,-0.012518,-0.005198,0.002928,-0.001422,...,-0.026160,0.017891,0.007112,-0.020808,0.013160,0.002622,-0.013069,-0.004992,0.011466,-0.005235
3,-0.002677,-0.001297,-0.007675,-0.007271,0.002077,-0.005713,-0.003652,-0.000168,-0.003683,-0.000187,...,-0.022643,-0.000072,0.013679,0.005765,0.004835,-0.009604,-0.032775,-0.003975,0.011497,-0.009736
4,-0.008896,0.003825,0.022099,-0.018478,-0.004815,0.015998,0.010902,0.007355,0.005812,-0.004060,...,-0.005779,0.005165,0.000585,-0.000481,0.004707,-0.001083,-0.012393,-0.001057,-0.001597,0.005721
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6035,-0.033984,0.026138,0.001081,0.002955,-0.018296,0.035779,0.039479,0.033072,-0.008027,0.024348,...,-0.002410,-0.000812,0.001061,-0.001583,0.000722,0.026208,-0.000810,0.001480,0.002713,0.000165
6036,-0.013905,0.027102,-0.011332,-0.019723,-0.000334,-0.005549,0.004914,-0.007649,-0.005285,0.001473,...,-0.000120,-0.000823,-0.014098,-0.011928,-0.004015,-0.011992,0.272355,0.003677,0.010307,0.010970
6037,-0.001389,0.001813,-0.000119,0.000556,0.004883,-0.000453,-0.001107,0.001264,0.001435,-0.000075,...,-0.003656,0.005154,0.006708,-0.009660,-0.012054,0.012600,0.004320,0.775052,-0.001361,-0.007210
6038,-0.007008,0.018765,-0.010712,0.008583,0.021981,-0.004233,0.018020,-0.012139,0.013724,-0.012571,...,-0.012676,0.003008,0.004859,-0.012246,0.000294,-0.016711,0.009086,-0.008816,0.340500,-0.003849


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3696,3697,3698,3699,3700,3701,3702,3703,3704,3705
0,-0.070137,-2.354382e-02,-1.376584e-02,-5.323396e-03,-9.716514e-03,-3.647701e-02,-1.650216e-02,-2.073570e-03,-3.307241e-03,-3.226286e-02,...,-2.868828e-03,-1.750356e-04,-5.070047e-04,-2.040892e-03,-1.963591e-03,-2.534057e-02,-1.291874e-02,-2.615263e-03,-1.166357e-03,-1.325659e-02
1,-0.020940,-2.979245e-02,-1.670390e-02,-2.962760e-03,-1.348858e-02,-2.883657e-02,-9.294363e-03,-2.153673e-03,-8.420669e-03,-5.404987e-02,...,2.982643e-03,1.762087e-04,-9.970160e-04,-2.372289e-03,3.205551e-03,-1.211868e-02,5.580257e-03,1.874400e-03,2.265112e-03,5.022133e-03
2,0.030165,-1.018907e-02,1.257242e-02,1.235569e-02,1.247469e-02,1.082548e-02,2.385216e-02,6.005952e-04,-2.060385e-03,-1.835195e-02,...,4.129865e-03,3.745209e-04,-7.264226e-05,1.581075e-03,-2.018143e-03,3.962501e-02,2.233269e-02,1.783192e-03,3.520917e-03,2.235768e-02
3,-0.004862,3.109033e-02,2.714274e-02,1.507195e-02,2.888498e-02,-4.199448e-02,4.401997e-02,4.120836e-03,3.532661e-03,6.075868e-03,...,-2.575274e-03,4.816028e-04,4.616878e-04,-2.204460e-04,4.056944e-04,-1.649750e-02,-1.359998e-02,-3.654285e-04,-1.109103e-03,-1.275768e-02
4,0.124778,9.580785e-03,5.832904e-03,-2.627695e-03,4.311423e-03,-7.956227e-02,1.207404e-02,2.851460e-03,-1.034408e-02,-2.418556e-02,...,-1.957933e-03,2.434527e-04,1.843148e-03,-4.112052e-03,-6.185812e-03,1.199396e-02,-1.410865e-02,-5.268348e-03,-8.658513e-04,-1.589169e-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3701,0.000000,-2.230761e-17,1.084422e-17,-9.097646e-18,1.693048e-17,7.311588e-18,1.286693e-17,-4.724496e-17,2.829908e-17,-8.378862e-18,...,-8.332137e-17,-1.922692e-16,9.904127e-17,2.609371e-17,-5.961018e-17,-1.315703e-17,-7.599720e-18,-8.961243e-18,5.966573e-18,2.410314e-18
3702,0.000000,-7.332594e-18,6.184364e-19,-2.078822e-17,1.807157e-17,-5.354577e-18,-2.816764e-17,1.771789e-17,-3.765089e-17,8.804012e-18,...,2.640637e-18,1.839017e-16,-5.204036e-17,8.275015e-17,3.460474e-17,-9.686021e-19,-6.057016e-18,-1.546897e-17,5.018751e-17,-1.309804e-17
3703,0.000000,-1.785314e-17,-7.656492e-18,-2.417736e-17,-5.904310e-18,1.314251e-17,-5.179443e-18,5.066650e-17,-1.181340e-17,1.087027e-17,...,-2.838007e-17,-7.057958e-16,1.607449e-17,8.860450e-17,1.907980e-17,3.754470e-19,-3.196234e-17,2.026101e-17,6.155474e-17,-1.431870e-17
3704,0.000000,-1.175038e-18,2.275868e-19,7.645289e-18,9.882489e-18,-4.759426e-19,-4.223103e-18,2.712693e-17,-7.119565e-18,1.471785e-18,...,-3.020230e-17,-5.908956e-17,7.708921e-17,2.232428e-17,-7.486581e-18,5.326151e-19,2.419130e-18,2.431018e-17,1.050918e-17,4.848625e-18


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3696,3697,3698,3699,3700,3701,3702,3703,3704,3705
0,5.000000e+00,2.745074e-13,-5.178400e-15,-2.549694e-14,2.215114e-14,-5.748572e-14,-1.920659e-15,-4.296524e-14,1.622129e-15,1.977007e-14,...,8.037462e-16,-7.979728e-17,-2.536166e-15,6.588358e-16,2.269576e-15,-5.292934e-15,2.646754e-15,-2.920841e-16,1.692304e-15,1.710295e-15
1,-9.136009e-15,-2.434105e-13,1.969915e-14,3.106025e-14,1.797983e-13,-3.924557e-14,7.833528e-14,-2.169976e-14,-5.966726e-14,-1.620954e-14,...,6.343193e-16,1.855070e-16,2.033909e-15,-6.329301e-16,4.664780e-16,2.352244e-16,2.217668e-15,1.887027e-15,2.544284e-15,-4.056949e-17
2,-9.261872e-15,-1.234684e-14,-7.862829e-14,-8.827039e-14,9.657021e-14,-3.122826e-14,1.544643e-14,5.151885e-15,-1.103932e-14,-1.229611e-14,...,4.849636e-16,1.175817e-16,-2.047516e-16,-1.504906e-15,4.443061e-16,8.218923e-15,4.635002e-15,5.691790e-16,2.938459e-16,5.642635e-15
3,6.292673e-15,-1.108339e-14,-9.143727e-15,2.779814e-14,2.080939e-14,-1.227427e-14,1.160823e-14,-1.290743e-16,2.717234e-15,2.055049e-14,...,4.123018e-16,2.528359e-16,1.452018e-16,2.228286e-15,-1.348482e-15,9.852869e-15,-6.325310e-15,-8.039766e-16,1.088905e-15,-4.158457e-16
4,2.544741e-15,1.390273e-14,6.571437e-15,1.343608e-14,-3.967612e-14,2.000000e+00,1.627304e-14,4.223537e-15,1.051905e-14,-1.402280e-14,...,-1.839817e-15,2.246467e-15,5.773377e-17,-1.861101e-16,-3.806905e-16,-8.280035e-16,-1.355769e-14,-1.645317e-15,6.771656e-16,-4.771496e-15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3701,4.000000e+00,-1.616087e-14,4.335924e-15,5.635717e-16,5.723991e-15,3.790181e-15,3.302968e-15,-3.527804e-15,-1.112703e-15,4.891144e-15,...,4.179057e-16,-2.610759e-16,-3.621777e-16,-1.511978e-15,-1.761829e-18,-7.143181e-15,-1.003605e-15,-1.288520e-15,-1.097761e-15,1.489599e-15
3702,-1.146849e-15,4.402503e-15,-3.786895e-15,9.488294e-16,3.378647e-15,1.272025e-15,3.911598e-18,-4.858920e-16,-1.336726e-15,-1.170450e-15,...,6.765015e-16,3.045524e-16,6.490576e-16,-5.957691e-16,1.227588e-15,2.083853e-15,-1.022753e-15,1.452214e-15,6.729914e-16,-6.220525e-16
3703,4.000000e+00,7.810077e-15,1.216619e-14,3.246867e-15,6.551337e-15,5.000000e+00,-1.479393e-14,4.693796e-15,4.517288e-15,1.936186e-14,...,-1.032974e-15,-4.250073e-17,2.947133e-16,2.667854e-15,3.267230e-15,3.000000e+00,4.459966e-15,4.371069e-15,-1.777316e-15,1.353742e-15
3704,5.000000e+00,7.716761e-14,1.169765e-14,5.100426e-15,1.078137e-15,6.269141e-15,1.156991e-14,-4.861020e-15,1.788595e-15,2.552945e-14,...,7.681301e-16,-1.322889e-15,-4.383429e-16,2.132300e-15,-1.142444e-15,-6.907408e-15,2.069552e-15,3.385855e-15,-7.869139e-16,3.486222e-15


## 3. 차원축소


In [10]:
# dimensionality reduction with numpy 
np.where(s == np.percentile(s, 100 - 80))[0][0]

2964

## 4. 추천

- 그 user가 높게 평가할 영화 중 top N개
- 그 user와 성향이 비슷한 user가 평가한 영화 중 top N개

In [11]:
# 행렬 복원
idx = np.where(s >= np.percentile(s, 100 - 90))[0][-1] + 1
reconstructed_matrix = pd.DataFrame(u[:, :idx] @ np.diag(s[:idx]) @ vt[:idx, :])
reconstructed_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3696,3697,3698,3699,3700,3701,3702,3703,3704,3705
0,5.000158,-0.000810,-0.000695,0.001007,-0.000448,-0.000023,0.001425,0.000158,0.001099,0.000040,...,-0.000127,-0.002556,-0.001385,0.002064,0.003243,0.000030,0.000701,-0.000926,-0.000329,-0.000024
1,0.000348,-0.000474,-0.000027,-0.001497,0.000102,-0.000677,-0.000433,-0.000770,-0.002836,0.000174,...,0.000789,0.004779,-0.005770,-0.000554,0.001235,-0.000351,0.000252,-0.003962,-0.000885,0.000112
2,0.000286,-0.000182,-0.000902,-0.000918,0.001934,0.000157,0.000227,-0.000577,-0.000317,-0.000059,...,-0.001262,-0.015687,0.009645,-0.001403,-0.002520,0.000073,0.000282,0.000585,0.001703,-0.000550
3,0.000242,0.000109,-0.000071,0.001738,-0.000380,0.000282,0.000054,0.000074,-0.000147,0.000142,...,0.001252,-0.007387,0.004273,0.001309,-0.000032,-0.000359,0.000572,-0.001661,-0.000267,0.000021
4,-0.000396,0.000034,0.000075,-0.001991,-0.000611,1.999392,0.000966,0.001579,-0.003746,-0.000184,...,-0.002280,0.002283,-0.002194,-0.002472,0.000738,-0.000128,-0.001663,0.005107,-0.000001,-0.000118
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6035,-0.000233,-0.000235,0.000272,2.000773,-0.000580,3.000143,0.000447,0.000879,0.000754,-0.000141,...,0.000071,0.008518,0.000234,0.000521,-0.000119,0.000164,-0.000643,0.001009,-0.001189,0.000174
6036,0.000073,0.000021,-0.000820,-0.001458,-0.000119,-0.000624,-0.000358,-0.000108,-0.000254,0.000072,...,0.003943,0.000409,0.004843,0.001083,0.000400,0.000157,-0.000031,-0.002457,-0.002488,0.000815
6037,-0.000247,0.000211,-0.000051,0.000199,0.000562,0.000260,0.000407,0.000319,-0.000256,0.000494,...,-0.002531,-0.002299,0.004643,-0.002584,-0.000476,0.000306,-0.000316,0.001952,-0.001108,0.000186
6038,-0.000095,0.001056,0.000195,-0.001984,-0.000354,0.000262,-0.000718,-0.001478,0.002389,0.000136,...,0.000520,0.004657,0.008332,0.002339,0.001699,0.000883,-0.001398,0.004580,0.003685,-0.000680


In [12]:
# recommend from plain top N lists
user_preds = reconstructed_matrix.iloc[1-1].sort_values(ascending=False) # user_id - 1
user_preds.index += 1 # movie id + 1
user_preds

958     5.001046
1       5.000158
2600    5.000093
582     4.999912
1849    4.999911
          ...   
1627   -0.035622
3362   -0.037128
2869   -0.040990
2012   -0.042330
3065   -0.051799
Name: 0, Length: 3706, dtype: float64

In [13]:
# user history
user_history = user_item_df[1].dropna()
user_history

UserID
1       5.0
6       4.0
8       4.0
9       5.0
10      5.0
       ... 
6022    5.0
6025    5.0
6032    4.0
6035    4.0
6040    3.0
Name: 1, Length: 2077, dtype: float64

In [16]:
# recommendation from movies that are not in user history
user_recommendations = user_preds[~user_preds.isin(user_history)].sort_values(ascending=False)
user_recommendations

958     5.001046
1       5.000158
2600    5.000093
582     4.999912
1849    4.999911
          ...   
1627   -0.035622
3362   -0.037128
2869   -0.040990
2012   -0.042330
3065   -0.051799
Name: 0, Length: 3706, dtype: float64

In [17]:
# top N recommendation from recommendation
top_n_recommendations = pd.DataFrame(user_recommendations[:10]).reset_index().rename(columns={'index': 'MovieID', 0: 'Expected Ratings'})
top_n_recommendations

Unnamed: 0,MovieID,Expected Ratings
0,958,5.001046
1,1,5.000158
2,2600,5.000093
3,582,4.999912
4,1849,4.999911
5,964,4.999884
6,514,4.999843
7,1179,4.999842
8,1105,4.999819
9,145,4.999786


In [18]:
# display topN movies
pd.merge(movies, top_n_recommendations, on='MovieID')

Unnamed: 0,MovieID,Title,Genre,Expected Ratings
0,1,Toy Story (1995),Animation|Children's|Comedy,5.000158
1,145,Bad Boys (1995),Action,4.999786
2,514,"Ref, The (1994)",Comedy,4.999843
3,582,Metisse (Caf� au Lait) (1993),Comedy,4.999912
4,958,Lady of Burlesque (1943),Comedy|Mystery,5.001046
5,964,Angel and the Badman (1947),Western,4.999884
6,1105,Children of the Corn IV: The Gathering (1996),Horror,4.999819
7,1179,"Grifters, The (1990)",Crime|Drama|Film-Noir,4.999842
8,1849,Prince Valiant (1997),Adventure,4.999911
9,2600,eXistenZ (1999),Action|Sci-Fi|Thriller,5.000093


### user-item matrix 메모리 이슈

In [None]:
matrix = pd.DataFrame()
i = 0
for df in np.array_split(ratings, 10):
    if i == 4:
        break
    temp = df.pivot(index='userId', columns='movieId', values='rating')
    if matrix.empty:
        matrix = temp
    else:
        matrix = matrix.merge(temp, how='outer', on='userId')
    display(matrix)
    i += 1    

In [None]:
# ratings data to user-item matrix
matrix = pd.DataFrame()
for i in range(len(ratings)//10000 + 1):
    temp = ratings[i*100000:(i+1)*100000] # slicing dataframe
    display(temp.pivot(index='userId', columns='movieId', values='rating'))
    if i == 2:
        break

In [None]:
# ratings dataframe to user-item matrix
num_users, num_movies = max(set(ratings['userId'])), max(set(ratings['movieId']))
for i in range(num_users):
    data_i = ratings.loc[ratings['userId']==i+1] # range 0부터 시작하므로
    