In [1]:
import pandas as pd
import numpy as np
import math
from scipy.stats import pearsonr


df = pd.DataFrame([
    [4, 4, 4, 1, 1],
    [3, 1, np.nan, 4, np.nan],
    [4, 2, np.nan, 2, 3],
    [np.nan, 2, 3, np.nan, 1],
    [np.nan, np.nan, 1, 4, 3],
    [1, 1, np.nan, np.nan, 2]],
    columns=['User 1', 'User 2', 'User 3', 'User 4', 'User 5'],
    index=['Item 1', 'Item 2', 'Item 3', 'Item 4', 'Item 5', 'Item 6'])
df

Unnamed: 0,User 1,User 2,User 3,User 4,User 5
Item 1,4.0,4.0,4.0,1.0,1.0
Item 2,3.0,1.0,,4.0,
Item 3,4.0,2.0,,2.0,3.0
Item 4,,2.0,3.0,,1.0
Item 5,,,1.0,4.0,3.0
Item 6,1.0,1.0,,,2.0


### 1. Pearson correlation coefficient를 이용하여 User1와 다른 사용자들과의 similarity를 각각 구하시오

#### 피어슨 계수를 pandas의 데이터 프레임을 이용하여 계산한다.

In [2]:
df.corr(method='pearson')

Unnamed: 0,User 1,User 2,User 3,User 4,User 5
User 1,1.0,0.666667,,-0.944911,0.0
User 2,0.666667,1.0,1.0,-0.928571,-0.4842
User 3,,1.0,1.0,-1.0,-0.944911
User 4,-0.944911,-0.928571,-1.0,1.0,0.755929
User 5,0.0,-0.4842,-0.944911,0.755929,1.0


#### User 1 기준으로 다른 사용자들과의 피어슨 상관계수는 다음과 같다.

User 1 - User 2: 0.67<br>
User 1 - User 3: Nan # 공통 아이템이 1개이므로<br>
User 1 - User 4: -0.94<br>
User 1 - User 5: 0

#### 피어슨 계수 공식을 이용하여 계산한다.

In [3]:
# 공통 아이템을 찾는다.
def common_items(df, user1, user2):
    commons = []
    d1 = df[user1].tolist()
    d2 = df[user2].tolist()

    for i, (i1, i2) in enumerate(zip(d1, d2)):
        if not (math.isnan(i1) or math.isnan(i2)):
            commons.append(i)
    return commons

In [4]:
# 주어진 두 벡터에 대해 피어슨 계수를 계산한다.
def person_sim(a, b):
    return np.dot((a - np.mean(a)), (b - np.mean(b))) / ((np.linalg.norm(a - np.mean(a))) * (np.linalg.norm(b - np.mean(b))))

In [5]:
corrs = []
for i in range(2, df.shape[1]+1):
    commons = common_items(df, f'User 1', f'User {i}')
    corrs.append(person_sim(df[f'User 1'][commons].tolist(), df[f'User {i}'][commons].tolist()))
corrs

  return np.dot((a - np.mean(a)), (b - np.mean(b))) / ((np.linalg.norm(a - np.mean(a))) * (np.linalg.norm(b - np.mean(b))))


[0.6666666666666667, nan, -0.944911182523068, 0.0]

#### 데이터프레임으로 계산한 값과 같다.

User 1 - User 2: 0.67<br>
User 1 - User 3: Nan # 공통 아이템이 1개이므로<br>
User 1 - User 4: -0.94<br>
User 1 - User 5: 0

### 2. 위 결과를 이용하여 User1과 가장 유사한 두 사용자를 구하고, User1의 Item4에 대한 rating을 estimate하시오

#### 위 결과에 의해 피어슨 계수가 0 이상인 사용자는 User 2, User 5 이다. 단, 유저 5의 경우 피어슨 계수가 0이므로 피어슨 계수만으로 유사하다고 판별하기는 부족하다.

In [6]:
# 추정치, 정규화 및 가중치(피어슨 계수) 초기화
estimation = df['User 1'].mean()
normalize_sum = 0
weight_scaled_sum = 0

for i, elem in enumerate(zip(corrs, df)):
    corr = elem[0]
    if corr > 0:
        user = elem[1]
        weight_scaled_sum += corr * (df[f'User {i+2}']['Item 4'] - df[f'User {i+2}'].mean())
        normalize_sum += corr
estimation += weight_scaled_sum / normalize_sum

In [7]:
estimation

3.0

#### 피어슨 계수를 이용하여 User 1의 Item 4를 추정했을 때, 모형은 3점이라고 예측한다.

# Homework2

In [8]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

In [9]:
movies = pd.read_csv('./ml-latest-small/movies.csv')
ratings = pd.read_csv('./ml-latest-small/ratings.csv')
ratings = ratings[['userId', 'movieId', 'rating']]

ratings_matrix = ratings.pivot_table('rating', index = 'userId', columns = 'movieId')
rating_movies = pd.merge(ratings, movies, on = 'movieId')
ratings_matrix = rating_movies.pivot_table('rating', index = 'userId', columns = 'title')

In [10]:
def get_rmse(R, P, Q, non_zeros):
    full_pred_matrix = np.dot(P, Q.T)
    x_non_zero_ind = [non_zero[0] for non_zero in non_zeros]
    y_non_zero_ind = [non_zero[1] for non_zero in non_zeros]
    R_non_zeros = R[x_non_zero_ind, y_non_zero_ind]
    full_pred_matrix_non_zeros = full_pred_matrix[x_non_zero_ind, y_non_zero_ind]
    mse = mean_squared_error(R_non_zeros, full_pred_matrix_non_zeros)
    rmse = np.sqrt(mse)
    return rmse

def matrix_factorization(R, K, steps=200, learning_rate=0.01, r_lambda = 0.01):
    num_users, num_items = R.shape
    P = np.random.normal(scale=1./K, size=(num_users, K))
    Q = np.random.normal(scale=1./K, size=(num_items, K))

    non_zeros = [ (i, j, R[i,j]) for i in range(num_users) for j in range(num_items) if R[i,j] > 0 ]
    for step in range(steps):
        for i, j, r in non_zeros:
            eij = r - np.dot(P[i, :], Q[j, :].T)
            P[i,:] = P[i,:] + learning_rate*(eij * Q[j, :] - r_lambda*P[i,:])
            Q[j,:] = Q[j,:] + learning_rate*(eij * P[i, :] - r_lambda*Q[j,:])
        rmse = get_rmse(R, P, Q, non_zeros)
        if (step % 10) == 0 :
            print("### iteration step : ", step," rmse : ", rmse)
    return P, Q

In [11]:
P, Q = matrix_factorization(
    ratings_matrix.values, K=50, steps=200, learning_rate=0.01, r_lambda=0.01)

### iteration step :  0  rmse :  2.8733168669164346
### iteration step :  10  rmse :  0.7376076244569582
### iteration step :  20  rmse :  0.5117241170714942
### iteration step :  30  rmse :  0.37024275761869346
### iteration step :  40  rmse :  0.29326670200130023
### iteration step :  50  rmse :  0.24929413542984186
### iteration step :  60  rmse :  0.22236406619982613
### iteration step :  70  rmse :  0.20459802853465509
### iteration step :  80  rmse :  0.19209475167424228
### iteration step :  90  rmse :  0.18283806190531304
### iteration step :  100  rmse :  0.17571468912890834
### iteration step :  110  rmse :  0.17006703471884935
### iteration step :  120  rmse :  0.16548251051221702
### iteration step :  130  rmse :  0.16168899588132632
### iteration step :  140  rmse :  0.15849966269537175
### iteration step :  150  rmse :  0.15578198296092993
### iteration step :  160  rmse :  0.15343934275781537
### iteration step :  170  rmse :  0.15139964151071206
### iteration step :  18

In [12]:
pred_matrix = np.dot(P, Q.T)

In [13]:
ratings_pred_matrix = pd.DataFrame(
    data=pred_matrix, index=ratings_matrix.index, columns=ratings_matrix.columns)
ratings_pred_matrix.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.2389,4.120319,3.595345,4.796097,4.473243,1.232677,3.575368,2.369843,4.469057,4.050945,...,1.307118,4.255569,3.778701,2.7892,2.828465,3.755299,4.168993,2.306535,3.941751,0.903197
2,3.2221,3.471802,3.017097,4.272124,4.245681,1.022362,3.539028,1.793688,3.459536,3.439455,...,1.096142,3.743551,3.350082,2.661007,2.347511,3.846664,2.267231,1.62694,4.308017,0.71515
3,2.115985,1.751268,1.49656,2.395588,2.522076,0.680872,1.085961,1.38172,1.898907,2.51139,...,0.748045,2.101832,2.083996,1.750513,1.664126,2.267598,2.135289,0.990718,2.443178,0.511297
4,2.43315,3.230366,2.827246,3.552308,3.463971,0.883107,2.506177,1.879114,3.232889,2.813008,...,1.105995,3.574885,2.746011,2.105837,1.802671,2.312785,0.61982,1.224338,4.061784,0.544853
5,2.654374,3.33194,2.992472,3.479291,3.652026,0.821933,2.16506,1.772875,1.236393,2.620358,...,1.057504,3.659239,2.931612,2.193228,2.014489,3.907995,2.188789,1.325622,3.651576,0.605593


In [14]:
user_10 = ratings_pred_matrix.iloc[10]

In [15]:
user_10.sort_values(ascending=False)

title
Run Lola Run (Lola rennt) (1998)                      5.724700
Schindler's List (1993)                               5.551521
Sound of Music, The (1965)                            5.471772
Matrix, The (1999)                                    5.465850
Big Lebowski, The (1998)                              5.446526
                                                        ...   
Indestructible Man (1956)                             0.259255
Starcrash (a.k.a. Star Crash) (1978)                  0.255387
Maria Bamford: The Special Special Special! (2012)    0.248021
Bloodsport: The Dark Kumite (1999)                    0.242070
Gypsy (1962)                                          0.217189
Name: 11, Length: 9719, dtype: float64

### 잠재 노드의 개수를 3개 줄여서 연산

In [16]:
P, Q = matrix_factorization(
    ratings_matrix.values, K=50-3, steps=200, learning_rate=0.01, r_lambda=0.01)

### iteration step :  0  rmse :  2.973970335900068
### iteration step :  10  rmse :  0.7255911283082964
### iteration step :  20  rmse :  0.5116990244034822
### iteration step :  30  rmse :  0.37777695757908736
### iteration step :  40  rmse :  0.3031119305814463
### iteration step :  50  rmse :  0.26059533683075486
### iteration step :  60  rmse :  0.2346139542094944
### iteration step :  70  rmse :  0.2174654624969401
### iteration step :  80  rmse :  0.20536529487502167
### iteration step :  90  rmse :  0.1963634979356724
### iteration step :  100  rmse :  0.1893889981623086
### iteration step :  110  rmse :  0.18381384433761913
### iteration step :  120  rmse :  0.17924748459279577
### iteration step :  130  rmse :  0.17543433137626
### iteration step :  140  rmse :  0.17219976423795838
### iteration step :  150  rmse :  0.16942008809938877
### iteration step :  160  rmse :  0.16700497967034666
### iteration step :  170  rmse :  0.1648867724284935
### iteration step :  180  rmse : 

In [17]:
pred_matrix_remove_latent_facotr1 = np.dot(P, Q.T)

In [18]:
ratings_pred_matrix_remove_latent_facotr1 = pd.DataFrame(
    data=pred_matrix_remove_latent_facotr1, index=ratings_matrix.index, columns=ratings_matrix.columns)
ratings_pred_matrix_remove_latent_facotr1.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.161007,3.774499,3.411325,4.417998,4.094778,1.181529,4.006874,2.282461,3.641024,4.133884,...,1.34884,3.826363,3.588856,2.842511,2.642188,4.360268,3.437536,2.099668,4.058492,1.002107
2,3.239193,3.664242,3.214304,4.125496,4.028037,1.165247,4.24525,1.859299,2.733835,3.666762,...,1.044732,3.579401,3.37086,2.590209,2.458028,3.859776,2.720285,1.629129,5.199798,0.916252
3,2.196037,2.247839,1.867052,2.605738,2.495473,0.565619,0.101474,1.470761,1.55334,2.29866,...,0.660264,2.558835,2.405196,1.809097,1.632471,2.546037,1.661517,1.200442,2.419977,0.563754
4,2.345128,3.040194,2.685458,3.675229,3.606368,0.402419,3.336642,1.802599,3.341214,2.947827,...,1.080651,2.101124,2.639305,1.897625,1.719965,2.665008,0.508341,1.366555,3.092094,0.706185
5,2.278969,3.217362,2.728792,3.313078,3.261961,0.585934,2.54585,1.755747,2.037843,2.559855,...,0.684716,3.165593,2.485491,1.790384,1.969193,3.616856,1.573441,1.411895,2.716737,0.713718


In [19]:
user_10 = ratings_pred_matrix_remove_latent_facotr1.iloc[10]

In [20]:
user_10.sort_values(ascending=False)

title
(500) Days of Summer (2009)           6.159394
Dead Poets Society (1989)             6.150266
You've Got Mail (1998)                5.950588
Usual Suspects, The (1995)            5.770801
WALL·E (2008)                         5.712018
                                        ...   
Trash Humpers (2009)                  0.220599
Haunted House 2, A (2014)             0.213625
Bad Santa 2 (2016)                    0.159289
Gypsy (1962)                          0.116498
Mortal Kombat: Annihilation (1997)   -0.039629
Name: 11, Length: 9719, dtype: float64

### 잠재 노드의 개수를 10개 줄여서 연산

In [21]:
P, Q = matrix_factorization(
    ratings_matrix.values, K=50-10, steps=200, learning_rate=0.01, r_lambda=0.01)

### iteration step :  0  rmse :  2.913355885015266
### iteration step :  10  rmse :  0.7243115040836775
### iteration step :  20  rmse :  0.5153791148152974
### iteration step :  30  rmse :  0.39360777982266365
### iteration step :  40  rmse :  0.3276947479014422
### iteration step :  50  rmse :  0.2893685160262366
### iteration step :  60  rmse :  0.2652753397582641
### iteration step :  70  rmse :  0.24895924669265693
### iteration step :  80  rmse :  0.23722196390754158
### iteration step :  90  rmse :  0.22837418506612198
### iteration step :  100  rmse :  0.221457550596711
### iteration step :  110  rmse :  0.21589329176372388
### iteration step :  120  rmse :  0.2113131575269149
### iteration step :  130  rmse :  0.20747223779199597
### iteration step :  140  rmse :  0.2042013203108034
### iteration step :  150  rmse :  0.20137960979172495
### iteration step :  160  rmse :  0.19891845608025144
### iteration step :  170  rmse :  0.19675126713171642
### iteration step :  180  rmse 

In [22]:
pred_matrix_remove_latent_facotr2 = np.dot(P, Q.T)

In [23]:
ratings_pred_matrix_remove_latent_facotr2 = pd.DataFrame(
    data=pred_matrix_remove_latent_facotr2, index=ratings_matrix.index, columns=ratings_matrix.columns)
ratings_pred_matrix_remove_latent_facotr2.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.201524,4.183275,3.727881,4.724031,4.531401,1.530729,3.340973,2.323085,3.892922,3.910809,...,1.399713,3.59281,3.672205,2.760472,2.497119,3.809245,4.272843,2.314012,4.126909,0.721264
2,3.291759,3.749706,3.227298,3.961876,3.854378,1.314434,3.890029,2.038746,3.189282,3.637293,...,1.012787,4.068333,3.226399,2.779144,2.286132,4.342079,2.646604,1.643958,4.599507,0.846548
3,2.591478,2.146832,1.843308,2.514927,2.485278,0.862009,1.879609,1.407814,1.214584,2.636065,...,0.62638,2.583466,1.955612,2.005808,1.889083,3.731881,2.287901,0.933684,2.150826,0.506761
4,2.422029,2.851673,2.56836,3.266858,3.406063,0.939223,4.514416,1.762084,4.522173,2.865302,...,1.020935,3.059874,2.859481,2.063631,1.964557,2.351481,1.235174,1.465491,2.475211,0.399474
5,2.41899,3.227211,2.817395,3.486901,3.25132,0.808674,2.93225,1.766198,1.350378,2.621477,...,0.847375,3.021216,2.454704,2.091531,1.808288,3.419131,2.590245,1.802785,2.664287,0.656634


In [24]:
user_10 = ratings_pred_matrix_remove_latent_facotr2.iloc[10]

In [25]:
user_10.sort_values(ascending=False)

title
No Country for Old Men (2007)       6.153709
Swingers (1996)                     6.006095
Army of Darkness (1993)             5.833682
Aliens (1986)                       5.779542
Hunt for Red October, The (1990)    5.740681
                                      ...   
Crow, The: Wicked Prayer (2005)     0.232286
Don't Look Now (1973)               0.231410
Born to Be Wild (1995)              0.227722
Gypsy (1962)                        0.190730
Unforgiven (2013)                   0.187620
Name: 11, Length: 9719, dtype: float64

잠재 노드를 줄였을 때 영화 추천 목록이 변한다. 즉, 노드를 제거하면 노드를 제거했을 때와 순위 변동이 발생하고, 많은 잠재 노드를 줄일수록 최대 값이 작아진다.

### 잠재 노드의 개수를 3개 늘려서 연산

In [26]:
P, Q = matrix_factorization(ratings_matrix.values, K=50+3, steps=200, learning_rate=0.01, r_lambda=0.01)

### iteration step :  0  rmse :  2.9434252788251904
### iteration step :  10  rmse :  0.7327490773789911
### iteration step :  20  rmse :  0.5093431237652234
### iteration step :  30  rmse :  0.36593477403038854
### iteration step :  40  rmse :  0.2858362589832286
### iteration step :  50  rmse :  0.24026283791110303
### iteration step :  60  rmse :  0.21260739862336672
### iteration step :  70  rmse :  0.19448856173075274
### iteration step :  80  rmse :  0.18182146964860701
### iteration step :  90  rmse :  0.1725004115398881
### iteration step :  100  rmse :  0.16536164320395277
### iteration step :  110  rmse :  0.15972054591029028
### iteration step :  120  rmse :  0.1551515016032515
### iteration step :  130  rmse :  0.15137679100280532
### iteration step :  140  rmse :  0.14820748878897877
### iteration step :  150  rmse :  0.14551042990677193
### iteration step :  160  rmse :  0.14318888127044813
### iteration step :  170  rmse :  0.1411707550482701
### iteration step :  180  r

In [27]:
pred_matrix_remove_latent_facotr1 = np.dot(P, Q.T)

In [28]:
ratings_pred_matrix_remove_latent_facotr1 = pd.DataFrame(
    data=pred_matrix_remove_latent_facotr1, index=ratings_matrix.index, columns=ratings_matrix.columns)
ratings_pred_matrix_remove_latent_facotr1.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.137658,4.075143,3.529823,4.626345,4.376067,1.322086,3.586749,2.304033,4.844185,4.211608,...,1.400713,4.774055,3.889236,2.813298,2.885249,4.261225,2.639768,2.138486,3.986532,0.868137
2,3.117153,3.619814,3.082331,4.231853,4.019112,1.29326,3.738321,1.837851,3.828695,3.585954,...,1.140978,3.791397,3.413028,2.621616,2.451309,3.616492,2.344929,1.83189,4.188102,0.770201
3,2.251025,2.230027,1.888596,2.700341,2.634655,0.669157,1.011879,1.581776,1.311541,2.284483,...,0.678869,2.716209,1.744432,1.865121,1.167839,3.276585,2.010709,0.78379,2.576487,0.39411
4,2.267994,3.231783,2.652275,3.518129,3.395053,0.731012,2.777939,1.889855,3.420676,2.89017,...,1.209481,2.169688,2.857086,1.95364,2.03811,2.940455,1.21718,1.611015,3.85738,0.564729
5,2.279929,3.25138,2.870389,3.944395,3.690367,0.966086,2.203787,1.801947,2.071704,3.265361,...,0.931159,4.141263,2.699855,2.076943,2.0428,3.456241,1.85823,1.583135,3.569816,0.60394


In [29]:
user_10 = ratings_pred_matrix_remove_latent_facotr1.iloc[10]

In [30]:
user_10.sort_values(ascending=False)

title
Battlestar Galactica (2003)                 6.063496
Leaving Las Vegas (1995)                    5.878098
Slumdog Millionaire (2008)                  5.831174
Dead Poets Society (1989)                   5.808051
Schindler's List (1993)                     5.802771
                                              ...   
Yongary: Monster from the Deep (1967)       0.273326
Mortal Kombat: The Journey Begins (1995)    0.270740
Aloha (2015)                                0.257421
Glitter (2001)                              0.225344
Bad Santa 2 (2016)                          0.186087
Name: 11, Length: 9719, dtype: float64

### 잠재 노드의 개수를 10개 늘려서 연산

In [31]:
P, Q = matrix_factorization(ratings_matrix.values, K=50+10, steps=200, learning_rate=0.01, r_lambda=0.01)

### iteration step :  0  rmse :  2.9961985092518204
### iteration step :  10  rmse :  0.735330857865317
### iteration step :  20  rmse :  0.5087491668695563
### iteration step :  30  rmse :  0.3587821478078005
### iteration step :  40  rmse :  0.2734025037218185
### iteration step :  50  rmse :  0.2246209320798016
### iteration step :  60  rmse :  0.19518460770554477
### iteration step :  70  rmse :  0.17609688586871353
### iteration step :  80  rmse :  0.1629112336690405
### iteration step :  90  rmse :  0.15333574917702
### iteration step :  100  rmse :  0.14610466196203872
### iteration step :  110  rmse :  0.14047182431214017
### iteration step :  120  rmse :  0.13597263408610305
### iteration step :  130  rmse :  0.13230435251195694
### iteration step :  140  rmse :  0.12926204170781014
### iteration step :  150  rmse :  0.1267023774079567
### iteration step :  160  rmse :  0.1245222582426173
### iteration step :  170  rmse :  0.12264566810375711
### iteration step :  180  rmse : 

In [32]:
pred_matrix_remove_latent_facotr1 = np.dot(P, Q.T)

In [33]:
ratings_pred_matrix_remove_latent_facotr1 = pd.DataFrame(
    data=pred_matrix_remove_latent_facotr1, index=ratings_matrix.index, columns=ratings_matrix.columns)
ratings_pred_matrix_remove_latent_facotr1.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2.93919,4.131673,3.670067,4.651427,4.516903,1.354842,4.09603,2.329448,3.888966,3.980393,...,1.342245,3.997898,3.926173,2.755215,2.686829,4.185312,3.654444,2.41024,4.01593,0.780195
2,2.906963,3.571879,3.174681,4.088665,3.964275,1.206362,4.074644,1.758925,3.038569,3.572619,...,1.055196,3.65403,3.25458,2.547037,2.430703,4.265509,2.329878,1.826619,3.990214,0.759293
3,2.356041,1.843991,1.686036,2.354157,2.224792,0.709468,1.473335,1.422421,2.120297,2.351814,...,0.724169,2.576903,2.219162,1.891802,1.689377,2.944384,1.660084,0.888581,2.746286,0.536467
4,2.203605,3.012114,2.659256,3.397188,3.283046,0.869983,2.549768,1.798846,3.100444,2.877511,...,1.101535,2.954189,2.686306,1.81978,2.028826,2.548456,0.467491,1.377901,2.881961,0.579919
5,2.408185,3.310467,2.995225,3.376833,3.293285,0.80857,1.537545,1.734479,2.222702,2.675083,...,1.021224,3.698558,2.509464,2.078656,2.036203,3.683662,1.404629,1.39864,2.214377,0.623679


In [34]:
user_10 = ratings_pred_matrix_remove_latent_facotr1.iloc[10]

In [35]:
user_10.sort_values(ascending=False)

title
Matrix, The (1999)                                    5.926477
Bourne Identity, The (2002)                           5.772982
Aliens (1986)                                         5.482771
You've Got Mail (1998)                                5.435686
Schindler's List (1993)                               5.411756
                                                        ...   
Bad Santa 2 (2016)                                    0.248591
Good Burger (1997)                                    0.229541
Mortal Kombat: Annihilation (1997)                    0.191166
Maria Bamford: The Special Special Special! (2012)    0.173736
Gypsy (1962)                                          0.158699
Name: 11, Length: 9719, dtype: float64

잠재 노드를 늘렸을 때도 줄였을 때와 마찬가지로 목록에 변동이 발생한다. 늘리거나 줄이거나, 노드 수를 크게 변화시킬수록 최대 값이 작아지는 현상을 관찰할 수 있다.