In [34]:
import numpy as np
import pandas as pd
import os

from sklearn.decomposition import TruncatedSVD
from scipy.sparse.linalg import svds

# Ex 1) MovieLens Data 

In [310]:
# Resolve the data directory from the RS_DATA_DIR environment variable so the
# notebook is not tied to one machine; the default is the original location.
data_dir = os.environ.get('RS_DATA_DIR', '/Users/jayyoon/Desktop/leeminho/rs/data')
rating_data = pd.read_csv(os.path.join(data_dir, 'archive', 'ratings.csv'))
movie_data = pd.read_csv(os.path.join(data_dir, 'archive', 'movies.csv'))

In [8]:
rating_data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [9]:
movie_data.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [312]:
# Drop the columns the recommenders do not use. Assigning the result (instead of
# inplace=True) together with errors='ignore' keeps this cell safe to re-run:
# a second run no longer raises KeyError for the already-removed column.
rating_data = rating_data.drop(columns='timestamp', errors='ignore')
movie_data = movie_data.drop(columns='genres', errors='ignore')

In [313]:
# Attach each rating to its movie title by joining the two tables on movieId.
user_movie_data = rating_data.merge(movie_data, on='movieId')
user_movie_data.head()

Unnamed: 0,userId,movieId,rating,title
0,1,31,2.5,Dangerous Minds (1995)
1,7,31,3.0,Dangerous Minds (1995)
2,31,31,4.0,Dangerous Minds (1995)
3,32,31,4.0,Dangerous Minds (1995)
4,36,31,3.0,Dangerous Minds (1995)


## 특정 영화와 비슷한 영화들을 추천

In [314]:
# Reshape into a userId x title table of ratings; repeated (userId, title) pairs
# are averaged, which is pivot_table's default aggregation.
user_movie_data = user_movie_data.pivot_table(values='rating', index='userId', columns='title')

In [315]:
# Flip the table so that rows are movie titles and columns are userIds.
movie_user_data = user_movie_data.transpose()

In [316]:
movie_user_data.head()

userId,1,2,3,4,5,6,7,8,9,10,...,662,663,664,665,666,667,668,669,670,671
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",,,,,,,,,,,...,,,,,,,,,,
$9.99 (2008),,,,,,,,,,,...,,,,,,,,,,
'Hellboy': The Seeds of Creation (2004),,,,,,,,,,,...,,,,,,,,,,
'Neath the Arizona Skies (1934),,,,,,,,,,,...,,,,,,,,,,
'Round Midnight (1986),,,,,,,,,,,...,,,,,,,,,,


In [317]:
# Subtract each column's (i.e. each user's) mean rating from the observed ratings,
# then set the entries that were never rated to 0.
movie_user_rating = movie_user_data.sub(movie_user_data.mean(axis=0), axis='columns').fillna(0)
movie_user_rating.head()

userId,1,2,3,4,5,6,7,8,9,10,...,662,663,664,665,666,667,668,669,670,671
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$9.99 (2008),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Hellboy': The Seeds of Creation (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Neath the Arizona Skies (1934),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Round Midnight (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [318]:
# Run the SVD.
# Truncated SVD: keep only the top-n singular values of the sigma matrix.
# TruncatedSVD uses a randomized solver by default, so random_state is pinned to
# make the embedding (and the similarity lists derived from it) reproducible.
SVD = TruncatedSVD(n_components = 12, random_state = 42)
matrix = SVD.fit_transform(movie_user_rating)
matrix.shape

(9064, 12)

In [319]:
matrix[0]

array([-0.01581366, -0.00187164,  0.00438528, -0.01204048,  0.00553814,
       -0.00510378,  0.01619752,  0.00498698, -0.03251911,  0.02468528,
        0.03485973, -0.01663176])

In [320]:
# Pairwise Pearson correlation between the movies' latent vectors.
# np.corrcoef divides by each row's standard deviation; for rows where that is 0
# the correlation is undefined, which produces RuntimeWarnings and NaN entries.
# Those warnings are expected here, so silence them and store 0 ("no measurable
# similarity") in place of NaN. Threshold tests such as `corr >= 0.9` are
# unaffected because both NaN and 0 fail them.
with np.errstate(divide='ignore', invalid='ignore'):
    corr = np.corrcoef(matrix)
# copy=False rewrites the (large, movies x movies) array in place.
corr = np.nan_to_num(corr, copy=False, nan=0.0)
corr.shape

  c /= stddev[:, None]
  c /= stddev[None, :]


(9064, 9064)

In [321]:
# Keep the titles both as an Index (for boolean masking) and as a plain list
# (for positional lookups with list.index).
movie_title = user_movie_data.columns
movie_title_list = movie_title.tolist()

In [322]:
# Position of the query movie within the title list; used as a row index into `corr`.
# NOTE(review): the name `coffey_hands` does not describe this movie and looks like a
# leftover from another example — consider renaming together with its uses.
coffey_hands = movie_title_list.index('Guardians of the Galaxy (2014)')

In [323]:
# List (at most 50) movies whose correlation with Guardians of the Galaxy is 0.9 or higher.
corr_coffey_hands = corr[coffey_hands]
movie_title[corr_coffey_hands >= 0.9][:50].tolist()

  list(movie_title[(corr_coffey_hands >= 0.9)])[:50]


['Guardians of the Galaxy (2014)',
 'How to Train Your Dragon (2010)',
 'Inside Man (2006)',
 'Source Code (2011)',
 'Star Trek Into Darkness (2013)',
 'Star Wars: Episode VII - The Force Awakens (2015)',
 'X-Men: Days of Future Past (2014)']

## 개인 사용자에게 추천

In [341]:
# Build the userId x movieId rating table (NaN where a user has not rated a movie).
user_movie_ratings = rating_data.set_index(['userId', 'movieId'])['rating'].unstack('movieId')
user_movie_ratings.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,4.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,4.0,...,,,,,,,,,,
5,,,4.0,,,,,,,,...,,,,,,,,,,


In [342]:
# Store the mean ratings so they can be added back after the SVD reconstruction.
# NOTE(review): despite the name, `.mean()` on this userId x movieId table averages
# down each column, i.e. it yields one mean per movieId, not per user — confirm
# this is intended.
# This cell must run before the table is mean-centered (the centering cell reuses
# the same variable name); otherwise the stored means come from centered data.
user_ratings_mean = user_movie_ratings.mean().values

In [344]:
# To tell "not rated" apart from an actual rating of 0, subtract the column mean
# from every observed rating first and only then fill the missing entries with 0.
user_movie_ratings = user_movie_ratings.sub(user_movie_ratings.mean(), axis='columns').fillna(0)
user_movie_ratings.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.54918,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.54918,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.838983,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [345]:
# Run a rank-12 truncated SVD of the centered rating matrix.
# svds is documented for ndarray / sparse matrix / LinearOperator input, so pass the
# DataFrame's underlying NumPy array instead of relying on implicit conversion.
U, sigma, Vt = svds(user_movie_ratings.values, k = 12)

In [346]:
U

array([[-0.00044554,  0.00064722, -0.00614431, ...,  0.00068842,
        -0.00395796,  0.00503696],
       [ 0.01213061,  0.00390252, -0.00761358, ..., -0.00114332,
        -0.00490798,  0.00170996],
       [ 0.01545264,  0.00137184,  0.01455146, ..., -0.00629671,
        -0.00938288,  0.00473391],
       ...,
       [-0.0014661 , -0.00383889,  0.00254751, ...,  0.00098441,
        -0.00184423, -0.00148638],
       [-0.00697257, -0.01652356, -0.00255814, ..., -0.01005495,
        -0.0021314 , -0.00044395],
       [-0.00474703, -0.00350768, -0.0034154 , ...,  0.01254032,
         0.01065791, -0.0141348 ]])

In [347]:
sigma

array([28.32571888, 28.43812054, 29.46735877, 29.75250113, 30.73941323,
       31.87613033, 33.82851906, 34.67792267, 41.16944171, 43.39543176,
       46.59352482, 63.62989603])

In [348]:
Vt

array([[-0.05635634, -0.02366715,  0.03722001, ...,  0.        ,
         0.        ,  0.        ],
       [-0.04533713,  0.01352891,  0.04319073, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.0230966 , -0.01538835,  0.02125457, ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.02805544,  0.00591471,  0.00820157, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.053005  ,  0.01583603,  0.02413432, ...,  0.        ,
         0.        ,  0.        ],
       [-0.08064607, -0.04619253, -0.01857247, ...,  0.        ,
         0.        ,  0.        ]])

In [349]:
# svds returns the singular values as a 1-D vector; expand them into a diagonal
# matrix for the matrix product that follows. np.diag applied to a 2-D array
# extracts the diagonal again, so guard on ndim to keep this cell safe to re-run.
if sigma.ndim == 1:
    sigma = np.diag(sigma)

In [351]:
# Multiply the three factors back into a single matrix and add the stored means back on.
svd_user_predicted_ratings = U @ sigma @ Vt + user_ratings_mean

In [352]:
# Wrap the reconstructed matrix in a DataFrame whose columns are the movieIds.
# The column labels come from `user_movie_ratings`, the pivoted table the SVD was
# run on; the previously referenced `df_user_movie_ratings` is not defined anywhere
# in the visible notebook and raises NameError on a fresh kernel.
# The default 0..n-1 row index is kept because recommend_movies looks rows up by position.
df_svd_preds = pd.DataFrame(svd_user_predicted_ratings, columns = user_movie_ratings.columns)
df_svd_preds.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
0,3.819771,3.393238,3.150091,2.382339,3.264667,3.891287,3.285663,3.799115,3.148059,3.433709,...,2.5,0.5,3.0,1.0,1.5,5.0,4.5,5.0,3.0,5.0
1,3.821034,3.380603,3.172825,2.379545,3.273076,3.88495,3.257356,3.797174,3.150258,3.442221,...,2.5,0.5,3.0,1.0,1.5,5.0,4.5,5.0,3.0,5.0
2,3.71564,3.366789,3.166074,2.38734,3.225744,3.864144,3.269005,3.801385,3.153983,3.414733,...,2.5,0.5,3.0,1.0,1.5,5.0,4.5,5.0,3.0,5.0
3,4.453902,3.544275,3.184984,2.369488,3.306064,3.945067,3.27803,3.797334,3.155408,3.494327,...,2.5,0.5,3.0,1.0,1.5,5.0,4.5,5.0,3.0,5.0
4,3.972671,3.502732,3.232406,2.383143,3.31679,3.876252,3.329457,3.80371,3.158492,3.492256,...,2.5,0.5,3.0,1.0,1.5,5.0,4.5,5.0,3.0,5.0


In [336]:
def recommend_movies(df_svd_preds, user_id, ori_movies_df, ori_ratings_df, num_recommendations=5):
    """Return a user's rating history and their top unseen movies by predicted rating.

    Parameters
    ----------
    df_svd_preds : pandas.DataFrame
        Predicted ratings with one row per user and one column per movieId.
        Rows are addressed by position: the row at position ``user_id - 1`` is used.
    user_id : int
        1-based user id to build recommendations for.
    ori_movies_df : pandas.DataFrame
        Movie table; must contain a ``movieId`` column.
    ori_ratings_df : pandas.DataFrame
        Observed ratings; must contain ``userId``, ``movieId`` and ``rating`` columns.
    num_recommendations : int, default 5
        Maximum number of recommended movies to return.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``user_history``: the user's observed ratings joined with the movie table,
        sorted by ``rating`` descending.
        ``recommendations``: movies the user has not rated, with a ``Predictions``
        column, sorted by predicted rating descending and cut to
        ``num_recommendations`` rows.

    Raises
    ------
    IndexError
        If ``user_id`` does not map to a row of ``df_svd_preds``. Without this check
        a ``user_id`` of 0 or below would silently wrap around to another user's row.
    """
    # Predictions are stored positionally, so convert the 1-based id to a row position.
    user_row_number = user_id - 1
    if not 0 <= user_row_number < len(df_svd_preds):
        raise IndexError(
            'user_id {} is out of range for a prediction table with {} rows'.format(
                user_id, len(df_svd_preds)
            )
        )

    # Name the value column and the key explicitly so the merge below does not depend
    # on how the rows/columns of df_svd_preds happen to be labelled.
    user_predictions = (
        df_svd_preds.iloc[user_row_number]
        .rename('Predictions')
        .rename_axis('movieId')
        .reset_index()
    )

    # Everything this user actually rated, joined with the movie table, best rating first.
    user_data = ori_ratings_df[ori_ratings_df.userId == user_id]
    user_history = user_data.merge(ori_movies_df, on='movieId').sort_values(['rating'], ascending=False)

    # Candidate movies are those the user has not rated yet.
    unseen_movies = ori_movies_df[~ori_movies_df['movieId'].isin(user_history['movieId'])]

    # Attach the predicted ratings and keep the highest-scoring candidates.
    recommendations = (
        unseen_movies.merge(user_predictions, on='movieId')
        .sort_values('Predictions', ascending=False)
        .iloc[:num_recommendations, :]
    )

    return user_history, recommendations

In [353]:
# Top-10 recommendations (and the rating history) for user 330.
already_rated, predictions = recommend_movies(
    df_svd_preds,
    user_id=330,
    ori_movies_df=movie_data,
    ori_ratings_df=rating_data,
    num_recommendations=10,
)

In [354]:
already_rated

Unnamed: 0,userId,movieId,rating,title
47,330,1094,5.0,"Crying Game, The (1992)"
14,330,213,5.0,Burnt by the Sun (Utomlyonnye solntsem) (1994)
34,330,527,5.0,Schindler's List (1993)
45,330,1035,5.0,"Sound of Music, The (1965)"
48,330,1172,5.0,Cinema Paradiso (Nuovo cinema Paradiso) (1989)
...,...,...,...,...
70,330,1777,1.0,"Wedding Singer, The (1998)"
57,330,1407,1.0,Scream (1996)
43,330,1028,1.0,Mary Poppins (1964)
41,330,924,1.0,2001: A Space Odyssey (1968)


In [355]:
predictions

Unnamed: 0,movieId,title,Predictions
4485,6332,"Lizzie McGuire Movie, The (2003)",5.0
4492,6342,"Trip, The (2002)",5.0
4201,5765,"Looney, Looney, Looney Bugs Bunny Movie, The (...",5.0
4232,5828,Blackrock (1997),5.0
4251,5866,They All Laughed (1981),5.0
4303,5960,Bad Influence (1990),5.0
4339,6033,Mystery Date (1991),5.0
4369,6107,Night of the Shooting Stars (Notte di San Lore...,5.0
4392,6163,He Loves Me... He Loves Me Not (À la folie... ...,5.0
4505,6369,Friends and Family (2001),5.0


## 음식 메뉴 추천 

In [396]:
# Resolve the data directory from the RS_DATA_DIR environment variable so the
# notebook is not tied to one machine; the default is the original location.
data_dir = os.environ.get('RS_DATA_DIR', '/Users/jayyoon/Desktop/leeminho/rs/data')
original_df = pd.read_csv(os.path.join(data_dir, 'misik.csv'))
original_df.head()

Unnamed: 0,구분,방남진,이예진,김한성,김승원,강진희,황상순
0,진주집 콩국수,3.5,3.0,3.0,4.5,4.0,4.0
1,진주집 비빔냉면,3.5,3.0,3.0,,4.0,
2,산삼골 오리전골,3.5,,4.5,,3.0,4.5
3,여백 매운칼국수,2.0,,4.0,3.5,,
4,황제 부대찌개,3.0,,4.0,4.0,3.5,


In [397]:
# Use the '구분' column (the menu item labels) as the row index.
original_df = original_df.set_index('구분')

In [398]:
# Mean rating of every column (one value per person), as a plain NumPy array.
user_rating_mean = np.asarray(original_df.mean())
user_rating_mean

array([3.01724138, 2.58928571, 3.82894737, 3.90625   , 3.64285714,
       3.66666667])

In [399]:
# Center each rating on its column mean (rating - mean rating) so that observed
# ratings can be told apart from entries that were never rated.
df = original_df.sub(original_df.mean(), axis='columns')
df.head()

Unnamed: 0_level_0,방남진,이예진,김한성,김승원,강진희,황상순
구분,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
진주집 콩국수,0.482759,0.410714,-0.828947,0.59375,0.357143,0.333333
진주집 비빔냉면,0.482759,0.410714,-0.828947,,0.357143,
산삼골 오리전골,0.482759,,0.671053,,-0.642857,0.833333
여백 매운칼국수,-1.017241,,0.171053,-0.40625,,
황제 부대찌개,-0.017241,,0.171053,0.09375,-0.142857,


In [400]:
# Swap rows and columns for per-user recommendation: rows become people, columns
# become menu items, and entries that were never rated are set to 0.
# The frame is rebuilt from original_df rather than from `df` itself, because
# `df = df.fillna(0).T` flips the orientation back on every re-run of the cell.
df = (original_df - original_df.mean()).fillna(0).T
df.head()

구분,진주집 콩국수,진주집 비빔냉면,산삼골 오리전골,여백 매운칼국수,황제 부대찌개,가양버섯 칼국수,여백지하 순대국,용호낙지 낙곱새,창고 소고기,MBC앞 수제비,...,여의도 따로국밥,VIPS 샐러드바,애슐리 샐러드바,여의도 봉추찜닭,구마산 추어탕,을밀대 평양냉면,죠스떡볶이 매운떡볶이,아딸 순대,호식이두마리치킨 후라이드,홍콩반점 탕수육
방남진,0.482759,0.482759,0.482759,-1.017241,-0.017241,-0.017241,-0.017241,-0.017241,0.982759,-0.017241,...,-0.017241,-0.517241,-0.517241,-0.017241,-0.017241,-0.517241,-0.517241,-0.017241,-0.017241,-0.017241
이예진,0.410714,0.410714,0.0,0.0,0.0,-0.089286,-1.089286,-0.089286,0.910714,0.0,...,0.0,-0.089286,-0.589286,0.0,0.0,0.0,-0.589286,0.0,-0.089286,0.0
김한성,-0.828947,-0.828947,0.671053,0.171053,0.171053,0.171053,0.671053,0.671053,0.171053,0.671053,...,0.171053,0.171053,-0.328947,0.0,0.0,-0.328947,0.171053,0.171053,0.0,-0.828947
김승원,0.59375,0.0,0.0,-0.40625,0.09375,0.09375,0.09375,0.59375,0.0,0.59375,...,0.0,-0.40625,-0.90625,0.0,0.0,0.0,0.0,-0.40625,0.09375,0.09375
강진희,0.357143,0.357143,-0.642857,0.0,-0.142857,-0.142857,-0.142857,0.357143,0.0,0.0,...,0.0,-0.142857,-0.642857,-0.142857,0.0,0.0,-0.642857,0.0,0.0,0.357143


In [401]:
# Run a rank-5 truncated SVD of the centered rating matrix.
# svds is documented for ndarray / sparse matrix / LinearOperator input, so pass the
# DataFrame's underlying NumPy array instead of relying on implicit conversion.
U, sigma, Vt = svds(df.values, k = 5)

In [402]:
U

array([[ 0.19971191,  0.6987108 , -0.16943037, -0.13315375, -0.5849902 ],
       [-0.07095746, -0.43821967,  0.39299556, -0.04390853, -0.76836682],
       [ 0.26600676, -0.0559293 ,  0.3812418 , -0.84268964,  0.19466606],
       [ 0.15394183, -0.02927907, -0.59131464, -0.38033881, -0.06522249],
       [-0.14830479, -0.50479266, -0.56619769, -0.18004164, -0.15886899],
       [-0.91576645,  0.24691246,  0.03563293, -0.30519452,  0.00327037]])

In [403]:
sigma

array([2.05156571, 2.21152018, 2.50671801, 3.23462092, 4.75751247])

In [404]:
Vt

array([[-0.20474836, -0.10050967, -0.19150385, -0.10732921,  0.03786196,
        -0.3310286 ,  0.14036724,  0.1071541 , -0.06244387,  0.12988333,
        -0.03442663, -0.01588375,  0.05427476, -0.02996982, -0.00167838,
         0.01549581, -0.09272894,  0.01549581,  0.1437341 , -0.03317723,
        -0.10585917,  0.11225769,  0.20578454,  0.20427907,  0.00434323,
         0.56776482, -0.09300286, -0.12125646,  0.16425635,  0.13626769,
         0.00461716,  0.01870323,  0.06441262,  0.02094343,  0.1213904 ,
         0.15262647, -0.05763911, -0.07355842, -0.04929838, -0.05035143,
        -0.21190399,  0.05402932, -0.14856851,  0.02753502, -0.04432982,
        -0.00167838,  0.08841867,  0.01549581,  0.02050037,  0.02915464,
        -0.01975589,  0.00864856, -0.00167838, -0.09300286, -0.11011131,
        -0.15877459,  0.00844439, -0.1279427 ],
       [ 0.03993843,  0.01058319,  0.37532875, -0.32033611,  0.02159362,
         0.13232611,  0.22479402, -0.09410677,  0.16292343, -0.03027903,
   

In [405]:
# Expand the 1-D vector of singular values into a diagonal matrix.
# np.diag applied to a 2-D array extracts the diagonal again, so guard on ndim
# to keep this cell safe to re-run.
if sigma.ndim == 1:
    sigma = np.diag(sigma)
sigma

array([[2.05156571, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 2.21152018, 0.        , 0.        , 0.        ],
       [0.        , 0.        , 2.50671801, 0.        , 0.        ],
       [0.        , 0.        , 0.        , 3.23462092, 0.        ],
       [0.        , 0.        , 0.        , 0.        , 4.75751247]])

In [406]:
# Show the shapes of the three factors, one per line, to check they are conformable.
print(U.shape, sigma.shape, Vt.shape, sep='\n')

(6, 5)
(5, 5)
(5, 58)


In [407]:
# Rebuild the full matrix from its factors and add each row's (person's) mean back;
# the means are turned into a column vector so they broadcast across the columns.
svd_user_predicted_ratings = U @ sigma @ Vt + user_rating_mean[:, np.newaxis]

In [433]:
# Label the predicted ratings with the same row and column labels as `df`.
df_svd_preds = pd.DataFrame(data=svd_user_predicted_ratings, index=df.index, columns=df.columns)
df_svd_preds

구분,진주집 콩국수,진주집 비빔냉면,산삼골 오리전골,여백 매운칼국수,황제 부대찌개,가양버섯 칼국수,여백지하 순대국,용호낙지 낙곱새,창고 소고기,MBC앞 수제비,...,여의도 따로국밥,VIPS 샐러드바,애슐리 샐러드바,여의도 봉추찜닭,구마산 추어탕,을밀대 평양냉면,죠스떡볶이 매운떡볶이,아딸 순대,호식이두마리치킨 후라이드,홍콩반점 탕수육
방남진,3.596403,3.470647,3.552311,1.994818,3.035397,3.047889,2.934793,3.018077,3.978957,3.084712,...,2.992504,2.46757,2.44459,3.025647,3.001436,2.560247,2.610229,2.918989,3.014011,3.002874
이예진,2.920824,3.024108,2.546322,2.593542,2.560214,2.460668,1.553554,2.485154,3.517283,2.519712,...,2.595443,2.526635,2.045509,2.568222,2.588107,2.539805,1.909469,2.65582,2.488493,2.586925
김한성,3.060455,2.981592,4.532805,3.99675,4.022198,4.030032,4.459108,4.511336,3.986804,4.553123,...,3.995299,3.979663,3.465252,3.845031,3.829848,3.537781,4.069125,3.949198,3.837734,3.001803
김승원,4.269295,3.976496,3.781063,3.512401,3.915291,3.885395,4.156047,4.45674,3.956608,4.297274,...,3.92419,3.57761,3.132604,3.844873,3.902814,3.762072,3.642458,3.693869,3.966469,3.993121
강진희,4.196216,3.940255,3.106472,3.63231,3.572046,3.597472,3.367281,4.036793,3.600027,3.815276,...,3.627599,3.433992,2.88722,3.552202,3.645779,3.765481,3.224356,3.477971,3.671375,4.00585
황상순,3.974161,3.674534,4.485979,3.668056,3.657179,4.487164,3.684144,3.661822,4.00564,3.643961,...,3.668676,3.508692,3.514852,3.659792,3.666282,3.650519,3.970455,4.021713,3.662911,3.665896


In [439]:
# Compare the items a person has rated with the items they have not rated.
def make_recommendation(name, base_df, svd_df):
    """Put a person's rated items next to predicted ratings for their unrated items.

    Parameters
    ----------
    name : label
        Column of ``base_df`` (and row of ``svd_df``) identifying the person.
    base_df : pandas.DataFrame
        Observed ratings, items as rows and people as columns; NaN means "not rated".
    svd_df : pandas.DataFrame
        Predicted ratings, people as rows and items as columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``['history', 'rate', 'recommendation', 'rate']``: rated items with
        their observed rating (highest first) placed beside unrated items with their
        predicted rating (highest first). The shorter side is padded with NaN.
    """
    observed = base_df.loc[:, name]
    was_rated = observed.notna()
    rated_items = observed[was_rated].index
    unrated_items = observed[~was_rated].index

    # Observed ratings, best first.
    rated_part = observed.loc[rated_items].to_frame().sort_values([name], ascending=False)
    # Predicted ratings for everything not yet rated, best first.
    predicted_part = svd_df.loc[name, unrated_items].to_frame().sort_values([name], ascending=False)

    # reset_index moves the item labels into a column so both halves align by row position.
    side_by_side = pd.concat(
        [rated_part.reset_index(), predicted_part.reset_index()],
        axis=1,
    )
    side_by_side.columns = ['history', 'rate', 'recommendation', 'rate']
    return side_by_side

In [451]:
# First 10 rows of the history-vs-recommendation comparison for this person.
make_recommendation('이예진', original_df, df_svd_preds).head(10)

Unnamed: 0,history,rate,recommendation,rate.1
0,스타차이나 탕수육,3.5,백상 뼈해장국,2.818535
1,여의도 희정식당 부대찌개,3.5,짜글이 곱창전골,2.699409
2,스타벅스 카페라떼,3.5,도미노 포테이토피자,2.664985
3,창고 소고기,3.5,아딸 순대,2.65582
4,교촌치킨 오리지날 콤보,3.5,하노이 짬뽕,2.643686
5,송추가마골 뚝배기갈비탕,3.5,박가부대 부대찌개,2.625356
6,IFC 락앤웍 유린기,3.5,밍 게살볶음밥,2.608193
7,진주집 콩국수,3.0,팥이재 팥빙수,2.606958
8,맥도날드 그릴드 머쉬룸 버거 세트,3.0,여백지하 직화구이덮밥,2.602258
9,진주집 비빔냉면,3.0,여의도 따로국밥,2.595443


In [452]:
# First 10 rows of the history-vs-recommendation comparison for this person.
make_recommendation('김한성', original_df, df_svd_preds).head(10)

Unnamed: 0,history,rate,recommendation,rate.1
0,롯데캐슬 삼계탕,4.5,송추가마골 뚝배기갈비탕,3.868905
1,산삼골 오리전골,4.5,스쿨푸드 스팸계란마리,3.855957
2,여의도 청수 메밀국수,4.5,신길동 스시센 돈코츠라멘,3.847462
3,여백지하 순대국,4.5,여의도 봉추찜닭,3.845031
4,용호낙지 낙곱새,4.5,IFC 락앤웍 유린기,3.842796
5,안성탕면,4.5,스타차이나 탕수육,3.842796
6,MBC앞 수제비,4.5,여의도 희정식당 부대찌개,3.842796
7,신라면,4.5,63워킹온더 클라우드 코스,3.840015
8,백상 뼈해장국,4.5,호식이두마리치킨 후라이드,3.837734
9,여백지하 서울설렁탕,4.0,광화문 국밥,3.830684


In [453]:
# First 10 rows of the history-vs-recommendation comparison for this person.
make_recommendation('김승원', original_df, df_svd_preds).head(10)

Unnamed: 0,history,rate,recommendation,rate.1
0,진주집 콩국수,4.5,하노이 짬뽕,4.064762
1,롯데캐슬 삼계탕,4.5,처갓집양념치킨,4.063284
2,창고 미역국,4.5,박가부대 부대찌개,4.011352
3,용호낙지 낙곱새,4.5,여의도 청수 메밀국수,4.001285
4,MBC앞 수제비,4.5,BHC 뿌링클 치킨,3.999258
5,신라면,4.5,진주집 비빔냉면,3.976496
6,호식이두마리치킨 후라이드,4.0,밍 게살볶음밥,3.961343
7,여의도 대원 연어구이,4.0,창고 소고기,3.956608
8,팥이재 팥빙수,4.0,여의도 따로국밥,3.92419
9,여백지하 직화구이덮밥,4.0,구마산 추어탕,3.902814


In [454]:
# First 10 rows of the history-vs-recommendation comparison for this person.
make_recommendation('강진희', original_df, df_svd_preds).head(10)

Unnamed: 0,history,rate,recommendation,rate.1
0,백상 뼈해장국,5.0,창고 미역국,3.936701
1,짜글이 곱창전골,4.5,MBC앞 수제비,3.815276
2,진주집 콩국수,4.0,롯데캐슬 삼계탕,3.807705
3,여백지하 직화구이덮밥,4.0,송추가마골 뚝배기갈비탕,3.772546
4,신라면,4.0,을밀대 평양냉면,3.765481
5,진주집 비빔냉면,4.0,고려정 양곰탕,3.765481
6,홍콩반점 탕수육,4.0,교촌치킨 오리지날 콤보,3.75322
7,용호낙지 낙곱새,4.0,스쿨푸드 스팸계란마리,3.73052
8,여백지하 순대국,3.5,신길동 스시센 돈코츠라멘,3.702949
9,여백지하 서울설렁탕,3.5,IFC 락앤웍 유린기,3.687806


In [455]:
# First 10 rows of the history-vs-recommendation comparison for this person.
make_recommendation('황상순', original_df, df_svd_preds).head(10)

Unnamed: 0,history,rate,recommendation,rate.1
0,가양버섯 칼국수,4.5,맥도날드 맥스파이시 상하이버거 세트,3.725487
1,산삼골 오리전골,4.5,짜글이 곱창전골,3.702605
2,진주집 콩국수,4.0,맥도날드 그릴드 머쉬룸 버거 세트,3.700667
3,백상 고려호프 마늘통닭,4.0,처갓집양념치킨,3.684254
4,죠스떡볶이 매운떡볶이,4.0,여백지하 순대국,3.684144
5,창고 미역국,4.0,스타벅스 카페라떼,3.677842
6,여백지하 직화구이덮밥,4.0,여의도 청수 메밀국수,3.67731
7,아딸 순대,4.0,BHC 뿌링클 치킨,3.677084
8,백상 뼈해장국,4.0,진주집 비빔냉면,3.674534
9,창고 소고기,4.0,밍 게살볶음밥,3.672837
