1. 컨텐츠기반 추천시스템
2. 컨텐츠를 활용할 수 있는지
3. 어떻게 컨텐츠들이 서로 연관이 있는지 평가하는 방법
4. 근접이웃기반 컨텐츠기반 추천시스템 -> k-nearest neighbor
5. naive bayes classifier -> 베이즈 추천시스템
6. TF-IDF 개념설명 그리고 실습

추천시스템 성능 평가
-> RMSE

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


## Dataset 불러오기

In [None]:
# Set this path to match your own environment. The path may differ between
# Google Colab and a local Jupyter setup.
path = '/content/drive/MyDrive/data/movielens'

# Read the three CSV files from `path`; movies_df is indexed by movieId.
ratings_df = pd.read_csv(os.path.join(path, 'ratings.csv'), encoding='utf-8')
movies_df = pd.read_csv(os.path.join(path, 'movies.csv'), index_col='movieId', encoding='utf-8')
tags_df = pd.read_csv(os.path.join(path, 'tags.csv'), encoding='utf-8')

In [None]:
# Preview the first rows of the ratings table.
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [None]:
# Preview the first rows of the movies table (indexed by movieId).
movies_df.head()

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy


In [None]:
# Preview the first rows of the tags table.
tags_df.head()

Unnamed: 0,userId,movieId,tag,timestamp
0,2,60756,funny,1445714994
1,2,60756,Highly quotable,1445714996
2,2,60756,will ferrell,1445714992
3,2,89774,Boxing story,1445715207
4,2,89774,MMA,1445715200


## Genres를 이용한 movie representation


In [None]:
# Number of movies, and the distinct labels found in the '|'-separated
# `genres` column (the order of the resulting list is arbitrary).
total_count = len(movies_df.index)
total_genres = list({
    genre
    for genre_field in movies_df['genres']
    for genre in genre_field.split('|')
})

In [None]:
# Report the total number of movies and the list of distinct genres.
print(f"전체 영화 수: {total_count}")
print(f"장르: {total_genres}")

전체 영화 수: 9742
장르: ['Romance', 'Fantasy', 'Mystery', 'Comedy', 'War', 'Thriller', 'Sci-Fi', 'Adventure', 'Documentary', 'Crime', 'IMAX', 'Western', 'Children', 'Animation', 'Horror', '(no genres listed)', 'Film-Noir', 'Action', 'Drama', 'Musical']


In [None]:
# Number of distinct genre labels.
print(len(total_genres))

20


In [None]:
# Count how many times each genre label occurs across all movies.
# Counters start at 0 so no None sentinel (and no `== None` test) is needed.
genre_count = dict.fromkeys(total_genres, 0)

for each_genre_list in movies_df['genres']:
    for genre in each_genre_list.split('|'):
        genre_count[genre] += 1

In [None]:
# Display the per-genre counts.
genre_count

{'(no genres listed)': 34,
 'Action': 1828,
 'Adventure': 1263,
 'Animation': 611,
 'Children': 664,
 'Comedy': 3756,
 'Crime': 1199,
 'Documentary': 440,
 'Drama': 4361,
 'Fantasy': 779,
 'Film-Noir': 87,
 'Horror': 978,
 'IMAX': 158,
 'Musical': 334,
 'Mystery': 573,
 'Romance': 1596,
 'Sci-Fi': 980,
 'Thriller': 1894,
 'War': 382,
 'Western': 167}

In [None]:
# Replace each raw count with its IDF weight, log10(total_count / count).
# NOTE: genre_count is overwritten in place, so re-running only this cell
# would apply the transformation to the already-transformed values.
for genre, n_occurrences in list(genre_count.items()):
    genre_count[genre] = np.log10(total_count / n_occurrences)

genre_count

{'(no genres listed)': 2.457169208193496,
 'Action': 0.7266719338379385,
 'Adventure': 0.8872447746804204,
 'Animation': 1.2026069149931968,
 'Children': 1.1664800458677336,
 'Comedy': 0.41392254164167785,
 'Crime': 0.9098289421369025,
 'Documentary': 1.3451954487495636,
 'Drama': 0.3490620385623247,
 'Fantasy': 1.0971106675631868,
 'Film-Noir': 2.0491288726171324,
 'Horror': 0.9983092704481497,
 'IMAX': 1.7899910382813284,
 'Musical': 1.4649016584241867,
 'Mystery': 1.2304935032683613,
 'Romance': 0.7856152382210405,
 'Sci-Fi': 0.9974220495432562,
 'Thriller': 0.7112681505684965,
 'War': 1.4065847623240424,
 'Western': 1.7659316540881678}

In [None]:
# create genre representations
# Each movie becomes one row holding the IDF weight of every genre it has and
# NaN elsewhere. All row dicts are collected first and the frame is built in
# a single call; the previous per-row DataFrame.update was slow and left the
# frame with object dtype instead of float.
genre_rows = [
    {genre: genre_count[genre] for genre in genres.split('|')}
    for genres in tqdm(movies_df['genres'])
]
genre_representation = pd.DataFrame(
    genre_rows,
    index=movies_df.index,
    columns=sorted(total_genres),
)

genre_representation

9742it [00:52, 186.70it/s]


Unnamed: 0_level_0,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,,,0.887245,1.20261,1.16648,0.413923,,,,1.09711,,,,,,,,,,
2,,,0.887245,,1.16648,,,,,1.09711,,,,,,,,,,
3,,,,,,0.413923,,,,,,,,,,0.785615,,,,
4,,,,,,0.413923,,,0.349062,,,,,,,0.785615,,,,
5,,,,,,0.413923,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,,0.726672,,1.20261,,0.413923,,,,1.09711,,,,,,,,,,
193583,,,,1.20261,,0.413923,,,,1.09711,,,,,,,,,,
193585,,,,,,,,,0.349062,,,,,,,,,,,
193587,,0.726672,,1.20261,,,,,,,,,,,,,,,,


## Tag를 이용한 Movie Representation

In [None]:
# Preview the first five rows of the tags table.
tags_df.head(5)

Unnamed: 0,userId,movieId,tag,timestamp
0,2,60756,funny,1445714994
1,2,60756,Highly quotable,1445714996
2,2,60756,will ferrell,1445714992
3,2,89774,Boxing story,1445715207
4,2,89774,MMA,1445715200


In [None]:
# Look up the title and genres of movieId 89774.
movies_df.loc[89774]

title     Warrior (2011)
genres             Drama
Name: 89774, dtype: object

In [None]:
# get unique tag
# Each tag cell may hold several comma-separated tags: split every cell,
# strip surrounding whitespace, and keep the distinct values.
tag_column = [raw_tag.split(',') for raw_tag in tags_df['tag']]
unique_tags = list({tag.strip() for parts in tag_column for tag in parts})

print(unique_tags)

['rap', 'slick', 'mental illness', 'movie business', 'unintelligent', 'adultery', 'business', 'artsy', 'suburbia', 'truckers', 'amazing artwork', 'Casey Affleck', 'gothic', 'splatter', 'unlikely hero', 'Shia LaBeouf', 'black-and-white', 'chick flick', 'Brad Pitt', 'claustrophobic', 'cruel characters', 'Rogue', 'In Your Eyes', 'heroin', 'Tarantino', 'Train', 'generation X', 'ben stiller', 'costume drama', 'robbery', 'multiple roles', 'melancholy', 'non-linear timeline', 'conversation', 'cult', 'assassination', 'bad', 'heroine in tight suit', 'futuristic', 'drugs', 'singers', 'Stephen King', 'predictible plot', 'cheeky', 'French', 'happpiness', 'ghosts', 'system holism', 'trains', 'South America', 'technology', 'fish', 'nerds', 'solitude', 'Josh Brolin', 'POW', 'Oscar (Best Supporting Actress)', 'remade', 'Uma Thurman', 'Renee Zellweger', 'gun tactics', 'Shakespeare', 'dark comedy', 'McDonalds', 'conspiracy', 'Clousseau', 'great cinematography', 'Captain America', 'melancholic', 'plastic

In [None]:
# Number of tag rows versus number of distinct tags.
print(len(tag_column))
print(len(unique_tags))

3683
1589


In [None]:
# Compute IDF for tag
total_movie_count = len(set(tags_df['movieId']))
# key: tag, value: number of movies with such tag
tag_count_dict = dict.fromkeys(unique_tags, 0)

# Document frequency must count each (movie, tag) pair once. tags_df has one
# row per user/movie/tag, so counting rows would inflate the frequency of a
# tag that several users applied to the same movie.
seen_movie_tags = set()
for movie_id, each_movie_tag_list in zip(tags_df['movieId'], tags_df['tag']):
    for tag in each_movie_tag_list.split(","):
        movie_tag = (movie_id, tag.strip())
        if movie_tag not in seen_movie_tags:
            seen_movie_tags.add(movie_tag)
            tag_count_dict[movie_tag[1]] += 1

# IDF = log10(number of tagged movies / number of movies carrying the tag).
tag_idf = {
    each_tag: np.log10(total_movie_count / movie_count)
    for each_tag, movie_count in tag_count_dict.items()
}

tag_idf

In [None]:
# Number of tags that received an IDF weight.
len(tag_idf.keys())

1589

In [None]:
# Create movie representations
# One row per tagged movie holding the IDF weight of each distinct tag the
# movie received and NaN elsewhere. Rows are collected first and the frame is
# built in a single call instead of one DataFrame.update per movie, which was
# slow and produced object-dtype columns.
tagged_movie_ids = []
tag_rows = []
for movie_id, group in tqdm(tags_df.groupby(by='movieId')):
    movie_tags = {tag.strip() for raw_tag in group['tag'] for tag in raw_tag.split(',')}
    tagged_movie_ids.append(movie_id)
    tag_rows.append({tag: tag_idf[tag] for tag in movie_tags})

tag_representation = pd.DataFrame(
    tag_rows,
    index=tagged_movie_ids,
    columns=sorted(unique_tags),
)

# `axis` is passed by keyword: recent pandas versions reject it positionally.
tag_representation = tag_representation.sort_index(axis=0)
tag_representation

100%|██████████| 1572/1572 [05:04<00:00,  5.16it/s]


Unnamed: 0,"""artsy""",06 Oscar Nominated Best Movie - Animation,1900s,1920s,1950s,1960s,1970s,1980s,1990s,2001-like,2D animation,70mm,80's,AIDs,AS Byatt,AWESOME,Aardman,Academy award (Best Supporting Actress),Action,Adam Sandler,Adrien Brody,Adventure,Afghanistan,Africa,Agatha Christie,Al Pacino,Alcatraz,Alfred Hitchcock,Alicia Vikander,Amazing Cinematography,American Indians,American propaganda,Amish,Amtrak,Amy Adams,Andrew Lloyd Weber,Andy Garcia,Andy Kaufman,Andy Samberg,Angelina Jolie,...,violence,violence in america,violent,virginity,virtual reality,visual,visually appealing,visually stunning,von Bulow,voyeurism,wapendrama,war,way too long,weak plot,weather forecaster,wedding,weddings,weird,werewolf,western,whales,whimsical,white guilt,widows/widowers,will ferrell,wine,winona ryder,wistful,witty,wizards,women,wonderwoman,workplace,writing,wrongful imprisonment,wry,younger men,zither,zoe kazan,zombies
1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183611,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
184471,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.19645,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
187593,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
187595,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


- 예시로 살펴볼까요? 어떤 영화에 대해서 어떤 tag들이 있는지 한번 살펴봅시다.

In [None]:
# Show the first movies again, to compare their titles with their tags.
movies_df.head()

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy


In [None]:
# Non-missing tag weights of movieId 1.
tag_representation.loc[1].dropna()

fun      2.49748
pixar    2.89542
Name: 1, dtype: object

In [None]:
# Non-missing tag weights of movieId 2.
tag_representation.loc[2].dropna()

Robin Williams      2.71933
fantasy              2.4183
game                3.19645
magic board game    3.19645
Name: 2, dtype: object

In [None]:
# Compare the shapes of the genre and tag representations.
print(genre_representation.shape)
print(tag_representation.shape)

(9742, 20)
(1572, 1589)


## Final Movie Representation
- genre와 tag로 만들어진 representation을 합쳐서 각 movie의 vector로 만든다

In [None]:
# Put the genre and tag columns side by side, aligned on the row index, then
# replace every missing weight with 0.
movie_representation = pd.concat(
    [genre_representation, tag_representation],
    axis=1,
)
movie_representation = movie_representation.fillna(0)

print(movie_representation.shape)
print(movie_representation.describe())

(9742, 1609)
       (no genres listed)       Action  ...    zoe kazan      zombies
count         9742.000000  9742.000000  ...  9742.000000  9742.000000
mean             0.008576     0.136354  ...     0.000328     0.001241
std              0.144915     0.283726  ...     0.032385     0.054775
min              0.000000     0.000000  ...     0.000000     0.000000
25%              0.000000     0.000000  ...     0.000000     0.000000
50%              0.000000     0.000000  ...     0.000000     0.000000
75%              0.000000     0.000000  ...     0.000000     0.000000
max              2.457169     0.726672  ...     3.196453     2.418301

[8 rows x 1609 columns]


## Contents 유사도 평가
- Cosine similarity를 사용

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def cos_sim_matrix(a, b):
    """Return the pairwise cosine similarity between the rows of ``a`` and ``b``.

    Args:
        a: DataFrame whose rows are feature vectors; its index labels the
            rows of the result.
        b: DataFrame whose rows are feature vectors; its index labels the
            columns of the result.

    Returns:
        DataFrame of shape ``(len(a), len(b))`` in which the entry at row
        label ``i`` and column label ``j`` is the cosine similarity between
        row ``i`` of ``a`` and row ``j`` of ``b``.
    """
    cos_sim = cosine_similarity(a, b)
    # Label both axes with the input indexes. Wrapping a.index in a list
    # would build a MultiIndex, and leaving columns unset would number them
    # 0..n-1, so a lookup by label would silently hit the wrong column.
    result_df = pd.DataFrame(data=cos_sim, index=a.index, columns=b.index)

    return result_df

In [None]:
# Preview the combined genre + tag vectors.
print(movie_representation.head())

   (no genres listed)  Action  Adventure  ...  zither  zoe kazan  zombies
1                 0.0     0.0   0.887245  ...     0.0        0.0      0.0
2                 0.0     0.0   0.887245  ...     0.0        0.0      0.0
3                 0.0     0.0   0.000000  ...     0.0        0.0      0.0
4                 0.0     0.0   0.000000  ...     0.0        0.0      0.0
5                 0.0     0.0   0.000000  ...     0.0        0.0      0.0

[5 rows x 1609 columns]


In [None]:
# Movie-to-movie similarity: every movie vector is compared with every other.
cs_df = cos_sim_matrix(movie_representation, movie_representation)
cs_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,9702,9703,9704,9705,9706,9707,9708,9709,9710,9711,9712,9713,9714,9715,9716,9717,9718,9719,9720,9721,9722,9723,9724,9725,9726,9727,9728,9729,9730,9731,9732,9733,9734,9735,9736,9737,9738,9739,9740,9741
1,1.0,0.124438,0.008403,0.040571,0.011755,0.0,0.016339,0.331122,0.0,0.131794,0.01146,0.035819,0.428332,0.0,0.127943,0.0,0.0,0.093519,0.093519,0.02637,0.012976,0.0,0.0,0.0,0.0,0.0,0.252486,0.0,0.139012,0.0,0.0,0.0,0.058681,0.0,0.127412,0.005151,0.0,0.0,0.0,0.0,...,0.0,0.046287,0.093519,0.164693,0.117018,0.103766,0.399957,0.007761,0.098841,0.0,0.2018,0.254479,0.0,0.043593,0.093519,0.332223,0.071492,0.0,0.131794,0.0,0.0,0.036562,0.0,0.0,0.093519,0.0,0.0,0.27171,0.0,0.13748,0.064466,0.260941,0.071492,0.27171,0.0,0.348295,0.379492,0.0,0.232553,0.093519
2,0.124438,1.0,0.0,0.0,0.0,0.0,0.0,0.240843,0.0,0.095861,0.0,0.0,0.186183,0.0,0.09306,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.183647,0.0,0.101111,0.0,0.0,0.0,0.042682,0.0,0.082309,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.11979,0.085114,0.075475,0.17385,0.0,0.071892,0.0,0.12849,0.170429,0.0,0.0,0.0,0.222496,0.0,0.0,0.095861,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.082123,0.0,0.0,0.0,0.0,0.0,0.108082,0.117763,0.0,0.0,0.0
3,0.008403,0.0,1.0,0.179391,0.011294,0.0,0.072246,0.0,0.0,0.0,0.050673,0.034413,0.0,0.0,0.096374,0.0,0.049018,0.089849,0.089849,0.025335,0.012466,0.0,0.0,0.0,0.050722,0.0,0.0,0.045593,0.0,0.0,0.0,0.0,0.0,0.0,0.01369,0.022777,0.0,0.0,0.0,0.0,...,0.0,0.044471,0.089849,0.0,0.0,0.0,0.0,0.007456,0.0,0.0,0.024159,0.019374,0.0,0.192754,0.089849,0.025292,0.068686,0.0,0.0,0.0,0.0,0.035127,0.0,0.155841,0.089849,0.0,0.0,0.0,0.0,0.023609,0.00656,0.0,0.068686,0.0,0.0,0.020322,0.022142,0.0,0.0,0.089849
4,0.040571,0.0,0.179391,1.0,0.05453,0.0,0.348828,0.0,0.0,0.0,0.282473,0.166156,0.0,0.039185,0.465326,0.053144,0.2834,0.433821,0.433821,0.209317,0.060192,0.042043,0.0,0.120845,0.293251,0.059511,0.104881,0.263595,0.039466,0.131045,0.03317,0.0,0.024376,0.03061,0.066099,0.109975,0.037651,0.049771,0.105052,0.057419,...,0.0,0.21472,0.433821,0.0,0.0,0.074504,0.0,0.036,0.0,0.0,0.116646,0.093542,0.131045,0.930677,0.433821,0.122119,0.567487,0.0,0.0,0.0,0.0,0.290218,0.0,0.900999,0.433821,0.365843,0.365843,0.0,0.0,0.113993,0.031674,0.101979,0.567487,0.0,0.0,0.098119,0.106908,0.365843,0.0,0.433821
5,0.011755,0.0,0.011294,0.05453,1.0,0.0,0.640342,0.0,0.0,0.0,0.015403,0.048143,0.0,0.0,0.0,0.0,0.0,0.125697,0.125697,0.035443,0.01744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.213067,0.0,0.0,0.019152,0.006924,0.0,0.0,0.0,0.0,...,0.0,0.062214,0.125697,0.0,0.0,0.0,0.0,0.010431,0.0,0.0,0.033798,0.027103,0.0,0.058592,0.125697,0.035383,0.096091,0.0,0.0,0.0,0.0,0.049142,0.0,0.0,0.125697,0.0,0.0,0.0,0.0,0.033029,0.009177,0.0,0.096091,0.0,0.0,0.028429,0.030976,0.0,0.0,0.125697


In [None]:
# The similarity matrix is square: one row and one column per movie.
cs_df.shape

(9742, 9742)

In [None]:
print(cs_df.shape)
# Rank all movies by similarity to movieId 1. The column is selected by the
# position of movieId 1 in movie_representation (the frame cs_df was computed
# from) rather than by the label 1: when cs_df's columns are plain positions
# 0..n-1, cs_df[1] is the second movie, not movieId 1.
movie_1_pos = movie_representation.index.get_loc(1)
print(cs_df.iloc[:, movie_1_pos].sort_values(ascending=False))

(9742, 9742)
2         1.000000
46972     0.322201
126142    0.300850
2043      0.300850
2399      0.300850
            ...   
39449     0.000000
39516     0.000000
39715     0.000000
39869     0.000000
7299      0.000000
Name: 1, Length: 9742, dtype: float64


In [None]:
# Print title and genres of movieId 1 followed by four candidate neighbours.
# NOTE(review): the four IDs below are hard-coded, presumably copied from a
# previous run of the ranking cell — confirm they still match its output.
print(movies_df.loc[1])
print(movies_df.loc[46972])
print(movies_df.loc[126142])
print(movies_df.loc[2043])
print(movies_df.loc[2399])

title                                Toy Story (1995)
genres    Adventure|Animation|Children|Comedy|Fantasy
Name: 1, dtype: object
title     Night at the Museum (2006)
genres    Action|Comedy|Fantasy|IMAX
Name: 46972, dtype: object
title     The Cave of the Golden Rose (1991)
genres            Adventure|Children|Fantasy
Name: 126142, dtype: object
title     Darby O'Gill and the Little People (1959)
genres                   Adventure|Children|Fantasy
Name: 2043, dtype: object
title     Santa Claus: The Movie (1985)
genres       Adventure|Children|Fantasy
Name: 2399, dtype: object


## 추천시스템의 성능 평가
- 학습셋과 테스트셋을 나눈다.
- 테스트셋에서 예측한 평점과 실제 평점의 RMSE를 구한다.

In [None]:
# Hold out 20% of the rating rows for testing; the fixed seed keeps the split reproducible.
train_df, test_df = train_test_split(ratings_df, test_size=0.2, random_state=1234)

In [None]:
# Sizes of the training and test splits.
print(train_df.shape)
print(test_df.shape)

(80668, 4)
(20168, 4)


In [None]:
# Distinct users that have at least one rating in the test split.
test_userids = list(set(test_df.userId.values))
test_userids

In [None]:
# For every test user, predict a rating for each movie as the
# similarity-weighted average of the user's training ratings, then keep the
# predictions for the movies that user rated in the test split, next to the
# true ratings. Per-user frames are collected in a list and concatenated once
# at the end instead of growing result_df inside the loop.
per_user_frames = []

for user_id in tqdm(test_userids):
    user_record_df = train_df.loc[train_df.userId == int(user_id), :]
    user_test_df = test_df[test_df.userId == user_id]

    # n = number of movies the user rated in the training split.
    user_sim = cs_df.loc[user_record_df['movieId']].T.to_numpy()  # (n_movies, n)
    user_rating = user_record_df[['rating']].to_numpy()  # (n, 1)
    sim_sum = np.sum(user_sim, -1)  # (n_movies,)

    # The +1 keeps the denominator away from zero when the similarities sum
    # to 0. NOTE(review): it also shrinks every prediction relative to the
    # plain weighted average — confirm this smoothing is intended.
    prediction = np.matmul(user_sim, user_rating).flatten() / (sim_sum + 1)  # (n_movies,)

    prediction_df = pd.DataFrame(prediction, index=cs_df.index).reset_index()
    prediction_df.columns = ['movieId', 'pred_rating']
    prediction_df = prediction_df[prediction_df.movieId.isin(user_test_df['movieId'].values)]

    per_user_frames.append(prediction_df.merge(user_test_df, on='movieId'))

# pd.concat raises on an empty list, so fall back to an empty frame.
result_df = pd.concat(per_user_frames, axis=0) if per_user_frames else pd.DataFrame()



100%|██████████| 610/610 [00:15<00:00, 39.97it/s]


In [None]:
# Preview predicted versus actual ratings.
result_df.head(10)

Unnamed: 0,movieId,pred_rating,userId,rating,timestamp
0,1,4.145652,1,4.0,964982703
1,50,3.650755,1,5.0,964982931
2,216,2.670124,1,5.0,964981208
3,223,2.612844,1,3.0,964980985
4,231,4.215284,1,5.0,964981179
5,235,3.61982,1,4.0,964980908
6,316,4.136756,1,3.0,964982310
7,457,3.218743,1,5.0,964981909
8,543,3.729524,1,4.0,964981179
9,592,4.024728,1,4.0,964982271


In [None]:
# Mean squared error between true and predicted test ratings, and its square
# root (RMSE).
mse = mean_squared_error(
    y_true=result_df['rating'].to_numpy(),
    y_pred=result_df['pred_rating'].to_numpy(),
)
rmse = np.sqrt(mse)

print(mse, rmse)

1.40606646706041 1.1857767357561078
