In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import pandas as pd
import numpy as np
from math import sqrt
from tqdm import tqdm_notebook as tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load Dataset

In [4]:
path = '/content/drive/MyDrive/data/movielens'
ratings_df = pd.read_csv(os.path.join(path, 'ratings.csv'), encoding='utf-8')

print(ratings_df.shape)
ratings_df.head(3)

(100836, 4)


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224


In [5]:
train_df , test_df = train_test_split(ratings_df, test_size = 0.2, random_state=216)

print(train_df.shape)
print(test_df.shape)

(80668, 4)
(20168, 4)


# Make **Sparse Matrix**

sparse matrix = (user, movie)

unstack() : 두개의 인덱스 형태의 데이터프레임을 행과 열로 펼친 형태

In [7]:
sparse_matrix = train_df.groupby('movieId').apply(lambda x: pd.Series(x['rating'].values, index = x['userId'])).unstack()
sparse_matrix.index.name = 'movieId'

sparse_matrix

userId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,4.0,,,,4.0,,,,,,,,,,2.5,,4.5,3.5,4.0,,3.5,,,,,,3.0,,,,5.0,3.0,3.0,,,,,,,5.0,...,,,,,,,,,4.0,3.0,,,,5.0,,,5.0,,,4.0,,,,,,4.0,4.0,,,,4.0,,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,,,,,,,,,,,,,,,,,,3.0,3.0,3.0,3.5,,,,,,4.0,,,,,,,,,,,,,,...,,,4.5,,,,,,,,,,,,,4.0,,,,2.5,,4.0,,4.0,,,,,2.5,4.0,,4.0,,,,,,,,
3,,,,,,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,3.0,,3.0,,,,,,,,,1.5,,,,,,,,,,,
4,,,,,,3.0,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.5,,,,,,,,,,
5,,,,,,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,2.0,,,,,,,,,,2.5,,,,3.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193567,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
193571,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
193585,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
193587,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


NaN값을 단순히 0값으로 처리할지 혹은 다른 값으로 처리할지 생각

# Cosine Similarity

In [15]:
from sklearn.metrics.pairwise import cosine_similarity

def cossim_matrix(a, b):
  cossim_values = cosine_similarity(a.values, b.values)
  cossim_df = pd.DataFrame(data=cossim_values, columns = a.index.values, index = a.index)

  return cossim_df

# Neighborhood-based Collaborative Filtering Recommendation Score Calculation

## Item-based

In [17]:
item_sparse_matrix = sparse_matrix.fillna(0)
item_sparse_matrix.head(3)

userId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.5,0.0,4.5,3.5,4.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,5.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,0.0,0.0,0.0,5.0,0.0,0.0,5.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,3.0,3.5,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,4.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,2.5,0.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,2.5,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# 코사인 유사도 계산
item_cossim_df = cossim_matrix(item_sparse_matrix, item_sparse_matrix)

item_cossim_df

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,36,39,40,41,42,43,44,...,184349,184471,184641,184721,184791,184931,184987,184997,185029,185031,185135,185435,185473,185585,186587,187031,187541,187593,187595,187717,188189,188301,188675,188833,189043,189111,189333,189547,189713,190183,190207,190209,190213,190215,190219,193567,193571,193585,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,1.000000,0.338593,0.248150,0.000000,0.249282,0.284358,0.195245,0.164256,0.190271,0.372422,0.323219,0.161144,0.148402,0.128646,0.080951,0.266517,0.246667,0.103418,0.329250,0.107634,0.296347,0.245643,0.133319,0.184454,0.255536,0.095883,0.113964,0.065155,0.157186,0.0,0.166550,0.403893,0.423482,0.233110,0.312886,0.000000,0.102022,0.070525,0.070574,0.231378,...,0.055527,0.093841,0.0,0.027763,0.086326,0.074036,0.074036,0.074036,0.049187,0.000000,0.043337,0.0,0.074036,0.000000,0.074036,0.039239,0.065338,0.051238,0.107912,0.027763,0.027763,0.117791,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.027763,0.055527,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.338593,1.000000,0.205624,0.033261,0.191295,0.212895,0.188050,0.102979,0.054976,0.372042,0.254885,0.211760,0.101857,0.019072,0.119582,0.202220,0.157270,0.174060,0.502021,0.130262,0.224622,0.169015,0.032027,0.174568,0.136986,0.127927,0.113604,0.000000,0.146621,0.0,0.181167,0.270493,0.300991,0.087209,0.313115,0.000000,0.072114,0.031118,0.088574,0.316808,...,0.000000,0.165621,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.121956,0.151838,0.0,0.000000,0.106712,0.000000,0.000000,0.000000,0.145173,0.154573,0.000000,0.000000,0.107795,0.106712,0.106712,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.248150,0.205624,1.000000,0.126850,0.356283,0.212673,0.309395,0.227943,0.147490,0.227477,0.186560,0.119435,0.049723,0.106682,0.038060,0.262025,0.143630,0.195853,0.258646,0.084179,0.280670,0.291494,0.191440,0.229600,0.123410,0.155906,0.156097,0.068311,0.135631,0.0,0.147033,0.245105,0.156506,0.170456,0.226373,0.139535,0.171234,0.136083,0.143566,0.153257,...,0.000000,0.000000,0.0,0.000000,0.098604,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.082532,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.000000,0.033261,0.126850,1.000000,0.171802,0.060098,0.245244,0.000000,0.000000,0.055815,0.000000,0.000000,0.000000,0.000000,0.000000,0.081942,0.145855,0.000000,0.078931,0.000000,0.035250,0.148951,0.000000,0.168814,0.154374,0.255713,0.000000,0.084773,0.050229,0.0,0.200334,0.116916,0.014712,0.227527,0.075134,0.000000,0.179496,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.249282,0.191295,0.356283,0.171802,1.000000,0.294979,0.427024,0.166875,0.409434,0.133607,0.181711,0.182404,0.057242,0.194401,0.017064,0.178291,0.226479,0.175478,0.268262,0.017942,0.122131,0.236703,0.108034,0.209779,0.221520,0.166561,0.234386,0.024672,0.148503,0.0,0.226075,0.272158,0.178409,0.209915,0.216351,0.000000,0.191751,0.000000,0.160128,0.049525,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193567,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.752577,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0
193571,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.752577,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0
193585,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.752577,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0
193587,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.752577,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0


In [20]:
# train_df에서 userId를 기준으로 groupby
userId_grouped = train_df.groupby('userId')

# 예측 결과를 담을 데이터프레임 생성
item_prediction_result_df = pd.DataFrame(index = list(userId_grouped.indices.keys()),
                                         columns= item_sparse_matrix.index)
item_prediction_result_df

movieId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,36,39,40,41,42,43,44,...,184349,184471,184641,184721,184791,184931,184987,184997,185029,185031,185135,185435,185473,185585,186587,187031,187541,187593,187595,187717,188189,188301,188675,188833,189043,189111,189333,189547,189713,190183,190207,190209,190213,190215,190219,193567,193571,193585,193587,193609
1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
607,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
608,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
609,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


groupby객체 for문 출력

In [27]:
for userId, group in tqdm(userId_grouped):
  print('userId: ',userId)
  print(group)
  break

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


HBox(children=(FloatProgress(value=0.0, max=610.0), HTML(value='')))

userId:  1
     userId  movieId  rating  timestamp
195       1     2991     5.0  964982271
121       1     2028     4.0  964981888
231       1     5060     5.0  964984002
213       1     3441     5.0  964982328
60        1     1073     5.0  964981680
..      ...      ...     ...        ...
30        1      552     4.0  964982653
54        1     1030     3.0  964982903
120       1     2018     5.0  964980523
39        1      673     3.0  964981775
102       1     1587     5.0  964982346

[189 rows x 4 columns]


In [28]:
for userId, group in tqdm(userId_grouped):
  user_sim = item_cossim_df.loc[group['movieId']] # user가 rating한 movieId들의 코사인 유사도
  user_rating = group['rating']  # user의 실제 rating 점수
  sim_sum = user_sim.sum(axis=0)  

  pred_ratings = np.matmul(user_sim.T.to_numpy(), user_rating) / (sim_sum + 1)
  item_prediction_result_df.loc[userId] = pred_ratings

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


HBox(children=(FloatProgress(value=0.0, max=610.0), HTML(value='')))




user 1번이 각 영화들에 어떤 평가를 매길지 예측

In [29]:
item_prediction_result_df.head()

movieId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,36,39,40,41,42,43,44,...,184349,184471,184641,184721,184791,184931,184987,184997,185029,185031,185135,185435,185473,185585,186587,187031,187541,187593,187595,187717,188189,188301,188675,188833,189043,189111,189333,189547,189713,190183,190207,190209,190213,190215,190219,193567,193571,193585,193587,193609
1,4.3051,4.25624,4.22166,3.64554,4.09433,4.28323,4.14305,4.14074,3.77429,4.26571,4.25101,4.16387,4.18333,4.04377,3.86898,4.29201,4.22528,4.22252,4.22201,4.09658,4.28302,4.15007,4.16112,4.14721,4.21931,4.15622,3.90092,3.87273,4.26531,3.68233,4.20214,4.30016,4.26757,4.21756,4.26455,0.501115,4.19163,4.06856,4.1945,4.2273,...,2.09463,4.12914,0.414114,3.69604,4.09327,3.86679,3.86679,3.2656,3.90413,2.58503,3.619,3.67802,3.86679,3.74304,3.86679,3.40705,2.43665,3.98144,4.25128,3.69604,3.69604,4.12659,3.75663,3.75663,1.22102,1.22102,1.22745,1.72224,3.69604,2.5177,1.22102,1.22102,1.22102,1.22102,1.22102,0.414114,0.414114,0.414114,0.414114,1.97352
2,3.19653,3.1903,2.41546,0.0874558,2.33116,2.91105,2.25134,2.39236,1.10121,3.00814,2.63084,2.45217,1.37574,0.583255,1.42076,3.21133,2.54795,3.0219,3.19135,3.05437,2.47405,2.4503,2.29195,2.3665,2.2681,2.1187,1.67409,1.09695,2.39109,0.0,2.60664,3.16922,2.7834,2.15746,2.8052,0.0,1.68757,2.51203,2.03661,2.98439,...,0.0,2.66801,0.99081,0.803101,2.17111,2.28001,2.28001,1.81037,2.94227,2.35583,2.56577,2.8548,2.28001,2.73325,2.28001,1.64536,1.86546,3.03378,2.74358,0.803101,0.803101,2.0877,2.54705,2.54705,0.0,0.0,0.896189,0.345084,0.803101,1.74059,0.0,0.0,0.0,0.0,0.0,0.99081,0.99081,0.99081,0.99081,2.07181
3,1.2932,1.24534,1.13391,0.203736,0.710221,1.43593,0.938842,1.0519,0.885202,1.38991,0.964274,1.69176,0.626665,0.320519,0.510623,1.18905,0.771361,1.10818,1.06401,1.41485,1.07499,1.08471,0.871938,0.993656,0.844264,0.670108,0.713949,0.222029,1.37863,1.83553,1.04843,1.38658,1.13335,0.845031,1.02126,0.0,0.743399,1.0459,0.850564,1.59088,...,0.225807,1.60301,0.0,0.0127142,0.762717,1.43573,1.43573,0.668273,1.06848,0.0847576,0.541372,0.0641133,1.43573,0.847821,1.43573,0.948433,0.111512,0.852972,1.55771,0.0127142,0.0127142,1.54134,0.0325261,0.0325261,0.040007,0.040007,0.0,0.0,0.0127142,0.136645,0.040007,0.040007,0.040007,0.040007,0.040007,0.0,0.0,0.0,0.0,0.0
4,3.43102,3.39681,3.27377,2.60598,3.37083,3.41569,3.41048,3.11164,2.78093,3.3716,3.44531,3.18287,3.25202,3.29292,3.1776,3.37444,3.47035,3.31238,3.32911,3.21754,3.43117,3.2311,3.12935,3.26462,3.38184,3.34946,3.00317,3.18379,3.47726,2.86662,3.33686,3.43906,3.42895,3.48217,3.43616,0.259513,3.28916,3.08435,3.39864,3.26108,...,1.84748,3.03896,0.506608,3.13484,3.12308,3.12528,3.12528,2.52236,3.12823,1.58877,2.47519,2.75448,3.12528,2.83153,3.12528,2.69917,1.7452,2.86612,3.19037,3.13484,3.13484,2.93025,2.77518,2.77518,0.333941,0.333941,0.783917,1.12179,3.13484,1.80296,0.333941,0.333941,0.333941,0.333941,0.333941,0.506608,0.506608,0.506608,0.506608,1.38619
5,3.28569,3.15679,3.079,2.6181,3.04094,3.13976,3.04128,2.90914,2.49483,3.17695,3.16874,2.57495,2.64177,3.0099,2.76408,3.14221,3.17633,2.89388,3.1188,2.60751,3.23566,3.05636,2.85036,2.85168,3.12521,2.94402,2.35766,2.58546,2.99433,1.38478,3.04108,3.16735,3.26251,3.27485,3.1935,0.0,2.82961,2.45551,2.92793,3.0236,...,0.325958,2.20867,0.0,2.07046,2.15737,1.77497,1.77497,1.65695,2.02762,1.1374,1.83891,1.67833,1.77497,1.82458,1.77497,1.37407,0.793476,2.15334,2.55562,2.07046,2.07046,2.19331,1.86549,1.86549,0.797675,0.797675,0.119347,0.0366243,2.07046,0.723091,0.797675,0.797675,0.797675,0.797675,0.797675,0.0,0.0,0.0,0.0,0.838844


## User-based

In [30]:
user_sparse_matrix = sparse_matrix.fillna(0).transpose() # 유저를 기준으로 하기 위해 행으로 변환

user_sparse_matrix.head()

movieId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,36,39,40,41,42,43,44,...,184349,184471,184641,184721,184791,184931,184987,184997,185029,185031,185135,185435,185473,185585,186587,187031,187541,187593,187595,187717,188189,188301,188675,188833,189043,189111,189333,189547,189713,190183,190207,190209,190213,190215,190219,193567,193571,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
# 유저끼리의 코사인 유사도
user_cossim_df = cossim_matrix(user_sparse_matrix, user_sparse_matrix)
user_cossim_df

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,1.000000,0.031872,0.046069,0.173524,0.086177,0.094118,0.099682,0.099769,0.051453,0.020411,0.097804,0.000000,0.112396,0.065067,0.133911,0.161681,0.214741,0.165120,0.255602,0.130537,0.109648,0.054691,0.092076,0.132334,0.052938,0.123969,0.164198,0.158098,0.120432,0.076375,0.154168,0.144288,0.133777,0.077864,0.038537,0.058670,0.116552,0.110644,0.253120,0.048433,...,0.062075,0.094488,0.185566,0.058868,0.024769,0.045980,0.249787,0.000000,0.086565,0.180652,0.063732,0.049223,0.051377,0.067011,0.046972,0.108599,0.080549,0.148718,0.101682,0.255188,0.091126,0.106770,0.163208,0.106951,0.129567,0.107024,0.271102,0.014017,0.232277,0.243810,0.079718,0.164544,0.192599,0.021529,0.104645,0.131272,0.225526,0.229789,0.075088,0.115446
2,0.031872,1.000000,0.000000,0.000000,0.000000,0.000000,0.032829,0.000000,0.000000,0.071654,0.000000,0.000000,0.050857,0.000000,0.058269,0.073817,0.047446,0.112771,0.005923,0.000000,0.061397,0.126934,0.000000,0.107413,0.152012,0.000000,0.000000,0.037727,0.079554,0.106763,0.000000,0.000000,0.024426,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.022921,0.133879,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.049291,0.104102,0.184655,0.000000,0.000000,0.122353,0.081079,0.000000,0.033838,0.000000,0.030102,0.045737,0.000000,0.050240,0.015970,0.000000,0.081110,0.000000,0.000000,0.085331,0.009698,0.198697,0.000000,0.006460,0.000000,0.000000,0.014836,0.000000,0.028688,0.000000,0.075387
3,0.046069,0.000000,1.000000,0.002744,0.005856,0.004748,0.000000,0.006310,0.000000,0.000000,0.000000,0.000000,0.000000,0.003690,0.019589,0.006284,0.011612,0.016606,0.008969,0.002057,0.003160,0.000489,0.003538,0.002976,0.000000,0.000000,0.002857,0.011900,0.004075,0.000000,0.000000,0.018610,0.005978,0.000859,0.000000,0.000000,0.000000,0.005026,0.002191,0.003005,...,0.016666,0.007853,0.000000,0.000000,0.000000,0.028004,0.029386,0.000000,0.004262,0.017697,0.011001,0.000000,0.000000,0.000000,0.000000,0.001157,0.005252,0.004658,0.005429,0.026094,0.000000,0.000000,0.006194,0.000000,0.000000,0.020441,0.017213,0.000000,0.030241,0.007459,0.006097,0.006047,0.027930,0.000000,0.000000,0.005128,0.022841,0.005659,0.000000,0.028844
4,0.173524,0.000000,0.002744,1.000000,0.123816,0.088467,0.069144,0.047033,0.014626,0.029601,0.060023,0.050722,0.069222,0.022450,0.069357,0.109919,0.151693,0.098085,0.180462,0.093657,0.040693,0.020446,0.058925,0.071014,0.005093,0.077524,0.132563,0.107534,0.036882,0.052842,0.138414,0.204642,0.122759,0.035621,0.065833,0.097839,0.052901,0.113154,0.217162,0.057147,...,0.024559,0.138656,0.120717,0.044430,0.076254,0.000000,0.123896,0.000000,0.062886,0.112820,0.015854,0.040282,0.050424,0.032235,0.047325,0.059643,0.118939,0.057572,0.057807,0.181364,0.071831,0.067424,0.193762,0.078586,0.082725,0.087994,0.222555,0.018136,0.159023,0.171357,0.062089,0.118994,0.236677,0.045103,0.073965,0.178424,0.106975,0.107237,0.027156,0.093590
5,0.086177,0.000000,0.005856,0.123816,1.000000,0.233511,0.091734,0.307291,0.000000,0.036516,0.155672,0.071525,0.020456,0.231583,0.106360,0.077137,0.140811,0.104957,0.095958,0.067604,0.071463,0.019042,0.059723,0.092991,0.000000,0.210270,0.102039,0.089708,0.017860,0.044245,0.083672,0.211459,0.228038,0.031386,0.303591,0.026657,0.283519,0.349198,0.110241,0.330530,...,0.025362,0.124746,0.113455,0.330535,0.000000,0.000000,0.121325,0.000000,0.154971,0.077466,0.046995,0.000000,0.150476,0.273945,0.000000,0.071721,0.141133,0.315695,0.244989,0.146521,0.000000,0.301774,0.148795,0.134418,0.036279,0.092574,0.108969,0.000000,0.080583,0.105947,0.061584,0.339848,0.111207,0.265796,0.147099,0.084021,0.164894,0.125535,0.217341,0.055367
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.131272,0.014836,0.005128,0.178424,0.084021,0.098098,0.169540,0.096171,0.056793,0.064926,0.061263,0.051772,0.041431,0.076847,0.147501,0.156517,0.175456,0.192035,0.162478,0.114100,0.124932,0.116912,0.127073,0.113450,0.053132,0.037208,0.094108,0.207008,0.078608,0.047264,0.072337,0.106257,0.156721,0.094677,0.040624,0.081211,0.046756,0.107510,0.140990,0.094393,...,0.042701,0.160876,0.142269,0.040281,0.049719,0.025769,0.121014,0.027271,0.099759,0.230477,0.125576,0.058476,0.059160,0.062799,0.092003,0.097011,0.150567,0.075846,0.096740,0.252158,0.071814,0.069997,0.168653,0.098280,0.096547,0.151171,0.195016,0.062936,0.259240,0.250525,0.135168,0.093578,0.241385,0.042136,0.124585,1.000000,0.125191,0.213189,0.053609,0.155602
607,0.225526,0.000000,0.022841,0.106975,0.164894,0.139223,0.159370,0.133526,0.016020,0.012196,0.188916,0.028275,0.054826,0.130123,0.155036,0.084411,0.201722,0.153504,0.210575,0.037266,0.124067,0.008551,0.062186,0.129539,0.027893,0.132682,0.133301,0.124845,0.030553,0.103741,0.120939,0.116277,0.142930,0.105240,0.027041,0.045604,0.139069,0.156604,0.240303,0.099527,...,0.137106,0.135521,0.193979,0.145999,0.058729,0.006999,0.221925,0.000000,0.149127,0.189509,0.048238,0.030803,0.011731,0.123261,0.009653,0.117171,0.122989,0.161051,0.123020,0.226413,0.072386,0.136234,0.137594,0.171957,0.034756,0.121417,0.252094,0.000000,0.178249,0.143350,0.086635,0.163083,0.184391,0.111342,0.104692,0.125191,1.000000,0.228245,0.146246,0.124808
608,0.229789,0.028688,0.005659,0.107237,0.125535,0.136012,0.262982,0.144346,0.098032,0.062310,0.114040,0.019458,0.111343,0.135309,0.181082,0.139033,0.181394,0.264870,0.275601,0.219379,0.187140,0.143862,0.127168,0.140364,0.072652,0.098078,0.099598,0.274892,0.080077,0.072305,0.102671,0.138859,0.159407,0.201446,0.075092,0.063802,0.116848,0.148701,0.220022,0.104436,...,0.085551,0.137115,0.319236,0.106176,0.060067,0.061620,0.157821,0.000000,0.143734,0.360161,0.142166,0.055298,0.084233,0.119029,0.139860,0.160160,0.092801,0.151719,0.095329,0.330473,0.125647,0.180193,0.202667,0.162363,0.029214,0.245327,0.207448,0.045023,0.333165,0.286895,0.139550,0.175035,0.199824,0.116274,0.142667,0.213189,0.228245,1.000000,0.101121,0.261918
609,0.075088,0.000000,0.000000,0.027156,0.217341,0.150754,0.023884,0.257619,0.000000,0.005890,0.133886,0.000000,0.000000,0.182585,0.081479,0.045347,0.113374,0.098716,0.083221,0.000000,0.067321,0.032251,0.031366,0.095110,0.033138,0.240821,0.013256,0.077769,0.030248,0.067221,0.093269,0.084738,0.172564,0.082394,0.086050,0.045148,0.245402,0.228000,0.090343,0.178166,...,0.042955,0.080963,0.110058,0.316613,0.000000,0.000000,0.086935,0.026974,0.042182,0.095530,0.000000,0.000000,0.038712,0.206560,0.000000,0.074885,0.018047,0.309760,0.136567,0.089336,0.000000,0.257324,0.053636,0.092674,0.000000,0.038354,0.067434,0.000000,0.063248,0.073755,0.044701,0.263376,0.061936,0.211699,0.063151,0.053609,0.146246,0.101121,1.000000,0.049072


In [32]:
# 예측 결과를 입력할 빈 데이터프레임 생성
movieId_grouped = train_df.groupby('movieId')
user_prediction_result_df = pd.DataFrame(index=list(movieId_grouped.indices.keys()),
                                         columns = user_sparse_matrix.index)
user_prediction_result_df

userId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610
1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193567,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
193571,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
193585,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
193587,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [42]:
for movieId, group in tqdm(movieId_grouped):
  user_sim = user_cossim_df.loc[group['userId']]  # 1번 영화를 평가한 유저들의 코사인 유사도
  user_rating = group['rating']  # 1번 영화를 평가한 유저들의 rating점수들
  sim_sum = user_sim.sum(axis=0)  

  pred_ratings = np.matmul(user_sim.T.to_numpy(), user_rating) / (sim_sum + 1)  
  user_prediction_result_df.loc[movieId] = pred_ratings


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


HBox(children=(FloatProgress(value=0.0, max=8985.0), HTML(value='')))




In [43]:
print(item_prediction_result_df.shape)
print(user_prediction_result_df.transpose().shape)

(610, 8985)
(610, 8985)


In [44]:
# 전체 user가 모든 movieId에 매긴 평점
print(item_prediction_result_df.head())
print(user_prediction_result_df.transpose().head())

user_prediction_result_df = user_prediction_result_df.transpose()

movieId   1        2        3       ...    193585    193587    193609
1         4.3051  4.25624  4.22166  ...  0.414114  0.414114   1.97352
2        3.19653   3.1903  2.41546  ...   0.99081   0.99081   2.07181
3         1.2932  1.24534  1.13391  ...         0         0         0
4        3.43102  3.39681  3.27377  ...  0.506608  0.506608   1.38619
5        3.28569  3.15679    3.079  ...         0         0  0.838844

[5 rows x 8985 columns]
         1        2         3       ...     193585     193587    193609
userId                              ...                                
1       3.81114  3.10408   2.69435  ...  0.0262447  0.0262447  0.248922
2       3.30592  2.52016   1.32092  ...   0.217925   0.217925  0.502946
3       2.34949  1.45155  0.982218  ...          0          0         0
4       3.74346  3.01587   2.52894  ...  0.0447763  0.0447763  0.180254
5       3.89513  3.30912   2.95136  ...          0          0  0.239789

[5 rows x 8985 columns]


# Evaluation with RMSE

In [45]:
test_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
34795,233,2054,2.0,1448816519
87144,562,1921,3.5,1368894071
67847,438,3753,4.0,1105669987
5392,38,193,1.0,841341598
100755,610,132046,3.0,1493846961


train 데이터에 있는 데이터로만 평가가 가능하다는 것이 협업필터링의 단점

In [48]:
def evaluate(test_df, prediction_result_df):
  groups_with_movie_ids = test_df.groupby(by='movieId')
  groups_with_user_ids = test_df.groupby(by='userId')
  intersection_movie_ids = sorted(list(set(list(prediction_result_df.columns)).intersection(set(list(groups_with_movie_ids.indices.keys())))))
  intersection_user_ids = sorted(list(set(list(prediction_result_df.index)).intersection(set(groups_with_user_ids.indices.keys()))))
  print(len(intersection_movie_ids))
  print(len(intersection_user_ids))

  # intersection이 되는 userId와 movieId 만을 가져옴
  compressed_prediction_df = prediction_result_df.loc[intersection_user_ids][intersection_movie_ids]

  # RMSE 계산
  grouped = test_df.groupby(by='userId')
  result_df = pd.DataFrame(columns=['rmse'])
  for userId, group in tqdm(grouped):
      if userId in intersection_user_ids:
          pred_ratings = compressed_prediction_df.loc[userId][compressed_prediction_df.loc[userId].index.intersection(list(group['movieId'].values))]
          pred_ratings = pred_ratings.to_frame(name='rating').reset_index().rename(columns={'index':'movieId','rating':'pred_rating'})
          actual_ratings = group[['rating', 'movieId']].rename(columns={'rating':'actual_rating'})

          final_df = pd.merge(actual_ratings, pred_ratings, how='inner', on=['movieId'])
          final_df = final_df.round(4)

  return final_df


## User-based

In [49]:
# 유저기반
evaluate(test_df, user_prediction_result_df)

4336
609


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  from ipykernel import kernelapp as app


HBox(children=(FloatProgress(value=0.0, max=609.0), HTML(value='')))




Unnamed: 0,actual_rating,movieId,pred_rating
0,3.0,132046,0.890112
1,5.0,38095,0.869004
2,3.5,62434,2.82882
3,4.0,55290,2.88082
4,4.5,5349,3.34186
...,...,...,...
217,3.5,138210,1.34592
218,4.0,67087,2.81935
219,1.5,33158,0.517454
220,5.0,1200,3.79377


In [54]:
result_df = evaluate(test_df, user_prediction_result_df)
print(f"RMSE: {sqrt(mean_squared_error(result_df['actual_rating'].values, result_df['pred_rating'].values))}")

4336
609


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  from ipykernel import kernelapp as app


HBox(children=(FloatProgress(value=0.0, max=609.0), HTML(value='')))


RMSE: 1.6668956206189323


## Item-based

In [50]:
# 아이템기반
evaluate(test_df, item_prediction_result_df)

4336
609


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  from ipykernel import kernelapp as app


HBox(children=(FloatProgress(value=0.0, max=609.0), HTML(value='')))




Unnamed: 0,actual_rating,movieId,pred_rating
0,3.0,132046,3.61
1,5.0,38095,3.83493
2,3.5,62434,3.69019
3,4.0,55290,3.77153
4,4.5,5349,3.83224
...,...,...,...
217,3.5,138210,3.56033
218,4.0,67087,3.67017
219,1.5,33158,3.57621
220,5.0,1200,3.89377


In [53]:
result_df = evaluate(test_df, item_prediction_result_df)
print(f"RMSE: {sqrt(mean_squared_error(result_df['actual_rating'].values, result_df['pred_rating'].values))}")

4336
609


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  from ipykernel import kernelapp as app


HBox(children=(FloatProgress(value=0.0, max=609.0), HTML(value='')))


RMSE: 0.814507199682496
