#### 1. import libary and data

In [1]:
import pandas as pd
import numpy as np
import scipy.stats
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity

In [2]:
ratings = pd.read_csv('../datasets/movie/ratings_small.csv')

ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [3]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     100836 non-null  int64  
 1   movieId    100836 non-null  int64  
 2   rating     100836 non-null  float64
 3   timestamp  100836 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.1 MB


In [4]:
movies = pd.read_csv('../datasets/movie/movies_small.csv')

movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
# ratings 와 movies merge

df = pd.merge(ratings, movies, on='movieId', how='inner')

df.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


#### 2. EDA

In [6]:
# Aggregate by movie
agg_ratings = df.groupby('title').agg(mean_rating = ('rating', 'mean'),
                                                number_of_ratings = ('rating', 'count')).reset_index()

# Keep the movies with over 100 ratings
agg_ratings_GT100 = agg_ratings[agg_ratings['number_of_ratings']>100]

# Check the information of the dataframe
agg_ratings_GT100.info()

<class 'pandas.core.frame.DataFrame'>
Index: 134 entries, 74 to 9615
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   title              134 non-null    object 
 1   mean_rating        134 non-null    float64
 2   number_of_ratings  134 non-null    int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 4.2+ KB


In [7]:
agg_ratings_GT100.sort_values(by='number_of_ratings', ascending=False).head()

Unnamed: 0,title,mean_rating,number_of_ratings
3158,Forrest Gump (1994),4.164134,329
7593,"Shawshank Redemption, The (1994)",4.429022,317
6865,Pulp Fiction (1994),4.197068,307
7680,"Silence of the Lambs, The (1991)",4.16129,279
5512,"Matrix, The (1999)",4.192446,278


In [8]:
# df 와 agg_ratings_GT100 을 merge

df_GT100 = pd.merge(df, agg_ratings_GT100, on='title', how='inner')

df_GT100.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres,mean_rating,number_of_ratings
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.92093,215
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.92093,215
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.92093,215
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.92093,215
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.92093,215


#### 3. DF 생성
* DataFrame
    * index = movie title
    * columns = userId
    * values = rating

In [9]:
# cos 유사도분석을 할 때, 영화 평점에 대한 유사도(아이템 기반)를 보고 싶으면 인덱스를 영화로
#                        유저 평점에 대한 유사도(유저 기반)를 보고 싶으면 인덱스를 user id로

matrix = df_GT100.pivot_table(index='userId', columns='title', values='rating')

matrix.head()

title,2001: A Space Odyssey (1968),Ace Ventura: Pet Detective (1994),Aladdin (1992),Alien (1979),Aliens (1986),"Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)",American Beauty (1999),American History X (1998),American Pie (1999),Apocalypse Now (1979),...,True Lies (1994),"Truman Show, The (1998)",Twelve Monkeys (a.k.a. 12 Monkeys) (1995),Twister (1996),Up (2009),"Usual Suspects, The (1995)",WALL·E (2008),Waterworld (1995),Willy Wonka & the Chocolate Factory (1971),X-Men (2000)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,4.0,,,5.0,5.0,,4.0,...,,,,3.0,,5.0,,,5.0,5.0
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,4.0,,,,5.0,,,,...,,,2.0,,,,,,4.0,
5,,3.0,4.0,,,,,,,,...,2.0,,,,,4.0,,,,


#### 4. matrix normalize and pearson correlation

In [10]:
# 피어슨 상관분석을 통한 유사도 분석
# DataFrame.corr(method='pearson', min_periods=1, numeric_only=False)

# Normalize user-item matrix -> 평균을 기준으로 양쪽으로 정규화
matrix_norm = matrix.subtract(matrix.mean(axis=1), axis = 0)
# matrix_norm.head()
user_similiarity = matrix_norm.T.corr()     # T를 통해서 인덱스, 컬럼 Transform

In [11]:
matrix_norm

title,2001: A Space Odyssey (1968),Ace Ventura: Pet Detective (1994),Aladdin (1992),Alien (1979),Aliens (1986),"Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)",American Beauty (1999),American History X (1998),American Pie (1999),Apocalypse Now (1979),...,True Lies (1994),"Truman Show, The (1998)",Twelve Monkeys (a.k.a. 12 Monkeys) (1995),Twister (1996),Up (2009),"Usual Suspects, The (1995)",WALL·E (2008),Waterworld (1995),Willy Wonka & the Chocolate Factory (1971),X-Men (2000)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,-0.392857,,,0.607143,0.607143,,-0.392857,...,,,,-1.392857,,0.607143,,,0.607143,0.607143
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,0.617647,,,,1.617647,,,,...,,,-1.382353,,,,,,0.617647,
5,,-0.461538,0.538462,,,,,,,,...,-1.461538,,,,,0.538462,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,1.097826,,,0.097826,-0.402174,0.597826,0.597826,0.097826,-2.902174,0.597826,...,,0.597826,0.097826,,,0.597826,0.097826,,,
607,,,,-0.900000,,,-0.900000,,,,...,0.100000,,,1.100000,,,,-0.900000,,-0.900000
608,-0.533613,-0.033613,-0.533613,0.466387,0.966387,,1.466387,0.466387,-1.033613,-0.533613,...,-0.533613,0.966387,-0.033613,-0.533613,,0.966387,,-0.533613,-0.033613,0.466387
609,,,,,,,,,,,...,,,,,,,,-0.333333,,


In [12]:
user_similiarity

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,,,0.391797,0.180151,-0.439941,-0.029894,0.464277,1.000000,-0.037987,...,0.091574,0.254514,0.101482,-0.500000,0.780020,0.303854,-0.012077,0.242309,-0.175412,0.071553
2,,1.000000,,,,,,,,1.000000,...,-0.583333,,-1.000000,,,0.583333,,-0.229416,,0.765641
3,,,,,,,,,,,...,,,,,,,,,,
4,0.391797,,,1.000000,-0.394823,0.421927,0.704669,0.055442,,0.360399,...,-0.239325,0.562500,0.162301,-0.158114,0.905134,0.021898,-0.020659,-0.286872,,-0.050868
5,0.180151,,,-0.394823,1.000000,-0.006888,0.328889,0.030168,,-0.777714,...,0.000000,0.231642,0.131108,0.068621,-0.245026,0.377341,0.228218,0.263139,0.384111,0.040582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.303854,0.583333,,0.021898,0.377341,-0.468008,0.154338,0.309634,-0.262613,-0.584677,...,0.411419,0.211316,0.304117,0.600640,0.129249,1.000000,0.066328,0.410455,0.738371,0.190550
607,-0.012077,,,-0.020659,0.228218,0.541386,0.374808,0.281876,,-1.000000,...,0.640257,0.300075,0.049099,-0.034091,-0.027678,0.066328,1.000000,0.037195,0.190117,0.013792
608,0.242309,-0.229416,,-0.286872,0.263139,-0.337129,0.058785,0.362413,0.073324,-0.544342,...,0.471812,0.366453,0.308547,0.226134,0.059129,0.410455,0.037195,1.000000,0.503937,0.103155
609,-0.175412,,,,0.384111,0.158255,0.420288,0.104828,,,...,1.000000,0.167095,0.461880,0.800327,-0.550000,0.738371,0.190117,0.503937,1.000000,-0.521773


In [13]:
# cosine_similarity 분석
# class pandas.DataFrame(data=None, index=None, columns=None, dtype=None, copy=None)[source]

cosim = cosine_similarity(matrix.fillna(0))

cos_user_similiarity = pd.DataFrame(data=cosim, index=matrix_norm.index, columns=matrix_norm.index)

In [14]:
cos_user_similiarity

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,0.056296,0.149940,0.502701,0.317641,0.360316,0.364989,0.292416,0.161828,0.094330,...,0.258653,0.416659,0.646653,0.185122,0.304023,0.595505,0.565108,0.620801,0.246867,0.606943
2,0.056296,1.000000,0.000000,0.019890,0.046363,0.050349,0.035701,0.061566,0.000000,0.227727,...,0.310043,0.051480,0.084336,0.000000,0.000000,0.189484,0.049686,0.156093,0.073012,0.222647
3,0.149940,0.000000,1.000000,0.000000,0.274411,0.107280,0.000000,0.218635,0.000000,0.000000,...,0.177109,0.182818,0.091372,0.000000,0.000000,0.131511,0.176446,0.101169,0.000000,0.080407
4,0.502701,0.019890,0.000000,1.000000,0.198991,0.230690,0.308946,0.146191,0.061477,0.080363,...,0.187644,0.254814,0.533513,0.139434,0.264859,0.377129,0.360590,0.471025,0.073255,0.441904
5,0.317641,0.046363,0.274411,0.198991,1.000000,0.624101,0.211875,0.614356,0.000000,0.115703,...,0.147746,0.622074,0.287508,0.498556,0.387118,0.253337,0.364109,0.390052,0.466030,0.261621
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.595505,0.189484,0.131511,0.377129,0.253337,0.286401,0.567226,0.311680,0.260981,0.300137,...,0.569716,0.345732,0.673317,0.213851,0.373437,1.000000,0.454345,0.850007,0.219934,0.819662
607,0.565108,0.049686,0.176446,0.360590,0.364109,0.451775,0.374480,0.368799,0.046073,0.025980,...,0.218127,0.461928,0.550304,0.313488,0.283333,0.454345,1.000000,0.595771,0.365994,0.568714
608,0.620801,0.156093,0.101169,0.471025,0.390052,0.466694,0.568533,0.445144,0.182716,0.302577,...,0.518947,0.490130,0.721413,0.357964,0.420190,0.850007,0.595771,1.000000,0.324610,0.815061
609,0.246867,0.073012,0.000000,0.073255,0.466030,0.593401,0.193537,0.586720,0.000000,0.078089,...,0.073474,0.583037,0.175708,0.432681,0.283740,0.219934,0.365994,0.324610,1.000000,0.217414


In [15]:
# user 1 이용
picked_user = 1

user_similiarity.drop(index=picked_user, inplace=True)
user_similiarity.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,,1.0,,,,,,,,1.0,...,-0.583333,,-1.0,,,0.583333,,-0.229416,,0.765641
3,,,,,,,,,,,...,,,,,,,,,,
4,0.391797,,,1.0,-0.394823,0.421927,0.704669,0.055442,,0.360399,...,-0.239325,0.5625,0.162301,-0.158114,0.905134,0.021898,-0.020659,-0.286872,,-0.050868
5,0.180151,,,-0.394823,1.0,-0.006888,0.328889,0.030168,,-0.777714,...,0.0,0.231642,0.131108,0.068621,-0.245026,0.377341,0.228218,0.263139,0.384111,0.040582
6,-0.439941,,,0.421927,-0.006888,1.0,0.0,-0.127385,,0.957427,...,-0.29277,-0.030599,-0.123983,-0.176327,0.063861,-0.468008,0.541386,-0.337129,0.158255,-0.030567


In [16]:
# 비슷한 유저 10명
n = 10

# 유사도의 임계점을 정한다
user_similiarity_threshold = 0.3

# 10명을 뽑아낸다
similar_users = user_similiarity[user_similiarity[picked_user] > user_similiarity_threshold][picked_user].sort_values(ascending=False)[:n]

In [17]:
similar_users

userId
108    1.000000
9      1.000000
550    1.000000
598    1.000000
502    1.000000
401    0.942809
511    0.925820
366    0.872872
154    0.866025
595    0.866025
Name: 1, dtype: float64

#### 5. 대상유저가 본 영화 제거하고 비슷한 유저가 본 영화를 골라낸다

In [18]:
picked_userid_watched = matrix_norm[matrix_norm.index == picked_user].dropna(axis=1, how='all')
picked_userid_watched

title,Alien (1979),American Beauty (1999),American History X (1998),Apocalypse Now (1979),Back to the Future (1985),Batman (1989),"Big Lebowski, The (1998)",Braveheart (1995),Clear and Present Danger (1994),Clerks (1994),...,Star Wars: Episode IV - A New Hope (1977),Star Wars: Episode V - The Empire Strikes Back (1980),Star Wars: Episode VI - Return of the Jedi (1983),Stargate (1994),"Terminator, The (1984)",Toy Story (1995),Twister (1996),"Usual Suspects, The (1995)",Willy Wonka & the Chocolate Factory (1971),X-Men (2000)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-0.392857,0.607143,0.607143,-0.392857,0.607143,-0.392857,0.607143,-0.392857,-0.392857,-1.392857,...,0.607143,0.607143,0.607143,-1.392857,0.607143,-0.392857,-1.392857,0.607143,0.607143,0.607143


In [19]:
# 비슷한 유저
similar_user_movies = matrix_norm[matrix_norm.index.isin(similar_users.index)].dropna(axis=1, how='all')
similar_user_movies

title,Aladdin (1992),Alien (1979),"Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)",Back to the Future (1985),Batman Begins (2005),"Beautiful Mind, A (2001)",Beauty and the Beast (1991),Blade Runner (1982),"Bourne Identity, The (2002)",Braveheart (1995),...,Shrek (2001),"Silence of the Lambs, The (1991)",Spider-Man (2002),Star Wars: Episode I - The Phantom Menace (1999),Terminator 2: Judgment Day (1991),Titanic (1997),Toy Story (1995),Up (2009),"Usual Suspects, The (1995)",WALL·E (2008)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9,,,,0.333333,,,,,,,...,,,,,,,,,,
108,,,0.466667,0.466667,,0.466667,,0.466667,,,...,,,0.466667,,,-0.533333,,,,
154,,,,,,,,,,,...,,,,,,,,0.214286,,
366,,,,,-0.205882,,,,,-0.205882,...,,,,,-0.205882,,,,,
401,-0.382353,,,,,,-0.382353,,,,...,0.117647,,,,,,0.117647,0.617647,,0.617647
502,,-0.375,,,,,,,,,...,,,,,,,,,,
511,,,-0.653846,,,,,,,,...,,,-1.153846,-0.653846,,,,-0.153846,,
550,,,,,,,,,,,...,,,,,,,-0.277778,0.222222,,-0.277778
595,,,,,,,,,,,...,,,,-0.333333,,,,,0.666667,
598,,,,,,,,,0.888889,,...,-2.111111,-2.611111,,,,,,,,


In [20]:
similar_user_movies.drop(picked_userid_watched.columns, axis=1, inplace=True, errors='ignore')

In [21]:
similar_user_movies

title,Aladdin (1992),"Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)",Batman Begins (2005),"Beautiful Mind, A (2001)",Beauty and the Beast (1991),Blade Runner (1982),"Bourne Identity, The (2002)","Breakfast Club, The (1985)",Catch Me If You Can (2002),"Dark Knight, The (2008)",...,"Monsters, Inc. (2001)",Ocean's Eleven (2001),Pirates of the Caribbean: The Curse of the Black Pearl (2003),"Shawshank Redemption, The (1994)",Shrek (2001),Spider-Man (2002),Terminator 2: Judgment Day (1991),Titanic (1997),Up (2009),WALL·E (2008)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9,,,,,,,,,,,...,,,,,,,,,,
108,,0.466667,,0.466667,,0.466667,,-0.533333,0.466667,,...,,,,,,0.466667,,-0.533333,,
154,,,,,,,,,,,...,,,,,,,,,0.214286,
366,,,-0.205882,,,,,,,-0.205882,...,,,-0.205882,,,,-0.205882,,,
401,-0.382353,,,,-0.382353,,,,,,...,0.117647,,0.117647,,0.117647,,,,0.617647,0.617647
502,,,,,,,,,,,...,,,,0.125,,,,,,
511,,-0.653846,,,,,,,,,...,,,,0.346154,,-1.153846,,,-0.153846,
550,,,,,,,,,-0.277778,-0.277778,...,,,,0.222222,,,,,0.222222,-0.277778
595,,,,,,,,,,,...,,,,,,,,,,
598,,,,,,,0.888889,,,,...,,0.888889,,,-2.111111,,,,,


#### 6. Recommend Items

In [22]:
item_score = {}

for i in similar_user_movies.columns:
    movie_rating = similar_user_movies[i]
    
    total = 0
    count = 0

    for j in similar_users.index:
        
        if pd.isna(movie_rating[j]) == False :
            score = similar_users[j] * movie_rating[j]
            
            total += score
            count += 1

    item_score[i] = total / count

item_score_df = pd.DataFrame(item_score.items(), columns=['movie', 'movie_score'])