# Personalized Recommendations

## Imports

In [1]:
import numpy as np
import pandas as pd
import seaborn as sb
from sklearn.decomposition import NMF

## Get data

In [2]:
columns = ["user_id","movie_id","rating","rating_timestamp"]
df=pd.read_csv("ratings.dat.txt", sep="::", names=columns, parse_dates = ["rating_timestamp"])

  


In [3]:
df.head()

Unnamed: 0,user_id,movie_id,rating,rating_timestamp
0,1,114508,8,1381006850
1,2,499549,9,1376753198
2,2,1305591,8,1376742507
3,2,1428538,1,1371307089
4,3,75314,1,1595468524


In [4]:
from datetime import datetime

df['datetime'] = df['rating_timestamp'].apply(lambda ts: pd.to_datetime(ts, unit='s'))
df['year'] = df['datetime'].apply(lambda dt: dt.year)
df.head()

Unnamed: 0,user_id,movie_id,rating,rating_timestamp,datetime,year
0,1,114508,8,1381006850,2013-10-05 21:00:50,2013
1,2,499549,9,1376753198,2013-08-17 15:26:38,2013
2,2,1305591,8,1376742507,2013-08-17 12:28:27,2013
3,2,1428538,1,1371307089,2013-06-15 14:38:09,2013
4,3,75314,1,1595468524,2020-07-23 01:42:04,2020


In [5]:
df.groupby('year').count()['user_id']

year
2013    170807
2014    156354
2015    123644
2016    112455
2017    105987
2018     81870
2019     74183
2020     63134
2021     18756
Name: user_id, dtype: int64

In [6]:
df_2020 = df[df['year']==2020]

In [12]:
rating_nbr_by_user = df_2020.groupby('user_id').count()['rating'].reset_index()

In [15]:
users_1_rating = rating_nbr_by_user[rating_nbr_by_user['rating']==1]['user_id']

In [18]:
df=df_2020[~df_2020['user_id'].isin(users_1_rating)]

## Create rating matrix & latent vectors

In [19]:
rating_matrix = pd.pivot_table(index='user_id', columns='movie_id', values='rating', data=df_2020, fill_value=0)

In [20]:
rating_matrix

movie_id,417,4413,7340,10323,10747,12349,13086,13442,14142,15016,...,12574336,12587564,12588160,12607910,12724622,12749596,12762684,12875782,12888462,12920708
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
13,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70787,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
70788,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
70793,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
70804,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
rating_matrix.shape

(11192, 8483)

In [22]:
model = NMF(n_components=100, init='random', random_state=0)

In [23]:
W = model.fit_transform(rating_matrix)



In [28]:
H = model.components_.T

In [29]:
H.shape

(8483, 100)