# Imports

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import surprise
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
import recmetrics

# Dataset

In [2]:
# Read the raw ratings table from the project's data folder into a DataFrame.
ratings = pd.read_csv(filepath_or_buffer='Small/data/ratings.csv')

In [3]:
# Declare the rating scale for Surprise and wrap the (user, item, rating) columns as a Dataset.
# NOTE(review): the scale is declared as 0-5; confirm it matches the real min/max in ratings.csv.
reader = Reader(rating_scale=(0, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Hold out 25% of the ratings for evaluation. The split is random, so the seed is
# pinned to keep the train/test partition identical across kernel restarts.
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)

# Recommendations

In [4]:
def get_users_predictions(user_id, n, model):
    """Return the labels of the ``n`` highest-scored entries for one user.

    Parameters
    ----------
    user_id : label
        Row label to look up in ``model``.
    n : int
        Maximum number of labels to return.
    model : pandas.DataFrame
        Score matrix with one row per user; in this notebook the columns are
        movie ids and the values are predicted ratings.

    Returns
    -------
    list
        Column labels of ``model`` for the selected row, ordered from the
        highest to the lowest score and truncated to ``n`` entries.
    """
    # Turn the user's row into a one-column frame so it can be sorted by name.
    user_scores = model.loc[user_id].to_frame(name="predicted_rating")
    ranked = user_scores.sort_values(by="predicted_rating", ascending=False)
    top_n = ranked.iloc[:n]
    return top_n.index.tolist()

### Matrix Factorization

In [5]:
# Fit an SVD matrix-factorization model on the training split. The seed is
# pinned so the fit is repeatable for a given train split.
algo = surprise.prediction_algorithms.matrix_factorization.SVD(random_state=42)
algo.fit(trainset)

# Score every held-out (user, movie) pair and tabulate the predictions,
# discarding the per-prediction "details" column.
mf_test = pd.DataFrame(algo.test(testset)).drop(columns="details")
mf_test.columns = ['userId', 'movieId', 'actual', 'svd_predictions']

# User x movie matrix of predicted ratings; pairs with no prediction are filled with 0.
mf_model = mf_test.pivot_table(index='userId', columns='movieId', values='svd_predictions').fillna(0)

# Collapse to one row per user holding the distinct movies that user has in the test split.
# Dict-style renaming inside SeriesGroupBy.agg was removed in pandas 1.0 and now raises
# SpecificationError, so the per-user list is built with apply and named via to_frame.
mf_test = (
    mf_test.groupby('userId')['movieId']
    .apply(lambda movie_ids: list(set(movie_ids)))
    .to_frame('actual')
)

# Top-10 movies for each user, ranked by predicted rating.
mf_recs = [get_users_predictions(user, 10, mf_model) for user in mf_test.index]

mf_test['recommended_movies'] = mf_recs
mf_recommendations = mf_test.reset_index().drop(columns='actual')

mf_recommendations.to_csv('Small/recommendations/mf.csv')
mf_recommendations

Unnamed: 0,userId,recommended_movies
0,1,"[1197, 2959, 1222, 1617, 923, 1198, 3740, 1219..."
1,2,"[318, 109487, 79132, 115713, 3578, 89774, 1317..."
2,3,"[3703, 3210, 1093, 5181, 849, 2851, 72378, 331..."
3,4,"[1219, 898, 1617, 912, 4226, 1304, 47, 2858, 1..."
4,5,"[296, 34, 265, 364, 589, 232, 247, 36, 290, 534]"
...,...,...
605,606,"[912, 750, 4226, 3147, 1208, 7153, 260, 4973, ..."
606,607,"[1249, 1374, 457, 110, 2762, 1954, 2791, 3114,..."
607,608,"[50, 4878, 1387, 4993, 5418, 223, 5954, 54503,..."
608,609,"[318, 296, 1, 589, 1150, 590, 731, 288, 292, 828]"


### KNN

In [6]:
# Fit a KNNBaseline collaborative-filtering model on the training split.
algo = surprise.prediction_algorithms.knns.KNNBaseline()
algo.fit(trainset)

# Score every held-out (user, movie) pair and tabulate the predictions,
# discarding the per-prediction "details" column.
knn_test = pd.DataFrame(algo.test(testset)).drop(columns="details")
knn_test.columns = ['userId', 'movieId', 'actual', 'cf_predictions']

# User x movie matrix of predicted ratings; pairs with no prediction are filled with 0.
knn_model = knn_test.pivot_table(index='userId', columns='movieId', values='cf_predictions').fillna(0)

# Collapse to one row per user holding the distinct movies that user has in the test split.
# Dict-style renaming inside SeriesGroupBy.agg was removed in pandas 1.0 and now raises
# SpecificationError, so the per-user list is built with apply and named via to_frame.
knn_test = (
    knn_test.groupby('userId')['movieId']
    .apply(lambda movie_ids: list(set(movie_ids)))
    .to_frame('actual')
)

# Top-10 movies for each user, ranked by predicted rating.
knn_recs = [get_users_predictions(user, 10, knn_model) for user in knn_test.index]

knn_test['recommended_movies'] = knn_recs
knn_recommendations = knn_test.reset_index().drop(columns='actual')

knn_recommendations.to_csv('Small/recommendations/knn.csv')
knn_recommendations

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.


Unnamed: 0,userId,recommended_movies
0,1,"[1222, 1617, 943, 1197, 923, 3062, 3740, 1219,..."
1,2,"[318, 115713, 109487, 3578, 79132, 89774, 1317..."
2,3,"[3703, 1093, 3210, 5181, 849, 2851, 72378, 331..."
3,4,"[1279, 296, 2351, 2973, 1219, 912, 1704, 898, ..."
4,5,"[296, 247, 364, 265, 34, 290, 36, 534, 594, 589]"
...,...,...
605,606,"[506, 1571, 1066, 33564, 318, 28, 2436, 4973, ..."
606,607,"[110, 1249, 1374, 1954, 457, 2762, 1304, 1258,..."
607,608,"[430, 8132, 4226, 527, 50, 593, 1261, 1219, 68..."
608,609,"[1150, 296, 318, 1, 590, 589, 288, 731, 292, 434]"


In [9]:
# Show the recommendation list stored in the row labelled 0 as a sanity check.
knn_recommendations.loc[0, 'recommended_movies']

[1222, 1617, 943, 1197, 923, 3062, 3740, 1219, 2959, 1198]