In [11]:
import numpy as np
import pandas as pd
from surprise import Reader,Dataset,KNNBasic,accuracy
from surprise.model_selection import cross_validate,train_test_split

In [12]:
ratings = pd.read_csv('ml-latest-small/ratings.csv')
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [13]:
reader = Reader(rating_scale=(0.5,5))
data = Dataset.load_from_df(ratings[['userId','movieId','rating']],reader)

In [14]:
sim_options = {
    'name':'cosine',
    'user_based':True
}
knn = KNNBasic(sim_options=sim_options)

In [15]:
cross_validate(knn,data,measures=['RMSE','MAE'],cv=5,verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9751  0.9595  0.9654  0.9864  0.9790  0.9731  0.0096  
MAE (testset)     0.7481  0.7408  0.7435  0.7582  0.7535  0.7488  0.0064  
Fit time          0.28    0.39    0.29    0.26    0.27    0.30    0.05    
Test time         0.89    1.04    0.86    0.87    0.81    0.90    0.08    


{'test_rmse': array([0.97511329, 0.95950262, 0.96543396, 0.98644708, 0.97902426]),
 'test_mae': array([0.74811108, 0.74082879, 0.74350164, 0.75821226, 0.75345662]),
 'fit_time': (0.28397321701049805,
  0.38851189613342285,
  0.29271674156188965,
  0.26346540451049805,
  0.2720921039581299),
 'test_time': (0.8918235301971436,
  1.043933391571045,
  0.8618106842041016,
  0.8681135177612305,
  0.811046838760376)}

In [16]:
trainset = data.build_full_trainset()
knn.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x29fc3615a90>

In [17]:
def get_recommendations(user_id,n=10):
    watched = set(ratings[ratings['userId']==user_id]['movieId'])
    all_movies = set(ratings['movieId'])
    unwatched = all_movies - watched

    predictions = [(movie_id, knn.predict(user_id, movie_id).est) for movie_id in unwatched]
    top_n = sorted(predictions, key=lambda x: x[1], reverse=True)[:n]

    return top_n

In [21]:
top_recommendations = get_recommendations(381, 10)
print(top_recommendations)

[(53, 5), (163925, 5), (131237, 5), (467, 5), (495, 5), (164367, 5), (626, 5), (633, 5), (876, 5), (33649, 5)]
