In [222]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from surprise import SVD, SVDpp
from surprise import KNNWithMeans, KNNBasic, KNNWithZScore, KNNBaseline
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import train_test_split


from tqdm import tqdm_notebook

In [223]:
links = pd.read_csv('../001-intro/links.csv')
movies = pd.read_csv('../001-intro/movies.csv')
ratings = pd.read_csv('../001-intro/ratings.csv')
tags = pd.read_csv('../001-intro/tags.csv')

In [224]:
movies_with_ratings = movies.join(ratings.set_index('movieId'), on='movieId').reset_index(drop=True)
movies_with_ratings.dropna(inplace=True)

In [225]:
movies_with_ratings.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1.0,4.0,964982700.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5.0,4.0,847435000.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7.0,4.5,1106636000.0
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15.0,2.5,1510578000.0
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17.0,4.5,1305696000.0


In [226]:
dataset = pd.DataFrame({
    'uid': movies_with_ratings.userId,
    'iid': movies_with_ratings.title,
    'rating': movies_with_ratings.rating
})

In [227]:
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(dataset, reader)

In [241]:
trainset, testset = train_test_split(data, test_size=.15, random_state=42)

In [242]:
#Готовим первый алгоритм - SVD

In [243]:
#%%time
algo1 = SVD(n_factors=20, n_epochs=20)
algo1.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1e7fa486e10>

In [244]:
test_pred1 = algo1.test(testset)

In [245]:
accuracy.rmse(test_pred1, verbose=True)

RMSE: 0.8692


0.8692121612931321

In [246]:
#Готовим второй алгоритм - KNNBaseline

In [247]:
algo2 = KNNBaseline(k=50, sim_options={'name': 'pearson', 'user_based': True})
algo2.fit(trainset)

Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBaseline at 0x1e7fa2febe0>

In [248]:
test_pred2 = algo2.test(testset)

In [249]:
accuracy.rmse(test_pred2, verbose=True)

RMSE: 0.8746


0.8745553389477664

In [250]:
#Проверяем точность предсказаний комбинации 1 и 2 алгоритмов

In [251]:
preds12 = []
alpha = 0.5
for i in range(len(test_pred1[:5])):
    pred = {}
    pred['uid'] = test_pred1[i].uid
    pred['iid'] = test_pred1[i].iid
    pred['r_ui'] = test_pred1[i].r_ui
    pred['est'] = alpha * test_pred1[i].est + (1.0 - alpha) * test_pred2[i].est
    preds12.append(pred)

In [252]:
mse = np.mean([float((c['r_ui'] - c['est'])**2) for c in preds12])
rmse = np.sqrt(mse)
print("RMSE: ", rmse)

RMSE:  0.806166335408938


In [253]:
#Готовим процедуру - как комбинацию из двух алгоритмов

In [254]:
def recommend_for_user(user_id):
    current_user_id = user_id
    #выбираем все фильмы которые не оценивал(не смотрел) данный пользователь
    movies_to_score = movies_with_ratings[movies_with_ratings.userId != current_user_id].title.unique()
    
    scores = []
    titles = []

    #предсказываем оценку двумя алгоритмами в соотношении alpha
    for movie in movies_to_score:
        if movie in user_movies:
            continue
        predScore1 = algo1.predict(uid=current_user_id, iid=movie).est
        predScore2 = algo2.predict(uid=current_user_id, iid=movie).est
        scores.append(alpha * predScore1 + (1.0 - alpha) * predScore2)
        titles.append(movie)
    
    best_indexes = np.argsort(scores)[-10:]
    for i in reversed(best_indexes):
        print(titles[i], scores[i])

In [255]:
recommend_for_user(2.0)

Seventh Seal, The (Sjunde inseglet, Det) (1957) 4.635666148381373
Touch of Evil (1958) 4.630902965572388
Notorious (1946) 4.593819581975939
To Catch a Thief (1955) 4.587669740691636
Day of the Doctor, The (2013) 4.57773461312037
Guess Who's Coming to Dinner (1967) 4.572967885020539
Yojimbo (1961) 4.5664931474948105
It Happened One Night (1934) 4.566432181612305
Sweet Hereafter, The (1997) 4.556048411718644
Hustler, The (1961) 4.547741368596745
