In [61]:
from surprise import SVD
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import train_test_split

import pandas as pd
import numpy as np

In [2]:
unames = ['user_id', 'gender', 'age', 'occupation', 'zip']
users = pd.read_table('../hw-2/ml-1m/users.dat', sep='::', header=None, names=unames, engine='python')

rnames = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_table('../hw-2/ml-1m/ratings.dat', sep='::', header=None, names=rnames, engine='python')


mnames = ['movie_id', 'title', 'genres']
movies = pd.read_table('../hw-2/ml-1m/movies.dat', sep='::', header=None, names=mnames, engine='python')

In [3]:
data=pd.merge(pd.merge(ratings,users),movies)
data.head()

Unnamed: 0,user_id,movie_id,rating,timestamp,gender,age,occupation,zip,title,genres
0,1,1193,5,978300760,F,1,10,48067,One Flew Over the Cuckoo's Nest (1975),Drama
1,2,1193,5,978298413,M,56,16,70072,One Flew Over the Cuckoo's Nest (1975),Drama
2,12,1193,4,978220179,M,25,12,32793,One Flew Over the Cuckoo's Nest (1975),Drama
3,15,1193,4,978199279,M,25,7,22903,One Flew Over the Cuckoo's Nest (1975),Drama
4,17,1193,5,978158471,M,50,1,95350,One Flew Over the Cuckoo's Nest (1975),Drama


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1000209 entries, 0 to 1000208
Data columns (total 10 columns):
 #   Column      Non-Null Count    Dtype 
---  ------      --------------    ----- 
 0   user_id     1000209 non-null  int64 
 1   movie_id    1000209 non-null  int64 
 2   rating      1000209 non-null  int64 
 3   timestamp   1000209 non-null  int64 
 4   gender      1000209 non-null  object
 5   age         1000209 non-null  int64 
 6   occupation  1000209 non-null  int64 
 7   zip         1000209 non-null  object
 8   title       1000209 non-null  object
 9   genres      1000209 non-null  object
dtypes: int64(6), object(4)
memory usage: 83.9+ MB


In [5]:
dataset = pd.DataFrame({
    'uid': data.user_id,
    'iid': data.title,
    'rating': data.rating
})
dataset.head()

Unnamed: 0,uid,iid,rating
0,1,One Flew Over the Cuckoo's Nest (1975),5
1,2,One Flew Over the Cuckoo's Nest (1975),5
2,12,One Flew Over the Cuckoo's Nest (1975),4
3,15,One Flew Over the Cuckoo's Nest (1975),4
4,17,One Flew Over the Cuckoo's Nest (1975),5


In [6]:
data.rating.min()

1

In [7]:
data.rating.max()

5

In [8]:
reader = Reader(rating_scale=(1.0, 5.0))
data_for_surprise = Dataset.load_from_df(dataset, reader)

In [9]:
trainset, testset = train_test_split(data_for_surprise, test_size=.15)

* В качестве гибридной модели будет блендинг из KNN и SVD

In [10]:
algo_1 = KNNWithMeans(k=60, sim_options={'name': 'pearson_baseline', 'user_based': True})
algo_1.fit(trainset)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x220f601b5b0>

In [11]:
test_pred = algo_1.test(testset)

In [12]:
accuracy.rmse(test_pred, verbose=True)

RMSE: 0.8859


0.8858606345555965

In [13]:
algo_2 = SVD()
algo_2.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x220f601b9a0>

In [14]:
test_pred = algo_2.test(testset)

In [15]:
accuracy.rmse(test_pred, verbose=True)

RMSE: 0.8693


0.8692938221129964

In [16]:
algo_1.predict(uid=15, iid='Fight Club (1999)')

Prediction(uid=15, iid='Fight Club (1999)', r_ui=None, est=3.240721206384464, details={'actual_k': 60, 'was_impossible': False})

In [17]:
algo_2.predict(uid=15, iid='Fight Club (1999)')

Prediction(uid=15, iid='Fight Club (1999)', r_ui=None, est=3.2722189712919914, details={'was_impossible': False})

 * фунция вывода рекомендаций


In [69]:
def movie_recomendation(user_id):
    current_uid = user_id
    
    rated_movies1 = dataset[lambda x: x['uid'] == current_uid]  #фильмы, у которых есть рейтинг (просмотрены пользователем)
    
    unrated_movies = []
    iid_unique = dataset.iid.unique()

    for i in range(100):
        if iid_unique[i] not in rated_movies1.iid:    #список 100 непросмотенных пользователем фильмов
            unrated_movies.append(iid_unique[i])
        else:
            continue
            
    pre_recom_est = []        #предсказание моделями рейтингов непросмотенных фильмов и их усреднение
    pre_recom_title = []

    for i in range(100):
        g_est = 0.5 * algo_1.predict(uid=current_uid, iid=unrated_movies[i]).est + 0.5 * algo_2.predict(uid=current_uid, iid=unrated_movies[i]).est
        pre_recom_est.append(g_est)
        pre_recom_title.append(unrated_movies[i])
    
    best_est = np.argsort(pre_recom_est)[-10:]   #вывод 10 лучших по предсказанию фильмов
    for i in reversed(best_est):
        print(pre_recom_title[i], pre_recom_est[i])


In [70]:
movie_recomendation(1.0)

Schindler's List (1993) 4.8662742180124505
Christmas Story, A (1983) 4.819830136539945
Beauty and the Beast (1991) 4.802018378013276
Apollo 13 (1995) 4.7958441202121005
Rain Man (1988) 4.770427331837749
Ben-Hur (1959) 4.758032903899439
Toy Story 2 (1999) 4.737770424110732
Wizard of Oz, The (1939) 4.732262264884994
Back to the Future (1985) 4.7292463410799
Sound of Music, The (1965) 4.720957785567405


In [71]:
movie_recomendation(15.0)

Gladiator (2000) 4.353207739921809
Braveheart (1995) 4.348065799037607
Green Mile, The (1999) 4.1600696215138
Star Wars: Episode IV - A New Hope (1977) 4.084579131680256
Schindler's List (1993) 4.058923910840108
Saving Private Ryan (1998) 4.053430219835571
Few Good Men, A (1992) 4.043874281044726
Sixth Sense, The (1999) 4.037448333962896
Close Shave, A (1995) 3.948224035850231
Ferris Bueller's Day Off (1986) 3.9482118469510636


In [73]:
movie_recomendation(127.0)

Saving Private Ryan (1998) 4.94839601414956
Sixth Sense, The (1999) 4.929489095957349
Schindler's List (1993) 4.861098344123972
Terminator 2: Judgment Day (1991) 4.7599321336684035
Gladiator (2000) 4.756344029445167
Green Mile, The (1999) 4.732657500948706
Star Wars: Episode IV - A New Hope (1977) 4.709757855236022
Braveheart (1995) 4.688864576496651
Star Wars: Episode VI - Return of the Jedi (1983) 4.609391496675084
E.T. the Extra-Terrestrial (1982) 4.557176072773052
