<a href="https://colab.research.google.com/github/solobala/RMSL_9/blob/main/RS_lecture_5_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install surprise

In [None]:
from surprise import SVD, SVDpp
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import train_test_split

import matplotlib.pyplot as plt

from tqdm import tqdm_notebook

from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
from sklearn.neighbors import NearestNeighbors

import pandas as pd
import numpy as np

In [None]:
!wget https://github.com/ALKONDR/netology-recsys/archive/refs/heads/master.zip
!unzip master.zip

In [None]:
# Load the four CSV tables from the unpacked lecture-1 folder.
links, movies, ratings, tags = (
    pd.read_csv(csv_path)
    for csv_path in (
        'netology-recsys-master/lecture-1/links.csv',
        'netology-recsys-master/lecture-1/movies.csv',
        'netology-recsys-master/lecture-1/ratings.csv',
        'netology-recsys-master/lecture-1/tags.csv',
    )
)

In [None]:
# Attach every rating to its movie (join on movieId, keeping all movies), then
# discard rows that contain any missing value.
movies_with_ratings = (
    movies
    .join(ratings.set_index('movieId'), on='movieId')
    .reset_index(drop=True)
    .dropna()
)

In [None]:
# Preview the joined movie/rating rows.
movies_with_ratings.head()

In [None]:
# Three columns in (user, item, rating) order; movie titles serve as item ids.
dataset = (
    movies_with_ratings[['userId', 'title', 'rating']]
    .rename(columns={'userId': 'uid', 'title': 'iid'})
)

In [None]:
# Ratings are declared to lie on a 0.5 .. 5.0 scale.
reader = Reader(rating_scale=(0.5, 5.0))
# Spell out the column order passed to Surprise: user id, item id, rating.
data = Dataset.load_from_df(dataset[['uid', 'iid', 'rating']], reader)

In [None]:
# Hold out 15% of the ratings for evaluation; the seed keeps the split fixed.
trainset, testset = train_test_split(
    data,
    test_size=0.15,
    random_state=42,
)

In [None]:
%%time
algo = SVD(n_factors=20, n_epochs=20)
algo.fit(trainset)

In [None]:
# Run the fitted model over the held-out test split.
test_pred = algo.test(testset)

In [None]:
# RMSE of the test-split predictions (verbose=True asks for it to be printed).
accuracy.rmse(test_pred, verbose=True)

In [None]:
# Estimated rating of user 2.0 for one movie; item ids are titles because the
# `iid` column of `dataset` was built from `title`.
algo.predict(uid=2.0, iid='Mortal Kombat (1995)').est

In [None]:
# Score every title the chosen user has not rated yet.
current_user_id = 2.0
user_movies = movies_with_ratings[movies_with_ratings.userId == current_user_id].title.unique()

# Set lookup instead of scanning the NumPy array once per candidate title.
seen_titles = set(user_movies)

scores = []
titles = []

for movie in movies_with_ratings.title.unique():
    if movie in seen_titles:
        continue

    # `scores` and `titles` stay index-aligned.
    scores.append(algo.predict(uid=current_user_id, iid=movie).est)
    titles.append(movie)

In [None]:
# The ten highest predicted scores in ascending order (values only; sorting
# here does not carry the matching entries of `titles` along).
sorted(scores)[-10:]

In [None]:
def change_string(s):
    """Turn a '|'-separated genre string into space-separated tokens.

    Spaces and hyphens inside each genre are removed so that every genre
    becomes a single token, e.g. 'Sci-Fi|Film-Noir' -> 'SciFi FilmNoir'.
    """
    tokens = []
    for genre in s.split('|'):
        tokens.append(genre.replace(' ', '').replace('-', ''))
    return ' '.join(tokens)

In [None]:
# One normalised genre string per row of `movies`, in row order.
movie_genres = list(map(change_string, movies.genres.values))

In [None]:
# Peek at the first normalised genre string.
movie_genres[0]

'Adventure Animation Children Comedy Fantasy'

In [None]:
# Text pipeline: raw token counts followed by TF-IDF re-weighting.
count_vect = CountVectorizer()
tfidf_transformer = TfidfTransformer()

X_train_counts = count_vect.fit_transform(movie_genres)
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

# Neighbour index over the genre vectors; a query returns 20 rows by default.
neigh = NearestNeighbors(
    metric='euclidean',
    n_neighbors=20,
    n_jobs=-1,
)
neigh.fit(X_train_tfidf)

In [None]:
# Build a free-form genre query, normalised the same way as the training strings.
test = change_string("Adventure|Comedy|Fantasy|Crime")

# Reuse the already fitted vectorizer and TF-IDF transformer (transform only).
predict = count_vect.transform([test])
X_tfidf2 = tfidf_transformer.transform(predict)

# With return_distance=True the result is a (distances, indices) pair.
res = neigh.kneighbors(X_tfidf2, return_distance=True)

In [None]:
# Show the raw (distances, indices) result of the neighbour query.
res

In [None]:
# res[1][0] holds the neighbour row positions for the single query; they index
# `movies` directly because the genre strings were built from `movies.genres`
# in row order.
movies.iloc[res[1][0]]

In [None]:
# Order the ratings chronologically so that later rows are more recent ratings.
movies_with_ratings = movies_with_ratings.sort_values('timestamp')

In [None]:
# Preview the movies table.
movies.head()

In [None]:
# Preview the ratings in chronological order. Indexing the frame with itself
# (`movies_with_ratings[movies_with_ratings]`) treats a non-boolean frame as a
# mask, which pandas rejects with a ValueError and which aborts "Run All".
movies_with_ratings.head()

In [None]:
# Map each movie title to its raw genre string in one vectorised pass; if a
# title occurs more than once, the last occurrence wins.
title_genres = dict(zip(movies.title, movies.genres))

In [None]:
def recommend_for_user(user_id, top_n=10):
    """Print content-similar, not yet rated movies ranked by predicted rating.

    The most recently rated movie of the user is used as a seed: its genre
    string is vectorised with the fitted ``count_vect`` / ``tfidf_transformer``,
    the nearest rows of ``movies`` are fetched from ``neigh``, titles the user
    has already rated are dropped, and the rest are scored with ``algo``.

    Parameters
    ----------
    user_id
        Raw user id as stored in ``movies_with_ratings.userId``.
    top_n : int, optional
        Maximum number of recommendations to print (non-negative, default 10).
        The candidate pool is whatever ``neigh.kneighbors`` returns, so fewer
        lines may be printed.

    Returns
    -------
    None
        Results are printed as ``title score`` lines, best score first.

    Raises
    ------
    IndexError
        If the user has no rows in ``movies_with_ratings``.
    """
    user_ratings = movies_with_ratings[movies_with_ratings.userId == user_id]

    # Sort here instead of relying on an earlier cell having sorted the global
    # frame; the stable sort leaves an already ordered frame untouched.
    user_movies = user_ratings.sort_values('timestamp', kind='stable').title.unique()

    if len(user_movies) == 0:
        raise IndexError(
            'user {!r} has no rated movies, so no seed movie can be chosen'.format(user_id)
        )

    # Seed: the title whose (first) rating by this user is the most recent.
    last_user_movie = user_movies[-1]
    seed_genres = change_string(title_genres[last_user_movie])

    seed_counts = count_vect.transform([seed_genres])
    seed_tfidf = tfidf_transformer.transform(seed_counts)

    # kneighbors returns (distances, indices); only the row positions are needed.
    _, neighbour_rows = neigh.kneighbors(seed_tfidf, return_distance=True)
    movies_to_score = movies.iloc[neighbour_rows[0]].title.values

    # Set lookup instead of scanning the NumPy array once per candidate.
    seen_titles = set(user_movies)
    titles = [title for title in movies_to_score if title not in seen_titles]
    scores = [algo.predict(uid=user_id, iid=title).est for title in titles]

    # Descending by predicted rating, truncated to top_n.
    for i in np.argsort(scores)[::-1][:top_n]:
        print(titles[i], scores[i])

In [None]:
# Inspect user 2.0's own ratings, lowest first, for comparison with the
# recommendations printed for the same user.
movies_with_ratings[movies_with_ratings.userId == 2.0].sort_values('rating')

In [None]:
# Print recommendations for user 2.0.
recommend_for_user(2.0)

In [None]:
# Sanity check of np.argsort: it returns the indices that would sort the input
# in ascending order (here [0, 2, 3, 1]).
np.argsort([1,9,5,7])