# Przygotowanie

## Instalacja brakujących pakietów

In [None]:
!pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)
[K     |████████████████████████████████| 11.8 MB 13.1 MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1630166 sha256=1d3c0a10503c883aeefa9a6d3f56c5f4f8e342809a15b7d0de82fc6473db39de
  Stored in directory: /root/.cache/pip/wheels/76/44/74/b498c42be47b2406bd27994e16c5188e337c657025ab400c1c
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.1 surprise-0.1


## Importy



In [None]:
import os
import pandas as pd
from collections import defaultdict

from surprise import SVD, Dataset, Reader, dump, accuracy
from surprise.model_selection import cross_validate, split, GridSearchCV
from datetime import datetime

## Stałe

In [None]:
# Ratings inputs (file names on the mounted drive): the full set and a smaller one.
RATINGS_FILE = 'ratings.csv'
RATINGS_SMALL_FILE = 'ratings_small.csv'
# Local tab-separated export of the prepared ratings; this is the file Surprise loads.
RATINGS_FILE_TMP = 'ratings_tmp.csv'

# Movie metadata inputs (file names on the mounted drive).
MOVIES_FILE = 'movies.csv'
GENRES_FILE = 'genres.csv'
MOVIES_GENRES_FILE = 'movies_genres.csv'

# Mount point used for Google Drive in the Colab runtime.
GOOGLE_PATH = '/content/drive'

## Montaż GOOGLE DRIVE

In [None]:
# Colab-only dependency: this import is available only inside a Google Colab runtime.
from google.colab import drive
# Mount Google Drive at GOOGLE_PATH so the CSV inputs can be read as regular files.
drive.mount(GOOGLE_PATH)

Mounted at /content/drive


In [None]:
# Directory inside the mounted drive that all input/output file names are joined with.
GOOGLE_MOUNT_PATH = os.path.join(GOOGLE_PATH, 'MyDrive')

## Ustawienia środowiska

In [None]:
# Upper bound on the number of rows pandas renders when a frame is displayed.
PANDAS_MAX_ROW = 20

# Keep displayed frames compact: at most 10 columns and PANDAS_MAX_ROW rows.
pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows', PANDAS_MAX_ROW)

# Załadowanie danych

## Movies

In [None]:
# Full path of the movies metadata file on the mounted drive.
MOVIES_CSV = os.path.join(GOOGLE_MOUNT_PATH, MOVIES_FILE)

In [None]:
# Load the movie metadata; later cells read its id, title, average_vote and count_vote columns.
movies = pd.read_csv(MOVIES_CSV)
movies.head()

Unnamed: 0,id,title,original_title,release_date,runtime,...,author_id,collection_id,gallery_id,original_language_id,status_id
0,2,Ariel,,1988-10-21,69.0,...,,,1,fi,2
1,3,Shadows in Paradise,Varjoja paratiisissa,1986-10-16,76.0,...,,,2,fi,2
2,5,Four Rooms,,1995-12-09,98.0,...,,,3,en,2
3,6,Judgment Night,,1993-10-15,110.0,...,,,4,en,2
4,11,Star Wars,,1977-05-25,121.0,...,,10.0,5,en,2


## Genres

In [None]:
# Full paths of the genre dictionary and of the movie-to-genre link table.
GENRES_CSV = os.path.join(GOOGLE_MOUNT_PATH, GENRES_FILE)
MOVIES_GENRES_CSV = os.path.join(GOOGLE_MOUNT_PATH, MOVIES_GENRES_FILE)

In [None]:
# Genre dictionary: one row per genre (id, name).
genres = pd.read_csv(GENRES_CSV)
genres.head()

Unnamed: 0,id,name
0,28,Action
1,12,Adventure
2,16,Animation
3,35,Comedy
4,80,Crime


In [None]:
# Link table: one row per (movie_id, genre_id) pair.
movies_genres = pd.read_csv(MOVIES_GENRES_CSV)
movies_genres.head()

Unnamed: 0,id,movie_id,genre_id
0,1,2,18
1,2,2,80
2,3,3,18
3,4,3,35
4,6,5,35


## Ratings

In [None]:
def zaladuj_oceny(RATING_FILE, ile_ocen = -1):
  """Load a ratings CSV from the mounted drive and export it for Surprise.

  Args:
    RATING_FILE: file name, resolved relative to GOOGLE_MOUNT_PATH.
    ile_ocen: when different from -1, only the last `ile_ocen` rows of the
      file are kept (and then sorted by their `date` value); -1 keeps all rows.

  Returns:
    DataFrame with the columns user_id, movie_id, rating (cast to int) and
    date (the timestamp rendered as a string).

  Side effect:
    The returned frame is also written, tab-separated and without the index,
    to RATINGS_FILE_TMP.
  """
  def _to_timestamp_text(raw_date):
    # 'YYYY-MM-DD HH:MM:SS' -> text form of datetime.timestamp() for that moment.
    return str(datetime.strptime(raw_date, '%Y-%m-%d %H:%M:%S').timestamp())

  loaded = pd.read_csv(os.path.join(GOOGLE_MOUNT_PATH, RATING_FILE))
  if ile_ocen != -1:
    # The tail is taken first; the sort only reorders the rows that were kept.
    loaded = loaded.tail(ile_ocen).sort_values(['date'])

  loaded['date'] = loaded.date.apply(_to_timestamp_text)

  integer_columns = {'user_id': 'int', 'movie_id': 'int', 'rating': 'int'}
  loaded = loaded[['user_id', 'movie_id', 'rating', 'date']].astype(integer_columns)
  loaded.to_csv(RATINGS_FILE_TMP, index=False, sep='\t')
  return loaded

In [None]:
def dodajOcenyNowegoUzytkownika(ratings, new_rating, output_path=None):
  """Append the ratings of one brand-new user and re-export the ratings file.

  The new user receives the id `ratings.user_id.max() + 1`, which is also
  stored in the module-level global USER_TEST so later cells can refer to it.

  Args:
    ratings: DataFrame with the columns user_id, movie_id, rating, date.
    new_rating: iterable of dicts with the keys 'movie_id', 'rating' and
      'date' (formatted as 'YYYY-MM-DD HH:MM:SS').
    output_path: where the tab-separated export is written; None (default)
      writes to RATINGS_FILE_TMP, exactly as before.

  Returns:
    A new DataFrame (fresh 0..n-1 index) holding the old and the new rows;
    the frame passed in is not modified.
  """
  global USER_TEST
  USER_TEST = ratings.user_id.max() + 1

  new_rows = [
    {
      'user_id': USER_TEST,
      'movie_id': entry['movie_id'],
      'rating': entry['rating'],
      # Same representation as in the loaded ratings: text form of the timestamp.
      'date': str(datetime.strptime(entry['date'], '%Y-%m-%d %H:%M:%S').timestamp()),
    }
    for entry in new_rating
  ]

  # DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
  frames = [ratings]
  if new_rows:
    frames.append(pd.DataFrame(new_rows))
  ratings = pd.concat(frames, ignore_index=True)

  ratings.to_csv(RATINGS_FILE_TMP if output_path is None else output_path, index=False, sep='\t')
  return ratings

### Przygotowanie listy filmów do predykcji

In [None]:
# Independent copy of the id/title columns; prediction columns are added to it later
# without touching `movies`.
s_movie =  movies[['id', 'title']].copy()

s_movie.head()

Unnamed: 0,id,title
0,2,Ariel
1,3,Shadows in Paradise
2,5,Four Rooms
3,6,Judgment Night
4,11,Star Wars


# Silnik rekomendacji - rankingi

Rozwiązanie oparte o formułę IMDB; zwraca najlepsze filmy z wybranej kategorii. Formuła IMDB:

$wr = \frac{v}{v+m} \cdot r + \frac{m}{v+m} \cdot c$

gdzie

v - liczba ocen danego filmu

m - minimalna liczba głosów, aby rozważyć dodanie do budowanej listy

r - średnia ocen danego filmu

c - średnia ocena wszystkich filmów z listy

In [None]:
def weighted_rating(movies, percentile=0.85, min_count=700):
    """Score movies with the IMDB weighted-rating formula.

        wr = v / (v + m) * r + m / (v + m) * c

    where v is a movie's `count_vote`, r its `average_vote`, m the vote
    threshold and c the mean `average_vote` of all rows in `movies`.

    Args:
        movies: DataFrame with at least the columns id, title, original_title,
            release_date, runtime, average_vote and count_vote.
        percentile: quantile of `count_vote` used as the threshold m; it is
            only consulted when `min_count` is None.
        min_count: fixed threshold m. The default of 700 reproduces the
            previous hard-coded behaviour; pass None to derive m from
            `percentile` instead.

    Returns:
        A copy of the listed columns with an extra `wr` column. The input
        frame is not modified and the result is not sorted.
    """
    if min_count is None:
        min_count = movies.count_vote.quantile(percentile)
    average_movies = movies.average_vote.mean()

    qualified = movies[
        ['id', 'title', 'original_title', 'release_date', 'runtime', 'average_vote', 'count_vote']
    ].copy()

    # Column-wise arithmetic instead of a row-by-row apply: same formula and operation
    # order, far cheaper, and an empty selection simply yields an empty `wr` column.
    votes_plus_threshold = qualified.count_vote + min_count
    qualified['wr'] = (
        qualified.count_vote / votes_plus_threshold * qualified.average_vote +
        min_count / votes_plus_threshold * average_movies
    )
    return qualified

## Top N Films

In [None]:
# Five movies with the highest weighted rating across the whole catalogue.
# NOTE(review): weighted_rating uses a vote threshold of 700 here, so the 0.9 percentile
# argument has no effect on this result.
weighted_rating(movies, 0.9).sort_values('wr', ascending=False).head(5)

Unnamed: 0,id,title,original_title,release_date,runtime,average_vote,count_vote,wr
223,278,The Shawshank Redemption,,1994-09-23,142.0,8.858029,91082.0,8.837168
186,238,The Godfather,,1972-03-14,175.0,8.679622,57070.0,8.64864
486,629,The Usual Suspects,,1995-07-19,106.0,8.600378,59271.0,8.571458
322,424,Schindler's List,,1993-11-29,195.0,8.533061,67662.0,8.508381
188,240,The Godfather: Part II,,1974-12-20,200.0,8.52695,36679.0,8.481926


In [None]:
ratings = zaladuj_oceny(RATINGS_FILE)
# Count the ratings per movie once (the original grouped the full ratings table twice
# inside a single expression) and keep only movies with fewer than 1000 ratings.
# NOTE: this overwrites `movies`, so every later cell sees only the filtered catalogue.
ratings_per_movie = ratings.groupby(['movie_id']).size()
movies = movies[movies.id.isin(ratings_per_movie[ratings_per_movie < 1000].keys())]

## Top N Action Films

In [None]:
# Resolve the genre name to its id. `.iloc[0]` extracts the scalar explicitly;
# calling int() on a one-element Series is deprecated in recent pandas.
action_genre_id = int(genres.loc[genres.name == 'Action', 'id'].iloc[0])
# Ids of all movies linked to that genre.
action_movie_ids = movies_genres.loc[movies_genres.genre_id == action_genre_id, 'movie_id']
# Five best weighted ratings among those movies.
weighted_rating(movies[movies.id.isin(action_movie_ids)], 0.80).sort_values('wr', ascending=False).head(5)

Unnamed: 0,id,title,original_title,release_date,runtime,average_vote,count_vote,wr
745,934,Rififi,Du rififi chez les hommes,1955-04-13,122.0,8.106312,903.0,7.056786
6624,14537,Harakiri,切腹,1962-09-15,135.0,8.260656,610.0,6.893915
5514,12496,The Twilight Samurai,Tasogare Seibei,2002-11-02,129.0,7.863248,819.0,6.867695
5195,11839,The Court Jester,,1955-12-24,101.0,7.83871,806.0,6.845968
4548,10971,A Night to Remember,,1958-12-16,123.0,7.648477,788.0,6.733218


# Silnik rekomendacji - personalizacja

## Przygotowanie danych

Sprawdzenie, jak zadziała czysty model na pełnym zbiorze danych po dodaniu kilku użytkowników testowych.

In [None]:
# Reload the full ratings, then add two hand-written test users. Each call below assigns
# the next free user id and stores it in USER_TEST, so afterwards USER_TEST is the second
# test user and USER_TEST - 1 the first one. Each call also rewrites RATINGS_FILE_TMP.
ratings = zaladuj_oceny(RATINGS_FILE)
ratings =  dodajOcenyNowegoUzytkownika(ratings, [
  { 'movie_id': 11, 'rating': 8, 'date': '2021-12-09 22:52:09', },      # Star Wars: Episode IV - A New Hope
  { 'movie_id': 1726, 'rating': 8, 'date': '2021-12-12 22:52:09', },    # Iron Man
  { 'movie_id': 19995, 'rating': 7, 'date': '2021-12-13 22:52:09', },   # Avatar (2009)
  { 'movie_id': 120, 'rating': 9, 'date': '2021-12-15 22:52:09', },     # The Lord of the Rings: The Fellowship of the Ring
  { 'movie_id': 121, 'rating': 9, 'date': '2021-12-15 23:52:09', },     # The Lord of the Rings: The Two Towers
  { 'movie_id': 122, 'rating': 9, 'date': '2021-12-16 22:52:09', },     # The Lord of the Rings: The Return of the King
  { 'movie_id': 8587, 'rating': 8, 'date': '2021-12-17 22:52:09', },    # The Lion King
  { 'movie_id': 157336, 'rating': 8, 'date': '2021-12-18 22:52:09', },  # Interstellar
  { 'movie_id': 8966, 'rating': 5, 'date': '2021-12-19 22:52:09', },    # Twilight
  { 'movie_id': 1858, 'rating': 6, 'date': '2021-12-20 22:52:09', },    # Transformers
  { 'movie_id': 9600, 'rating': 4, 'date': '2022-01-01 22:52:09', },    # Big Momma's House
  { 'movie_id': 10020, 'rating': 7, 'date': '2022-01-02 12:52:09', },   # Beauty and the Beast
  { 'movie_id': 771, 'rating': 8, 'date': '2022-01-03 12:01:09', },     # Home Alone
])
ratings =  dodajOcenyNowegoUzytkownika(ratings, [
  { 'movie_id': 11, 'rating': 10, 'date': '2015-03-10 22:52:09', },     # Star Wars: Episode IV - A New Hope
  { 'movie_id': 1893, 'rating': 10, 'date': '2015-03-12 22:52:09', },   # Star Wars: Episode I - The Phantom Menace
  { 'movie_id': 1894, 'rating': 10, 'date': '2015-03-13 22:52:09', },   # Star Wars: Episode II - Attack of the Clones
  { 'movie_id': 1895, 'rating': 10, 'date': '2015-03-15 22:52:09', },   # Star Wars: Episode III - Revenge of the Sith
  { 'movie_id': 13, 'rating': 10, 'date': '2021-12-25 22:52:09', },     # Forrest Gump
])
ratings

Unnamed: 0,user_id,movie_id,rating,date
0,1,197,2,1425941529.0
1,11,197,7,1231676989.0
2,22,197,10,1111937009.0
3,24,197,10,979870012.0
4,29,197,6,1044020005.0
...,...,...,...,...
25981470,270898,11,10,1426027929.0
25981471,270898,1893,10,1426200729.0
25981472,270898,1894,10,1426287129.0
25981473,270898,1895,10,1426459929.0


In [None]:
# Parse RATINGS_FILE_TMP as tab-separated "user item rating timestamp" lines on a 1-10
# scale; skip_lines=1 skips the header row.
reader = Reader(line_format='user item rating timestamp', rating_scale=(1, 10), sep='\t', skip_lines=1)
data = Dataset.load_from_file(RATINGS_FILE_TMP, reader=reader)

# Hold out 20% of the ratings for the final test.
# NOTE(review): no random_state is passed, so the split differs between runs.
train, test = split.train_test_split(data, test_size=0.2)

## Model SVD bez doboru parametrów

### Przygotowanie modelu

In [None]:
# Baseline: SVD with the library's default hyperparameters.
model = SVD()

### Trening modelu

In [None]:
# 5-fold cross-validation on the whole dataset, reporting RMSE and MAE per fold.
cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

# Final fit on the training part of the hold-out split; this is the model tested below.
model.fit(train)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.6507  1.6510  1.6516  1.6527  1.6514  1.6515  0.0007  
MAE (testset)     1.2403  1.2409  1.2411  1.2419  1.2413  1.2411  0.0005  
Fit time          1493.35 1514.25 1546.28 1577.34 1543.49 1534.94 28.84   
Test time         227.24  200.54  224.05  227.19  175.31  210.87  20.39   


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f4025aeb450>

### Test modelu

In [None]:
# Predictions of the fitted model for every rating in the hold-out set.
m_test = model.test(test)

In [None]:
# RMSE of the hold-out predictions (printed by the call and also returned).
accuracy.rmse(m_test)

RMSE: 1.6505


1.650509285517108

In [None]:
# Estimated rating of every movie for the first test user (USER_TEST - 1).
# Ids are passed as strings, matching the text file the dataset was loaded from.
s_movie['user_1'] = s_movie.apply(lambda movie: model.predict(uid=str(USER_TEST - 1), iid=str(movie.id)).est, axis=1)
# Recommendations: drop the movies this user already rated and show the 20 best estimates.
s_movie[~s_movie.id.isin(ratings[ratings.user_id == USER_TEST - 1].movie_id.unique())].sort_values('user_1', ascending=False).head(20)

Unnamed: 0,id,title,user_1
2293,5503,The Fugitive,9.451529
11709,27205,Inception,9.184371
42076,331214,Band of Brothers,9.161933
414,550,Fight Club,9.124002
6,13,Forrest Gump,9.043583
368,489,Good Will Hunting,9.035504
462,603,The Matrix,8.990754
68,105,Back to the Future,8.971478
24,38,Eternal Sunshine of the Spotless Mind,8.946322
42,77,Memento,8.936292


In [None]:
# Estimated rating of every movie for the second test user (USER_TEST).
s_movie['user_2'] = s_movie.apply(lambda movie: model.predict(uid=str(USER_TEST), iid=str(movie.id)).est, axis=1)
# Recommendations: drop the movies this user already rated and show the 20 best estimates.
s_movie[~s_movie.id.isin(ratings[ratings.user_id == USER_TEST].movie_id.unique())].sort_values('user_2', ascending=False).head(20)

Unnamed: 0,id,title,user_1,user_2
462,603,The Matrix,8.990754,10.0
570,745,The Sixth Sense,7.860559,10.0
486,629,The Usual Suspects,8.357201,9.845708
84,122,The Lord of the Rings: The Return of the King,9.183026,9.766419
82,120,The Lord of the Rings: The Fellowship of the Ring,9.032165,9.718672
42076,331214,Band of Brothers,9.161933,9.67128
1194,1892,Return of the Jedi,7.867557,9.668966
112,155,The Dark Knight,8.699605,9.668797
42,77,Memento,8.936292,9.667115
374,497,The Green Mile,8.055418,9.658556


## Dobór hiperparametrów

### Załadowanie mniejszego zestawu danych

Eksperymentowanie z całym modelem wymagałoby zbyt dużo zasobów. Do szukania optymalnych parametrów użyto mniejszego zestawu danych.

In [None]:
# Called for its side effect: RATINGS_FILE_TMP is rewritten from the small ratings file,
# so the dataset loaded next is the small one. The returned frame is only displayed;
# the `ratings` variable is left unchanged.
zaladuj_oceny(RATINGS_SMALL_FILE)

Unnamed: 0,user_id,movie_id,rating,date
0,383,807,10,789652009.0
1,383,8012,6,789652009.0
2,383,623,6,789652009.0
3,409,47018,8,828212412.0
4,409,8447,10,828212412.0
...,...,...,...,...
99805,251,7350,10,1476623131.0
99806,251,150540,10,1476623217.0
99807,251,38757,9,1476623282.0
99808,251,12,9,1476623300.0


### Przygotowanie

In [None]:
# Load whatever RATINGS_FILE_TMP currently holds (tab-separated, header row skipped, 1-10 scale).
reader = Reader(line_format='user item rating timestamp', rating_scale=(1, 10), sep='\t', skip_lines=1)
data = Dataset.load_from_file(RATINGS_FILE_TMP, reader=reader)

# Search space for SVD: 5 * 3 * 5 * 4 = 300 parameter combinations.
param_grid = {
    'n_factors': [50, 100, 300, 500, 700],
    'n_epochs': [15, 20, 25],
    'lr_all': [0.005, 0.008, 0.01, 0.012, 0.015],
    'reg_all': [0.08, 0.1, 0.15, 0.2],
}

### Dobór hiperparametrów metodą przeszukiwania GridSearchCV

In [None]:
# Exhaustive search over param_grid with 3-fold cross-validation, scored by RMSE and MAE;
# n_jobs=-1 runs the fits in parallel on all available cores.
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3, n_jobs=-1, joblib_verbose=3)
gs.fit(data)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   41.6s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:  8.3min
[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed: 20.6min
[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed: 58.9min
[Parallel(n_jobs=-1)]: Done 900 out of 900 | elapsed: 82.9min finished


### Zapisanie najlepszych wyników

#### Wyniki mierzone miarą RMSE

In [None]:
# Parameter combination with the best RMSE; reused below to build the tuned models.
params_rmse = gs.best_params['rmse']
params_rmse

{'lr_all': 0.015, 'n_epochs': 15, 'n_factors': 100, 'reg_all': 0.2}

#### Wyniki mierzone miarą MAE

In [None]:
# Parameter combination with the best MAE (kept for comparison).
params_mae = gs.best_params['mae']
params_mae

{'lr_all': 0.01, 'n_epochs': 20, 'n_factors': 100, 'reg_all': 0.15}

## Trening modelu z najlepszymi parametrami RMSE



### Przygotowanie danych

In [None]:
# Rebuild the full ratings with the same two test users as in the baseline experiment.
# The reload is required because RATINGS_FILE_TMP was last written from the small file.
# After both calls USER_TEST is the second test user and USER_TEST - 1 the first one.
ratings = zaladuj_oceny(RATINGS_FILE)
ratings =  dodajOcenyNowegoUzytkownika(ratings, [
  { 'movie_id': 11, 'rating': 8, 'date': '2021-12-09 22:52:09', },      # Star Wars: Episode IV - A New Hope
  { 'movie_id': 1726, 'rating': 8, 'date': '2021-12-12 22:52:09', },    # Iron Man
  { 'movie_id': 19995, 'rating': 7, 'date': '2021-12-13 22:52:09', },   # Avatar (2009)
  { 'movie_id': 120, 'rating': 9, 'date': '2021-12-15 22:52:09', },     # The Lord of the Rings: The Fellowship of the Ring
  { 'movie_id': 121, 'rating': 9, 'date': '2021-12-15 23:52:09', },     # The Lord of the Rings: The Two Towers
  { 'movie_id': 122, 'rating': 9, 'date': '2021-12-16 22:52:09', },     # The Lord of the Rings: The Return of the King
  { 'movie_id': 8587, 'rating': 8, 'date': '2021-12-17 22:52:09', },    # The Lion King
  { 'movie_id': 157336, 'rating': 8, 'date': '2021-12-18 22:52:09', },  # Interstellar
  { 'movie_id': 8966, 'rating': 5, 'date': '2021-12-19 22:52:09', },    # Twilight
  { 'movie_id': 1858, 'rating': 6, 'date': '2021-12-20 22:52:09', },    # Transformers
  { 'movie_id': 9600, 'rating': 4, 'date': '2022-01-01 22:52:09', },    # Big Momma's House
  { 'movie_id': 10020, 'rating': 7, 'date': '2022-01-02 12:52:09', },   # Beauty and the Beast
  { 'movie_id': 771, 'rating': 8, 'date': '2022-01-03 12:01:09', },     # Home Alone
])
ratings =  dodajOcenyNowegoUzytkownika(ratings, [
  { 'movie_id': 11, 'rating': 10, 'date': '2015-03-10 22:52:09', },     # Star Wars: Episode IV - A New Hope
  { 'movie_id': 1893, 'rating': 10, 'date': '2015-03-12 22:52:09', },   # Star Wars: Episode I - The Phantom Menace
  { 'movie_id': 1894, 'rating': 10, 'date': '2015-03-13 22:52:09', },   # Star Wars: Episode II - Attack of the Clones
  { 'movie_id': 1895, 'rating': 10, 'date': '2015-03-15 22:52:09', },   # Star Wars: Episode III - Revenge of the Sith
  { 'movie_id': 13, 'rating': 10, 'date': '2021-12-25 22:52:09', },     # Forrest Gump
])
ratings

Unnamed: 0,user_id,movie_id,rating,date
0,1,197,2,1425941529.0
1,11,197,7,1231676989.0
2,22,197,10,1111937009.0
3,24,197,10,979870012.0
4,29,197,6,1044020005.0
...,...,...,...,...
25981470,270898,11,10,1426027929.0
25981471,270898,1893,10,1426200729.0
25981472,270898,1894,10,1426287129.0
25981473,270898,1895,10,1426459929.0


### Przygotowanie modelu

In [None]:
# Load the full ratings (including both test users) from RATINGS_FILE_TMP.
reader = Reader(line_format='user item rating timestamp', rating_scale=(1, 10), sep='\t', skip_lines=1)
data = Dataset.load_from_file(RATINGS_FILE_TMP, reader=reader)

# Hold out 20% of the ratings for the final test.
# NOTE(review): no random_state is passed, so the split differs between runs.
train, test = split.train_test_split(data, test_size=0.2)

In [None]:
# SVD configured with the RMSE-optimal parameters found by the grid search.
model = SVD(lr_all=params_rmse['lr_all'], n_epochs=params_rmse['n_epochs'], n_factors=params_rmse['n_factors'], reg_all=params_rmse['reg_all'])

### Trening modelu

In [None]:
# 5-fold cross-validation on the whole dataset, reporting RMSE and MSE per fold.
cross_validate(model, data, measures=['RMSE', 'MSE'], cv=5, verbose=True, n_jobs=1)

# Fit on the training part of the hold-out split, as the other experiments in this
# notebook do. cross_validate only trains on its own folds, never on `train`; without
# this fit the model tested next has seen part of `test` during training, which makes
# the hold-out RMSE look better than it really is.
model.fit(train)

Evaluating RMSE, MSE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.6323  1.6311  1.6321  1.6312  1.6314  1.6316  0.0005  
MSE (testset)     2.6642  2.6605  2.6636  2.6607  2.6616  2.6621  0.0015  
Fit time          1902.19 1936.75 1975.72 1995.78 2003.66 1962.82 38.15   
Test time         231.36  251.19  218.02  269.65  268.06  247.66  20.28   


{'fit_time': (1902.1934461593628,
  1936.749757051468,
  1975.7199921607971,
  1995.7808756828308,
  2003.6639802455902),
 'test_mse': array([2.66424977, 2.66054732, 2.66362023, 2.66072093, 2.66156986]),
 'test_rmse': array([1.63225297, 1.63111843, 1.63206012, 1.63117164, 1.63143184]),
 'test_time': (231.35679531097412,
  251.1932897567749,
  218.02030539512634,
  269.65231442451477,
  268.05984592437744)}

### Test modelu

In [None]:
# Predict every rating in the hold-out set and report the RMSE of those predictions.
m_test = model.test(test)
accuracy.rmse(m_test)

RMSE: 1.5515


1.5515365636443497

In [None]:
# Overwrite the user_1 column with the tuned model's estimates for the first test user.
s_movie['user_1'] = s_movie.apply(lambda movie: model.predict(uid=str(USER_TEST - 1), iid=str(movie.id)).est, axis=1)
# Best estimates among the movies this user has not rated (head() shows 5 rows by default).
s_movie[~s_movie.id.isin(ratings[ratings.user_id == USER_TEST - 1].movie_id.unique())].sort_values('user_1', ascending=False).head()

Unnamed: 0,id,title,user_1
35895,192040,Planet Earth,8.869362
223,278,The Shawshank Redemption,8.838262
7,14,American Beauty,8.819392
44974,420714,Planet Earth II,8.783792
20212,49961,A Song of Lisbon,8.775225


In [None]:
# Overwrite the user_2 column with the tuned model's estimates for the second test user.
s_movie['user_2'] = s_movie.apply(lambda movie: model.predict(uid=str(USER_TEST), iid=str(movie.id)).est, axis=1)
# 20 best estimates among the movies this user has not rated.
s_movie[~s_movie.id.isin(ratings[ratings.user_id == USER_TEST].movie_id.unique())].sort_values('user_2', ascending=False).head(20)

Unnamed: 0,id,title,user_1,user_2
25861,76600,Avatar 2,6.209008,10.0
19921,49026,The Dark Knight Rises,8.012351,9.912635
9031,19995,Avatar,7.367769,9.845073
33105,140607,Star Wars: The Force Awakens,7.909278,9.789712
61,98,Gladiator,8.279036,9.776319
44110,385738,Dreamland,7.916426,9.776185
22097,57158,The Hobbit: The Desolation of Smaug,7.30338,9.756186
14531,35123,The Verdict,7.375048,9.753789
630,808,Shrek,7.783744,9.739753
33136,141052,Justice League,6.279764,9.734443


# Zapisanie czystego modelu

### Przygotowanie danych

In [None]:
# Reload the original ratings without the test users; this also rewrites RATINGS_FILE_TMP,
# so the dataset loaded next contains real users only.
ratings = zaladuj_oceny(RATINGS_FILE)
ratings

Unnamed: 0,user_id,movie_id,rating,date
0,1,197,2,1425941529.0
1,11,197,7,1231676989.0
2,22,197,10,1111937009.0
3,24,197,10,979870012.0
4,29,197,6,1044020005.0
...,...,...,...,...
25981452,270887,126076,10,1472697413.0
25981453,270887,56491,10,1486961830.0
25981454,270887,302042,8,1479088587.0
25981455,270887,23476,8,1493084042.0


In [None]:
# Load the clean ratings from RATINGS_FILE_TMP (tab-separated, header row skipped, 1-10 scale).
reader = Reader(line_format='user item rating timestamp', rating_scale=(1, 10), sep='\t', skip_lines=1)
data = Dataset.load_from_file(RATINGS_FILE_TMP, reader=reader)

# Hold out 20% of the ratings for the final test.
# NOTE(review): no random_state is passed, so the split differs between runs.
train, test = split.train_test_split(data, test_size=0.2)

### Przygotowanie modelu

In [None]:
# Fresh SVD instance with the RMSE-optimal parameters; this is the model that gets saved.
model = SVD(lr_all=params_rmse['lr_all'], n_epochs=params_rmse['n_epochs'], n_factors=params_rmse['n_factors'], reg_all=params_rmse['reg_all'])

### Trening modelu

In [None]:
# 5-fold cross-validation on the whole dataset, reporting RMSE and MSE per fold.
cross_validate(model, data, measures=['RMSE', 'MSE'], cv=5, verbose=True, n_jobs=1)

# Final fit on the training part of the hold-out split (80% of the ratings).
model.fit(train)

Evaluating RMSE, MSE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.6885  1.6881  1.6879  1.6876  1.6889  1.6882  0.0005  
MSE (testset)     2.8510  2.8495  2.8492  2.8478  2.8525  2.8500  0.0016  
Fit time          836.07  889.75  901.95  908.16  904.31  888.05  26.71   
Test time         165.37  188.13  191.63  212.53  177.90  187.11  15.65   


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f33896bdd10>

### Test modelu

In [None]:
# Predictions for the hold-out set; they are stored together with the model further below.
m_test = model.test(test)

In [None]:
# RMSE of the hold-out predictions (printed by the call and also returned).
accuracy.rmse(m_test)

RMSE: 1.6871


1.687094485405961

In [None]:
# Sanity check of the model about to be saved: estimates for the user with raw id '1'.
s_movie['model'] = s_movie.apply(lambda movie: model.predict('1', str(movie.id)).est, axis=1)
# 20 highest estimates; movies that user already rated are not filtered out here.
s_movie.sort_values('model', ascending=False).head(20)

Unnamed: 0,id,title,model
223,278,The Shawshank Redemption,9.74985
44699,409926,Cosmos,9.741451
18502,44671,Racing Dreams,9.718931
7,14,American Beauty,9.698026
44974,420714,Planet Earth II,9.660885
21077,53197,A Grin Without a Cat,9.626146
84,122,The Lord of the Rings: The Return of the King,9.607311
20504,51129,The Master and Margarita,9.607004
42076,331214,Band of Brothers,9.578199
40731,295069,Saturday Night,9.573657


### Zapisanie modelu

In [None]:
# Destination of the serialized model on the mounted drive.
MODEL_SAVE = os.path.join(GOOGLE_MOUNT_PATH, 'model.pickle')

In [None]:
# Persist the fitted model together with its hold-out predictions using Surprise's dump helper.
dump.dump(MODEL_SAVE, algo=model, predictions=m_test)