# Przygotowanie środowiska colab

In [1]:
!pip install scikit-surprise

Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/97/37/5d334adaf5ddd65da99fc65f6507e0e4599d092ba048f4302fe8775619e8/scikit-surprise-1.1.1.tar.gz (11.8MB)
[K     |████████████████████████████████| 11.8MB 8.3MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1617587 sha256=fe49925fc2c3c289246b997ec5c833547fc6b6a3bb06e310d7251b4dcc59f9b0
  Stored in directory: /root/.cache/pip/wheels/78/9c/3d/41b419c9d2aff5b6e2b4c0fc8d25c538202834058f9ed110d0
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.1


In [2]:
!ln -s /root/.surprise_data/ml-100k ml-100k

## Przygotowanie algorytmów i biblioteki surprise

In [3]:
from surprise import SlopeOne
from surprise import CoClustering

from surprise import SVD
from surprise import SVDpp
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.prediction_algorithms.random_pred import NormalPredictor

In [4]:
data = Dataset.load_builtin('ml-100k', prompt=False)

Trying to download dataset from http://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to /root/.surprise_data/ml-100k


In [5]:
full_data = data.build_full_trainset()
print("Number of users: ", data.build_full_trainset().n_users)
print("Number of movies: ", data.build_full_trainset().n_items)
print("Number of ratings: ", data.build_full_trainset().n_ratings)
print("Rating scale: ", data.build_full_trainset().rating_scale)

Number of users:  943
Number of movies:  1682
Number of ratings:  100000
Rating scale:  (1, 5)


# Eksperymenty dla różnych hiperparametrów - ilości klastrów

In [8]:
algorithms = {
    'SlopeOne': SlopeOne(),
    'CoClustering': CoClustering(), # warości domyślne: 3, 3
    'CoClustering_3': CoClustering(n_cltr_u = 3, n_cltr_i = 3),
    'CoClustering_5': CoClustering(n_cltr_u = 5, n_cltr_i = 5),
    'CoClustering_7': CoClustering(n_cltr_u = 7, n_cltr_i = 7),
    'CoClustering_11': CoClustering(n_cltr_u = 11, n_cltr_i = 11),
    'SVD': SVD(),
    'SVDpp': SVDpp(),
    'NormalPredictor': NormalPredictor()
}

In [9]:
for k in algorithms:
  print(f'\nAlgorithm {k}')
  cross_validate(algorithms[k], data, measures=['MAE'], cv=5, verbose=True);


Algorithm SlopeOne
Evaluating MAE of algorithm SlopeOne on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     0.7367  0.7452  0.7449  0.7438  0.7435  0.7428  0.0031  
Fit time          1.18    1.12    1.18    1.13    1.14    1.15    0.03    
Test time         4.46    4.23    4.31    4.37    4.26    4.33    0.08    

Algorithm CoClustering
Evaluating MAE of algorithm CoClustering on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     0.7660  0.7627  0.7514  0.7533  0.7485  0.7564  0.0068  
Fit time          2.49    2.52    2.52    2.52    2.49    2.51    0.02    
Test time         0.15    0.27    0.15    0.28    0.27    0.22    0.06    

Algorithm CoClustering_3
Evaluating MAE of algorithm CoClustering on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     0.7504  0.7701  0.7567  0.7503  0.7527  0.7561  0.0074  
Fit time     

In [None]:
trainset, testset = train_test_split(data, test_size=.3)

for k in algorithms:
  algorithm = algorithms[k]
  print(f'\nAlgorithm {k}')
  algorithm.fit(trainset)
  predictions = algorithm.test(testset)
  accuracy.mae(predictions)