# Test

> Ce fichier permet de tester les différentes fonctions implémentées et de vérifier qu'elles fonctionnent correctement.

Charger les bibliothèques et les données

In [1]:
%autosave 0
%reload_ext autoreload
%autoreload 2

Autosave disabled


# load_data

In [None]:
from data import *

# Load the dataset with tiny=True and check it against the expected
# dimensions and number of known (non-NaN) entries.
M = load_data(tiny=True)

expected_shape = (50, 40)
assert M.shape == expected_shape

known_mask = ~np.isnan(M)
assert np.sum(known_mask) == 389

# movie_title is exercised with a scalar index, then with an index array.
for movie_idx in (0, np.arange(4)):
    print(movie_title(movie_idx))

# popularity

In [None]:
from data import *
import popularity

M = load_data(tiny=True)

# Mean rating per movie. M is indexed as M[user, movie] (see the
# M[3, rec] / movie_title(rec) checks below), so the average over users
# is taken along axis 0; NaN (missing) entries are ignored by nanmean.
print(np.nanmean(M, axis=0))

# Recommendation for user 3: with new=True the recommended movie must be
# one the user has no rating for (NaN in M).
rec = popularity.recommend(M, 3, new=True)
print(rec)
print(movie_title(rec))
assert np.isnan(M[3, rec])

# Same check for user 5 (described in the original notes as a user who has
# already seen the most popular movie).
rec = popularity.recommend(M, 5, new=True)
print(rec)
print(movie_title(rec))
assert np.isnan(M[5, rec])

# Completing the whole matrix must leave no NaN and must keep every
# rating that was already known unchanged.
M_completed = popularity.complete(M)
known = ~np.isnan(M)
assert np.all(~np.isnan(M_completed))
assert np.all(M_completed[known] == M[known])

# knn

In [None]:
from data import *
import knn

M = load_data(tiny=True)

# Recommendation for user 3 with k=10: with new=True the recommended
# movie must be unrated (NaN) for that user.
user = 3
rec = knn.recommend(M, user, new=True, k=10)
print(rec)
print(movie_title(rec))
assert np.isnan(M[user, rec])

# Completing the whole matrix must not leave any NaN behind.
M_completed = knn.complete(M, k=10)
assert not np.any(np.isnan(M_completed))

# svd

In [None]:
from data import *
import svd
from eval import *
import matplotlib.pyplot as plt

M = load_data(tiny=True)

# Recommendation for user 3 with k=10: with new=True the recommended
# movie must be unrated (NaN) for that user.
user = 3
rec = svd.recommend(M, user, new=True, k=10)
print(rec)
print(movie_title(rec))
assert np.isnan(M[user, rec])

# Completing the whole matrix must not leave any NaN behind.
M_completed = svd.complete(M, k=10)
assert not np.any(np.isnan(M_completed))

# Training error for k = 0..39; it is expected to decrease as k grows.
err = []
for rank in range(40):
    err.append(RMSE(svd.complete(M, rank), M))

plt.plot(err)
plt.grid()

# With k equal to the smaller matrix dimension the training error
# should be (numerically) zero.
full_rank = min(M.shape)
M_completed = svd.complete(M, k=full_rank)
assert RMSE(M_completed, M) < 10**-6

# ALS

In [None]:
from data import *
import als
from eval import *
import matplotlib.pyplot as plt

M = load_data(tiny=True)

# Recommendation for user 3 with k=10: with new=True the recommended
# movie must be unrated (NaN) for that user.
user = 3
rec = als.recommend(M, user, new=True, k=10)
print(rec)
print(movie_title(rec))
assert np.isnan(M[user, rec])

# Completing the whole matrix must not leave any NaN behind.
M_completed = als.complete(M, k=10)
assert not np.any(np.isnan(M_completed))


# erreur (d'entrainement) diminue quand k augmente

In [None]:
# Training RMSE of ALS with 5 iterations, for k = 0..39.
err = []
for rank in range(40):
    completed = als.complete(M, rank, n_iter=5)
    err.append(RMSE(completed, M))

print(err)
plt.plot(err)
plt.grid()

# erreur (d'entrainement) diminue quand nombre d'itérations augmente

In [None]:
# Training RMSE of ALS with 20 iterations, for k = 0..39.
err20 = [RMSE(als.complete(M, k, n_iter=20), M) for k in range(40)]

print(err20)
# Compare against `err` (5 iterations, computed in the previous cell).
# matplotlib has no `col` keyword (that is the R spelling); the line
# colour is set with `color`.
plt.plot(err, color='C0', label='5 iterations')
plt.plot(err20, color='C1', label='20 iterations')
plt.legend()
plt.grid()

# erreur (d'entrainement) nulle pour k=min(dim()) et lambda très petit

In [None]:
# With k equal to the smaller matrix dimension and a very small
# regularisation (lambd), the training error should go to zero once
# enough iterations are run.
k_full = min(M.shape[0], M.shape[1])
for n_iter in (1, 3, 10):
    M_completed = als.complete(M, k=k_full, n_iter=n_iter, lambd=10**-8)
    rmse = RMSE(M_completed, M)
    # Print explicitly: a bare expression in the middle of a cell is
    # evaluated but never displayed.
    print(n_iter, rmse)

# Only the last run (n_iter=10) is required to reach zero training error.
assert rmse < 10**-6







# eval

In [None]:
from data import *
from eval import *
import popularity
import knn
import svd

# Load the data with tiny=True; M is the matrix the evaluation cells
# below operate on.
M = load_data(tiny=True)

# get_train_val

In [None]:
# Split M into a training and a validation matrix (second argument 0.9).
M_train, M_validation = get_train_val(M, 0.9)
# Counts the NaN entries of M_train per column (axis=0) and requires the
# count to be non-zero for every column, i.e. each column of M_train
# contains at least one NaN.
# NOTE(review): confirm this is the property get_train_val is meant to
# guarantee (rather than e.g. "each column keeps at least one rating").
assert np.all(np.sum(np.isnan(M_train), axis=0))

# RMSE

In [None]:
# Show the first four entries of row 0; the expected value below assumes
# the known ratings among them are 5, 3 and 4 (presumably the fourth is
# NaN; check against the printed row).
print(M[0, :4])
assert np.sum(~np.isnan(M)) == 389

# Zero out those four entries in a copy, so the only errors against M are
# the original ratings themselves, averaged over the 389 known entries.
M_comp = M.copy()
M_comp[0, :4] = 0
expected = np.sqrt((5**2 + 3**2 + 4**2) / 389)
# Compare with a tolerance: exact equality between two floating-point
# square roots depends on the order of operations inside RMSE.
assert np.isclose(RMSE(M_comp, M), expected)

# quantitative_comparison

In [None]:
# Each entry pairs a completion function with the label used to report it.
# The function is called with one argument, named M_train here, and must
# complete THAT matrix: using the global M instead would fit the model on
# the held-out ratings as well and make the comparison meaningless.
recommenders = [
    {"fn": popularity.complete, "label": "popularity"},
    {"fn": lambda M_train: knn.complete(M_train, k=10), "label": "knn10"},
    {"fn": lambda M_train: svd.complete(M_train, 10), "label": "svd10"},
    {"fn": lambda M_train: svd.complete(M_train, 20), "label": "svd20"},
    ]
# Compare the recommenders on M with the RMSE metric (prop=0.8, nrep=10).
quantitative_comparison(RMSE, M, recommenders, prop=0.8, nrep=10)


