# Model-Based Collaborative Filtering with fastai

Sources: [fastai book, chapter 8](https://github.com/fastai/fastbook/blob/master/08_collab.ipynb) and the [fastai collab learner docs](https://docs.fast.ai/collab.html)

In [None]:
# Hyperparameters shared by the two collaborative-filtering learners below.
batch_size = 64              # bs passed to CollabDataLoaders.from_df
num_latent_factors = 50      # n_factors of the dot-product (PMF) learner
rating_range = (0, 5.5)      # y_range passed to both collab_learner calls
learning_rate = 0.005        # learning rate passed to fit_one_cycle
weight_decay = 0.1           # wd passed to fit_one_cycle
num_epoch = 5                # number of epochs each learner is trained for
layers = [100, 50]           # layers argument of the use_nn=True learner

In [None]:
!pip install fastai

In [None]:
from fastai.collab import *
from fastai.tabular.all import *
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [None]:
# Download (if needed) and extract the MovieLens 100k archive; `path` is the
# local directory that the data files are read from below.
path = untar_data(url=URLs.ML_100k)

## Import data

In [None]:
# Load the raw ratings. The file is tab-separated and has no header row,
# so the column names are supplied here.
ratings = pd.read_csv(
    path / 'u.data',
    sep='\t',
    header=None,
    names=['user', 'movie', 'rating', 'timestamp'],
)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [None]:
# Load the movie catalogue. Only the first two pipe-separated fields are
# kept (movie id and title); the file is read as latin-1.
movies = pd.read_csv(
    path / 'u.item',
    sep='|',
    encoding='latin-1',
    header=None,
    usecols=[0, 1],
    names=['movie', 'title'],
)
movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [None]:
# Attach each movie's title to its ratings. No join key is given, so pandas
# joins on the columns the two frames have in common.
ratings = pd.merge(ratings, movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [None]:
# TODO: compute the number of ratings per user

## Split data

In [None]:
# TODO: for each user, keep the latest 3 movie reviews as the test set

In [None]:
# Hold out 20% of the rating rows for the final evaluation; the fixed
# random_state makes the split repeatable.
train_df, test_df = train_test_split(
    ratings,
    test_size=0.2,
    random_state=42,
)

## Create Dataloader

In [None]:
# Build the dataloaders from the training rows only.
# The user and rating columns are named explicitly rather than relying on
# from_df's positional defaults (first and third column), so reordering the
# dataframe columns upstream cannot silently change what the model trains on.
dls = CollabDataLoaders.from_df(
    train_df,
    user_name='user',
    item_name='title',
    rating_name='rating',
    bs=batch_size,
)

## Fit learner

### Probabilistic matrix factorization

In [None]:
# Matrix-factorization learner: num_latent_factors latent factors, with
# predictions constrained to rating_range.
pmf_learner = collab_learner(
    dls,
    n_factors=num_latent_factors,
    y_range=rating_range,
)

In [None]:
# Train the matrix-factorization learner with the shared hyperparameters.
pmf_learner.fit_one_cycle(
    n_epoch=num_epoch,
    lr_max=learning_rate,
    wd=weight_decay,
)

In [None]:
# Display the model held by the matrix-factorization learner.
pmf_learner.model

### Deep learning

In [None]:
# Neural-network learner (use_nn=True) built on the same dataloaders, with
# the `layers` sizes defined above and predictions constrained to rating_range.
nn_learner = collab_learner(
    dls,
    use_nn=True,
    y_range=rating_range,
    layers=layers,
)

In [None]:
# Train the neural-network learner with the same schedule as the PMF learner.
nn_learner.fit_one_cycle(
    n_epoch=num_epoch,
    lr_max=learning_rate,
    wd=weight_decay,
)

In [None]:
# Display the model held by the neural-network learner.
nn_learner.model

## Evaluation of recommendations

* For ratings: mean squared error
* TODO: For generated recommendations per user: Mean average precision

In [None]:
# Constant baseline: predict the median training rating for every test row.
median_rating = train_df['rating'].median()
base_y_pred = np.array(
    [median_rating] * len(test_df)  # one copy of the median per test row
)

In [None]:
# Score the held-out rows with the matrix-factorization learner; get_preds
# returns the predictions together with the targets.
pmf_y_pred, y_true = pmf_learner.get_preds(
    dl=pmf_learner.dls.test_dl(test_df),
)

In [None]:
# Score the same held-out rows with the neural-network learner. y_true is
# reassigned here from the same test_df.
nn_y_pred, y_true = nn_learner.get_preds(
    dl=nn_learner.dls.test_dl(test_df),
)

In [None]:
# Mean squared error of each predictor on the held-out ratings.
# Arguments follow scikit-learn's (y_true, y_pred) order. MSE is symmetric,
# so the values are unchanged, but the order matters as soon as sample
# weights or an asymmetric metric are used.
base_mse = mean_squared_error(y_true, base_y_pred)
pmf_mse = mean_squared_error(y_true, pmf_y_pred)
nn_mse = mean_squared_error(y_true, nn_y_pred)

In [None]:
# Report the three test-set MSE values side by side.
print(f'Baseline MSE {base_mse}, PMF MSE: {pmf_mse}, NN MSE: {nn_mse}')