# Collaborative filtering

In [1]:
# It's a good idea to ensure you're running the latest version of any libraries you need.
# `!pip install -Uqq <libraries>` upgrades to the latest version of <libraries>
# NB: You can safely ignore any warnings or errors pip spits out about running as root or incompatibilities
!pip install -Uqq fastai

In [2]:
# import os
from fastcore.all import *
from fastai.collab import *
from fastai.tabular.all import *
import pandas as pd
from fastdownload import download_url
from fastai.vision.all import *

In [28]:
# Use this to download the example dataset instead of the local copy:
# path = untar_data(URLs.ML_SAMPLE)
# path = path/'ratings.csv'

# Assemble the path from its components so it resolves on every OS; a string
# with literal backslashes is only split into directories on Windows.
path = Path('..')/'datasets'/'movies'/'ratings_small.csv'

# Load the ratings table and preview the first rows.
df = pd.read_csv(path)
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


## Tabular learner

In [29]:
# DataLoaders for the tabular model: user and movie ids are categorical inputs,
# the timestamp is a continuous input, and `rating` is the target.
#   * `path` is the working directory fastai associates with the DataLoaders,
#     so pass the folder containing the CSV rather than the CSV file itself.
#   * `Normalize` standardises the continuous column; the raw timestamps are
#     epoch seconds on the order of 1e9.
dls = TabularDataLoaders.from_df(df,
                                 path=path.parent,
                                 procs=[Categorify, Normalize],
                                 cat_names=['userId', 'movieId'],
                                 cont_names=['timestamp'],
                                 y_names='rating',
                                 bs=64)
# Uncomment to preview a batch:
# dls.show_batch()

In [30]:
# Learner for the tabular DataLoaders. `y_range` uses the same (0.5, 5.5)
# bounds as the collaborative learner further down, so both models are
# constrained to the same rating interval.
learn = tabular_learner(dls, y_range=(0.5, 5.5))
# Train for three epochs with the one-cycle schedule.
learn.fit_one_cycle(3)

epoch,train_loss,valid_loss,time
0,0.8567,0.849904,00:29
1,0.794739,0.771307,00:28
2,0.64845,0.767928,00:28


In [31]:
# Continue training: one initial epoch followed by three more (see the two
# result tables below).
# NOTE(review): fine_tune is fastai's transfer-learning helper; this learner was
# trained from scratch in the previous cell - confirm that another
# fit_one_cycle isn't what is intended here.
learn.fine_tune(epochs=3)

epoch,train_loss,valid_loss,time
0,0.783015,0.821867,00:29


epoch,train_loss,valid_loss,time
0,0.696961,0.792319,00:29
1,0.647349,0.785243,00:28
2,0.55092,0.802689,00:28


In [35]:
# Test on some examples
data = [[73, 1097, 1255504951],  # -> 4
       [1, 31, 1260759144]]  # -> 2.5
df = pd.DataFrame(data, columns=['userId', 'movieId', 'timestamp'])
dl = learn.dls.test_dl(df)
rating,probs = learn.get_preds(1, dl)
print(f"Rating: {rating}.")

Rating: tensor([[3.7432],
        [2.5423]]).


## Collaborative learner

In [46]:
# Load a fresh copy of the ratings table for the collaborative-filtering
# section and show its first five rows.
df = pd.read_csv(filepath_or_buffer=path)
df.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [47]:
# DataLoaders for collaborative filtering. The user, item and rating columns
# are named explicitly rather than relying on them being the first three
# columns of `df`, so an extra or reordered column cannot silently change
# what the model is trained on.
dls = CollabDataLoaders.from_df(df,
                                user_name='userId',
                                item_name='movieId',
                                rating_name='rating',
                                bs=64)
# Uncomment to preview a batch:
# dls.show_batch()

In [48]:
# Collaborative-filtering learner; y_range passes the (0.5, 5.5) rating
# bounds to fastai.
learn = collab_learner(
    dls,
    y_range=(0.5, 5.5),
)
# Three epochs with the one-cycle schedule.
learn.fit_one_cycle(n_epoch=3)

epoch,train_loss,valid_loss,time
0,0.991741,0.965544,00:13
1,0.787455,0.81415,00:13
2,0.721019,0.802621,00:13


In [49]:
# Continue training: one initial epoch followed by three more (see the two
# result tables below).
# NOTE(review): fine_tune is fastai's transfer-learning helper; this learner was
# trained from scratch in the previous cell - confirm that another
# fit_one_cycle isn't what is intended here.
learn.fine_tune(epochs=3)

epoch,train_loss,valid_loss,time
0,0.731023,0.769657,00:13


epoch,train_loss,valid_loss,time
0,0.631885,0.754297,00:13
1,0.615837,0.745336,00:13
2,0.520116,0.745153,00:13


In [52]:
# Test on some examples
data = [[73, 1097],  # -> 4
       [1, 31]]  # -> 2.5
df = pd.DataFrame(data, columns=['userId', 'movieId'])
dl = learn.dls.test_dl(df)
rating, probs = learn.get_preds(1, dl)
print(f"Rating: {rating}.")

Rating: tensor([3.8820, 2.5338]).
