## 0. Imports

In [1]:
# Reload edited modules automatically before each cell is executed
# (autoreload mode 2 applies to all imported modules).
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from pathlib import Path
import pandas as pd
from utils.generic import *
from utils.nn import *

## 1. Dataset

In [52]:
# Root data folder; the loading cells below append the dataset sub-folder to it.
path = Path('./data')

MovieLens 100K

In [55]:
# Read the MovieLens 100K release (CSV files under 'ml-latest-small') and
# unpack the result into ratings_100k and movies_100k.
ratings_100k, movies_100k = import_datasets(
    path / 'ml-latest-small',
    filetype='csv',
)
# Preview the first ratings rows.
ratings_100k.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


MovieLens 1M

In [56]:
# Read the MovieLens 1M release ('.dat' files under 'ml-1m') and
# unpack the result into ratings_1m and movies_1m.
ratings_1m, movies_1m = import_datasets(
    path / 'ml-1m',
    filetype='dat',
)
# Preview the first ratings rows.
ratings_1m.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [61]:
# Dataset used by the rest of the notebook; assign ratings_1m here to switch.
ratings_df = ratings_100k

# test_size and bs are forwarded to CollabData unchanged.
# NOTE(review): presumably the held-out fraction and the batch size -- confirm in utils.
ratings_data = CollabData(
    ratings_df,
    test_size=0.1,
    bs=256,
)
# Display a sample of (userId, movieId, rating) rows.
ratings_data.show_batch()

   userId  movieId  rating
0   306.0   1062.0     2.5
1     9.0   7039.0     4.5
2   218.0    190.0     4.0
3   602.0   2200.0     4.0
4   356.0   5363.0     4.0
5   563.0   6985.0     5.0
6   303.0   1216.0     4.0
7   199.0   1959.0     3.0
8    61.0   7609.0     3.5
9    90.0   2036.0     3.0


## 2. Learner & Model

In [83]:
# Model hyperparameters (see the printed model below for how they are used).
n_factors = 100                   # width of the user and movie embeddings
embedding_dropout = 0.02          # dropout layer `d` in the model
hidden = [300, 200, 100]          # output sizes of the hidden Linear layers
dense_dropouts = [0.5, 0.4, 0.3]  # dropout after each hidden layer, in order

# Build the learner around EmbedNet with the settings above.
learn = CollabLearner(
    ratings_data,
    EmbedNet,
    n_factors=n_factors,
    embedding_dropout=embedding_dropout,
    hidden=hidden,
    dense_dropouts=dense_dropouts,
)
# Show the resulting architecture.
learn.model

EmbedNet(
  (u): Embedding(610, 100)
  (m): Embedding(9724, 100)
  (d): Dropout(p=0.02, inplace=False)
  (hidden_fc): Sequential(
    (0): Linear(in_features=200, out_features=300, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=300, out_features=200, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.4, inplace=False)
    (6): Linear(in_features=200, out_features=100, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.3, inplace=False)
  )
  (last_fc): Linear(in_features=100, out_features=1, bias=True)
)

## 4. Train

### 4.1 Static Learning Rate

In [103]:
# Settings for the constant-learning-rate run.
n_epochs = 10
lr = 1e-3
wd = 0.5  # NOTE(review): presumably weight decay -- confirm in CollabLearner.fit

# random_weights() is called before fitting so this run does not continue from
# an earlier fit. NOTE(review): presumably re-initialises the model -- confirm in utils.nn.
learn.model.random_weights()
learn.fit(n_epochs, lr, wd)

epoch: 0, train loss: 1.0141748189926147, validation loss: 0.9733198285102844
epoch: 1, train loss: 0.9547224044799805, validation loss: 0.9447818994522095
epoch: 2, train loss: 0.9278314113616943, validation loss: 0.9248769879341125
epoch: 3, train loss: 0.8993873000144958, validation loss: 0.9141201972961426
epoch: 4, train loss: 0.8702763915061951, validation loss: 0.897013783454895
epoch: 5, train loss: 0.8431944251060486, validation loss: 0.8860737681388855
epoch: 6, train loss: 0.8248551487922668, validation loss: 0.884974479675293
epoch: 7, train loss: 0.8125748038291931, validation loss: 0.8762525320053101
epoch: 8, train loss: 0.8022601008415222, validation loss: 0.8797053098678589
epoch: 9, train loss: 0.791161060333252, validation loss: 0.8852871060371399


### 4.2 Dynamic Learning Rate with One-Cycle Policy

In [101]:
# Settings for the one-cycle run.
n_epochs = 10
lrs = 6e-3  # passed to fit_one_cycle as lr_max
wd = 1.0    # NOTE(review): presumably weight decay -- confirm in fit_one_cycle

# random_weights() is called before fitting so this run does not continue from
# an earlier fit. NOTE(review): presumably re-initialises the model -- confirm in utils.nn.
learn.model.random_weights()
learn.fit_one_cycle(
    cycle_len=n_epochs,
    lr_max=lrs,
    wd=wd,
    div_factor=7,
    final_div=10,
    pct_start=0.2,
    moms=(0.95, 0.85),
)

epoch: 0, train loss: 1.025206208229065, validation loss: 0.970805287361145
epoch: 1, train loss: 0.9291591644287109, validation loss: 0.9127486944198608
epoch: 2, train loss: 0.8917130827903748, validation loss: 0.897208034992218
epoch: 3, train loss: 0.885204017162323, validation loss: 0.9038486480712891
epoch: 4, train loss: 0.8784090876579285, validation loss: 0.893211841583252
epoch: 5, train loss: 0.8668302893638611, validation loss: 0.8838712573051453
epoch: 6, train loss: 0.8510176539421082, validation loss: 0.8762249946594238
epoch: 7, train loss: 0.8300923705101013, validation loss: 0.8743737936019897
epoch: 8, train loss: 0.8037224411964417, validation loss: 0.874024510383606
epoch: 9, train loss: 0.7759720087051392, validation loss: 0.8688275218009949


## 5. Predict

In [134]:
# Get predictions for the 'val' split.
# NOTE(review): the two output columns look like [predicted rating, actual rating]
# -- confirm against get_preds in utils.
learn.get_preds('val')

array([[3.85365057, 5.        ],
       [3.82093   , 4.5       ],
       [3.35521889, 4.        ],
       ...,
       [3.80756617, 4.5       ],
       [3.6175766 , 3.        ],
       [3.52774763, 3.        ]])

## 6. Fine Tuning

In [10]:
# TODO: fine-tuning is not implemented yet.

## 7. Evaluation

In [11]:
# TODO: evaluation is not implemented yet.