## 0. Imports

In [1]:
# Load (or reload) the autoreload extension and reload modules before each
# cell runs, so edits to local helper modules are picked up without
# restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from pathlib import Path
import pandas as pd
from nn_utils import *

## 1. Dataset

In [3]:
# Dataset directory, relative to the notebook's working directory.
# ratings.csv and movies.csv are read from here in the next cell.
path = Path('./data/ml-latest-small')

In [4]:
# Read the two tables used by this notebook from the dataset directory.
ratings_df = pd.read_csv(path / 'ratings.csv')
movies_df = pd.read_csv(path / 'movies.csv')

# Lookup table movieId -> title, pairing the two columns row by row.
# If a movieId were repeated, the last title seen would win.
id_to_movie = {
    movie_id: title
    for movie_id, title in zip(movies_df['movieId'], movies_df['title'])
}

In [5]:
# Preview the first rows of the raw ratings table
# (columns: userId, movieId, rating, timestamp).
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [6]:
# Wrap the ratings frame in the project's CollabData container.
# NOTE(review): test_size presumably is the held-out fraction and bs the
# batch size -- confirm against nn_utils.
# NOTE(review): no random seed is set anywhere in this notebook; if CollabData
# splits or shuffles randomly, results will differ between runs.
ratings_data = CollabData(
    ratings_df,
    test_size=0.2,
    bs=2048
)

# Display a sample of (userId, movieId, rating) rows from the data object.
ratings_data.show_batch()

   userId  movieId  rating
0   104.0   8145.0     4.5
1   554.0   2729.0     4.0
2   552.0   5701.0     4.0
3   558.0    527.0     3.0
4   589.0    257.0     4.5
5    88.0   8697.0     3.5
6   293.0   1857.0     2.0
7   555.0   4421.0     4.0
8   155.0   1439.0     3.0
9   379.0   6303.0     3.0


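## 2. Model

The model class (`EmbedNet`) is not defined in this notebook; it is expected to come from `nn_utils` via the star import in section 0.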

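## 3. Learner

The learner class (`CollabLearner`) is not defined in this notebook; it is expected to come from `nn_utils` via the star import in section 0.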

## 4. Train

In [7]:
# Architecture hyperparameters. Per the model printed below:
#   n_factors          -> width of both the user and the movie embedding
#   embedding_dropout  -> dropout probability of the (d) Dropout layer
#   hidden             -> output sizes of the stacked Linear layers
#   dense_dropouts     -> dropout probability after each hidden layer
n_factors = 200
embedding_dropout = 0.02
hidden = [256, 128, 64, 32]
dense_dropouts = [0.5, 0.5, 0.35, 0.25]

# Build the learner around the data object and the EmbedNet model class.
learn = CollabLearner(
    ratings_data,
    EmbedNet,
    n_factors=n_factors,
    embedding_dropout=embedding_dropout,
    hidden=hidden,
    dense_dropouts=dense_dropouts
)

# Show the instantiated network for inspection.
learn.model

EmbedNet(
  (u): Embedding(610, 200)
  (m): Embedding(9724, 200)
  (d): Dropout(p=0.02, inplace=False)
  (hidden_fc): Sequential(
    (0): Linear(in_features=400, out_features=256, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=256, out_features=128, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=128, out_features=64, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.35, inplace=False)
    (9): Linear(in_features=64, out_features=32, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.25, inplace=False)
  )
  (last_fc): Linear(in_features=32, out_features=1, bias=True)
)

In [8]:
# Training hyperparameters.
# NOTE(review): lr / wd presumably are learning rate and weight decay --
# confirm against CollabLearner.fit in nn_utils.
n_epochs = 20
lr = 3e-3
wd = 1e-5

# Train; the learner logs train and validation loss once per epoch.
learn.fit(n_epochs, lr, wd)

epoch: 0, train loss: 1.1355032920837402, validation loss: 1.0422738790512085
epoch: 1, train loss: 1.0117918252944946, validation loss: 1.004041075706482
epoch: 2, train loss: 0.9793491363525391, validation loss: 0.9799113273620605
epoch: 3, train loss: 0.9525758624076843, validation loss: 0.960098147392273
epoch: 4, train loss: 0.9312750697135925, validation loss: 0.946039080619812
epoch: 5, train loss: 0.9090015292167664, validation loss: 0.9338878393173218
epoch: 6, train loss: 0.8906547427177429, validation loss: 0.9209505915641785
epoch: 7, train loss: 0.8713659048080444, validation loss: 0.9154750108718872
epoch: 8, train loss: 0.8575087785720825, validation loss: 0.9117218255996704
epoch: 9, train loss: 0.8444257974624634, validation loss: 0.9091752171516418
epoch: 10, train loss: 0.8351204991340637, validation loss: 0.9052647352218628
epoch: 11, train loss: 0.8272959589958191, validation loss: 0.9075983762741089
epoch: 12, train loss: 0.8183246850967407, validation loss: 0.907

## 5. Predict

In [9]:
# Fetch predictions for the 'val' split.
# NOTE(review): the returned two-column array appears to hold
# [predicted rating, actual rating] per row -- confirm against nn_utils.
learn.get_preds('val')

array([[3.50232983, 4.        ],
       [2.81311989, 2.        ],
       [1.81216836, 1.        ],
       ...,
       [3.89464378, 4.        ],
       [3.71712017, 4.5       ],
       [3.55897284, 5.        ]])

## 6. Fine Tuning

In [10]:
# TODO: fine-tuning step is not implemented yet.

## 7. Evaluation

In [11]:
# TODO: evaluation step is not implemented yet.