Reference: [Recommendation System Implementation With Deep Learning and PyTorch](https://medium.com/swlh/recommendation-system-implementation-with-deep-learning-and-pytorch-a03ee84a96f4)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import _LRScheduler

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import onnx

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import math
import copy
from pathlib import Path
from itertools import zip_longest

In [None]:
def set_random_seed(state=1):
    """Seed the NumPy, PyTorch and PyTorch-CUDA random number generators.

    Args:
        state: Seed value handed unchanged to each of the three generators.
    """
    np.random.seed(state)
    torch.manual_seed(state)
    torch.cuda.manual_seed(state)

In [None]:
# Single seed value for the notebook; applied to the NumPy / PyTorch generators here.
RANDOM_STATE = 1
set_random_seed(RANDOM_STATE)

# Prepare data

In [None]:
def read_data(path):
    """Load ``input.csv`` from a directory as a DataFrame.

    Only the file whose stem is ``input`` and whose suffix is ``.csv`` is
    parsed. Other files in the directory (including other CSV files) are
    ignored rather than being read and thrown away, so an unrelated broken
    CSV can no longer make the load fail.

    Args:
        path: ``pathlib.Path`` of the directory to search (non-recursive).

    Returns:
        pandas.DataFrame with the parsed contents of ``input.csv``.

    Raises:
        FileNotFoundError: If the directory does not contain ``input.csv``.
    """
    for filename in path.glob('*'):
        if filename.suffix == '.csv' and filename.stem == 'input':
            return pd.read_csv(filename)
    raise FileNotFoundError(f"no 'input.csv' found in {path}")
# Load the raw table from the 'input' CSV inside data/input (path is relative to the working directory).
input_df = read_data(Path('data/input'))

In [None]:
def create_dataset(input_df, top=None):
    """Separate a raw table into a feature frame and an integer target.

    Args:
        input_df: DataFrame that contains a ``Response`` column.
        top: Not used by this function; kept so existing calls still work.

    Returns:
        ``((num_entries, num_fields), (X, y))`` where ``num_entries`` is the
        row count, ``num_fields`` is the column count minus one, ``X`` is
        ``input_df`` without ``Response`` and ``y`` is ``Response`` cast to int.
    """
    row_count, column_count = input_df.shape
    features = input_df.drop(columns=['Response'])
    target = input_df['Response'].astype(int)
    sizes = (row_count, column_count - 1)
    return sizes, (features, target)

In [None]:
# Split the raw table into features X and target y, then report their sizes.
(num_entries, num_fields), (X, y) = create_dataset(input_df)
print(f'{num_entries} entries, {num_fields} fields')
print(f'Dataset shape: {X.shape}')
print(f'Target shape: {y.shape}')

In [None]:
class ReviewsIterator:
    """Single-pass iterator over consecutive ``(X, y)`` mini-batches.

    The inputs are converted to NumPy arrays and, when ``shuffle`` is true,
    permuted once at construction time with ``np.random.permutation``.
    Every sample is yielded exactly once; the final batch is smaller than
    ``batch_size`` when the sample count is not a multiple of it.

    Args:
        X: Array-like of samples, indexed along its first axis.
        y: Array-like of targets with the same first-axis length as ``X``.
        batch_size: Maximum number of samples per batch.
        shuffle: Whether to permute the samples before batching.
    """

    def __init__(self, X, y, batch_size=32, shuffle=True):
        X, y = np.asarray(X), np.asarray(y)

        if shuffle:
            index = np.random.permutation(X.shape[0])
            X, y = X[index], y[index]

        self.X = X
        self.y = y
        self.batch_size = batch_size
        self.shuffle = shuffle
        # True division before ceil: `//` would floor first, silently dropping
        # the trailing partial batch (and every sample when n < batch_size).
        self.n_batches = int(math.ceil(X.shape[0] / batch_size))
        self._current = 0

    def __iter__(self):
        return self

    def __next__(self):
        return self.next()

    def next(self):
        """Return the next ``(X_batch, y_batch)`` pair or raise StopIteration."""
        if self._current >= self.n_batches:
            raise StopIteration()
        k = self._current
        self._current += 1
        bs = self.batch_size
        # Slicing past the end is safe: the last batch is simply shorter.
        return self.X[k*bs:(k + 1)*bs], self.y[k*bs:(k + 1)*bs]

In [None]:
def batches(X, y, bs=32, shuffle=True):
    """Yield ``(features, targets)`` tensor pairs, one per mini-batch.

    Each row is converted field by field: Python ``int``/``float`` values
    become float tensors, strings are split into one integer per character,
    and the pieces are concatenated along dim 1 before the rows are stacked.

    NOTE(review): the concatenation calls ``unsqueeze`` on the first field of
    every row, so that field presumably has to be a digit string (or already
    a tensor) -- confirm against the input data.

    Args:
        X: Samples, forwarded to ``ReviewsIterator``.
        y: Targets, forwarded to ``ReviewsIterator``.
        bs: Batch size.
        shuffle: Whether ``ReviewsIterator`` should shuffle the samples.

    Yields:
        ``(xb, yb)`` where ``xb`` is the stack of per-row tensors and ``yb``
        is a float tensor viewed as a single column.
    """

    def numbers_to_tensors(row):
        # Stage 1: plain Python numbers -> float tensors, everything else untouched.
        return [
            torch.tensor(field).float() if isinstance(field, (int, float)) else field
            for field in row
        ]

    def digit_strings_to_tensors(row):
        # Stage 2: strings -> integer tensor with one entry per character.
        return [
            torch.tensor(np.array([int(digit) for digit in field]))
            if isinstance(field, str) else field
            for field in row
        ]

    def flatten_row(row):
        # Stage 3: first field as a (1, n) row, each remaining field as a (1, 1) cell.
        head = row[0].unsqueeze(0)
        tail = [torch.tensor([[elem]]) for elem in row[1:]]
        return torch.cat([head] + tail, dim=1)

    for features, targets in ReviewsIterator(X, y, bs, shuffle):
        rows = [numbers_to_tensors(row) for row in features]
        rows = [digit_strings_to_tensors(row) for row in rows]
        rows = [flatten_row(row) for row in rows]

        stacked = torch.stack(rows)
        target_tensor = torch.FloatTensor(targets)
        yield stacked, target_tensor.view(-1, 1)

# Model

In [None]:
class RecommendationModel(nn.Module):
    """Feed-forward network that outputs one sigmoid score per input row.

    Layout: ``Linear(num_features, 2 * num_factors)`` -> dropout -> a stack of
    ``Linear`` + ``ReLU`` (+ optional ``Dropout``) hidden layers ->
    ``Linear(hidden[-1], 1)`` -> sigmoid.

    Args:
        num_features: Width of the input vectors.
        num_factors: Half the width of the first linear layer's output.
        embedding_dropout: Dropout rate applied after the first linear layer.
        hidden: Width of each hidden layer; a single number or an iterable.
        dropouts: Dropout rate after each hidden layer; a single number or an
            iterable no longer than ``hidden``. Hidden layers without a
            matching rate (or with a rate <= 0) get no dropout.

    Raises:
        ValueError: If more dropout rates than hidden layers are given.
    """

    def __init__(self, num_features, num_factors=50, embedding_dropout=0.02, hidden=10, dropouts=0.2):
        super().__init__()
        hidden = get_list(hidden)
        dropouts = get_list(dropouts)
        n_last = hidden[-1]

        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if len(dropouts) > len(hidden):
            raise ValueError('dropouts must not be longer than hidden')

        self.features = nn.Linear(num_features, num_factors * 2)
        self.drop = nn.Dropout(embedding_dropout)

        layers = []
        n_in = num_factors * 2
        # zip_longest pads the shorter `dropouts` with None -> no dropout there.
        for n_out, rate in zip_longest(hidden, dropouts):
            layers.append(nn.Linear(n_in, n_out))
            layers.append(nn.ReLU())
            if rate is not None and rate > 0.:
                layers.append(nn.Dropout(rate))
            n_in = n_out

        self.hidden = nn.Sequential(*layers)
        self.fc = nn.Linear(n_last, 1)
        self._init()

    def forward(self, x):
        """Return sigmoid scores; the last dimension of ``x`` must be ``num_features``."""
        x = self.features(x)
        x = self.drop(x)
        x = self.hidden(x)
        out = torch.sigmoid(self.fc(x))
        return out

    def _init(self):
        """Xavier-initialise every linear layer's weights; set biases to 0.01."""
        def init(m):
            # isinstance (not type equality) so subclasses of nn.Linear are covered too.
            if isinstance(m, nn.Linear):
                torch.nn.init.xavier_uniform_(m.weight)
                torch.nn.init.constant_(m.bias, 0.01)

        self.features.apply(init)
        self.hidden.apply(init)
        init(self.fc)


def get_list(n):
    """Normalise a layer configuration value to a list.

    Args:
        n: A single ``int``/``float`` or any object exposing ``__iter__``.

    Returns:
        ``[n]`` for a single number, otherwise ``list(n)``.

    Raises:
        TypeError: If ``n`` is neither a number nor iterable.
    """
    is_number = isinstance(n, (int, float))
    if is_number:
        return [n]
    if not hasattr(n, '__iter__'):
        raise TypeError('layers configuration should be a single number or a list of numbers')
    return list(n)

# Training

### Cyclical Learning Rate

In [None]:
class CyclicLR(_LRScheduler):
    """Scheduler that delegates the learning-rate formula to a callable.

    ``schedule(step, base_lr)`` is evaluated once per base learning rate,
    with ``step`` taken from the scheduler's ``last_epoch`` counter.
    """

    def __init__(self, optimizer, schedule, last_epoch=-1):
        assert callable(schedule)
        # Stored ahead of the base constructor; presumably the base class
        # already evaluates get_lr() while it is being constructed -- confirm
        # against the installed torch version.
        self.schedule = schedule
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the scheduled rate for every base learning rate."""
        rates = []
        for base_lr in self.base_lrs:
            rates.append(self.schedule(self.last_epoch, base_lr))
        return rates

In [None]:
def triangular(step_size, max_lr, method='triangular', gamma=0.99):
    """Build a triangular cyclical learning-rate schedule.

    Args:
        step_size: Number of steps in half a cycle (base -> max or max -> base).
        max_lr: Learning rate reached at the peak of the first cycle.
        method: ``'triangular'`` (constant amplitude), ``'triangular2'``
            (amplitude halved every cycle) or ``'exp_range'`` (amplitude
            scaled by ``gamma ** epoch``).
        gamma: Decay base used by ``'exp_range'``.

    Returns:
        ``scheduler(epoch, base_lr) -> float``. An unknown ``method`` raises
        ``ValueError`` when the returned scheduler is called.
    """

    def scheduler(epoch, base_lr):
        full_cycle = 2 * step_size
        cycle = math.floor(1 + epoch/full_cycle)
        # Distance from the peak of the current cycle, 0 at the peak, 1 at the ends.
        offset = abs(epoch/step_size - 2*cycle + 1)
        amplitude = (max_lr - base_lr)*max(0, (1 - offset))

        if method == 'triangular2':
            amplitude /= float(2 ** (cycle - 1))
        elif method == 'exp_range':
            amplitude *= (gamma**epoch)
        elif method != 'triangular':
            raise ValueError('unexpected method: %s' % method)

        return base_lr + amplitude

    return scheduler

In [None]:
def cosine(t_max, eta_min=0):
    """Build a cosine-annealing schedule that restarts every ``t_max`` steps.

    Args:
        t_max: Length of one annealing period in steps.
        eta_min: Lower bound the rate decays towards within a period.

    Returns:
        ``scheduler(epoch, base_lr) -> float`` starting at ``base_lr`` when
        ``epoch % t_max == 0`` and decaying towards ``eta_min``.
    """

    def scheduler(epoch, base_lr):
        position = epoch % t_max
        span = base_lr - eta_min
        cos_term = math.cos(math.pi*position/t_max)
        return eta_min + span*(1 + cos_term)/2

    return scheduler

In [None]:
def plot_lr(schedule):
    """Plot the rate ``schedule`` produces for steps 0..999 with a base rate of 0.001.

    Args:
        schedule: Callable ``(step, base_lr) -> learning rate``.
    """
    steps = list(range(1000))
    rates = [schedule(step, 0.001) for step in steps]
    plt.plot(steps, rates)

### Training Loop

In [None]:
# Hold out 20% of the rows for validation; the split is seeded with RANDOM_STATE.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)
# Per-phase data and row counts, keyed by phase name.
datasets = {'train': (X_train, y_train), 'val': (X_valid, y_valid)}
dataset_sizes = {'train': len(X_train), 'val': len(X_valid)}

print(dataset_sizes)

In [None]:
# Build the network: 36 inputs -> 60-wide first layer (2 * num_factors) -> five
# hidden layers, with dropout after the first four of them.
# NOTE(review): num_features=36 presumably has to equal the flattened width of
# one row produced by batches() -- confirm against the input data.
rec = RecommendationModel(
    num_features=36, 
    num_factors=30,
    hidden=[100, 200, 300, 200, 100],
    dropouts=[0.25, 0.5, 0.5, 0.25]
)
print(rec)

In [None]:
# Hyper-parameters.
lr = 1e-3          # base learning rate
wd = 1e-5          # Adam weight decay
bs = 2000          # batch size
n_epochs = 200     # upper bound on training epochs
patience = 20      # epochs without validation improvement before stopping

# Early-stopping and logging state.
no_improvements = 0
best_loss = np.inf
best_weights = None
history = []
lr_history = []

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

rec.to(device)
# Summed (not averaged) loss per batch.
criterion = nn.MSELoss(reduction='sum')
optimizer = optim.Adam(rec.parameters(), lr=lr, weight_decay=wd)
# Ceil of a true division so a trailing partial batch counts as an iteration
# (`ceil(a // b)` floors first, which makes the ceil a no-op); never below 1
# so the cosine period stays non-zero.
iterations_per_epoch = max(1, math.ceil(dataset_sizes['train'] / bs))
# Cosine schedule with a period of two epochs, decaying from lr down to lr/10.
scheduler = CyclicLR(optimizer, cosine(t_max=iterations_per_epoch * 2, eta_min=lr/10))

In [None]:
# Train with a per-batch learning-rate schedule and early stopping on the
# validation loss. Each epoch runs a 'train' phase followed by a 'val' phase.
for epoch in range(n_epochs):
    stats = {'epoch': epoch + 1, 'total': n_epochs}

    for phase in ('train', 'val'):
        training = phase == 'train'
        # Switch module mode per phase: without this the dropout layers stay
        # active during validation and distort the early-stopping signal.
        rec.train(training)

        running_loss = 0

        for batch in batches(*datasets[phase], shuffle=training, bs=bs):
            x_batch, y_batch = [b.to(device) for b in batch]
            # flatten each sample to a single feature vector
            x_batch = x_batch.view(x_batch.size(0), -1)

            # compute gradients only during 'train' phase
            with torch.set_grad_enabled(training):
                outputs = rec(x_batch)
                loss = criterion(outputs, y_batch)

            # don't update weights and rates when in 'val' phase
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # log the rate this update used, then advance the schedule;
                # the scheduler must step after the optimizer, not before
                lr_history.extend(group['lr'] for group in optimizer.param_groups)
                scheduler.step()

            running_loss += loss.item()

        # criterion sums over samples, so this is the mean loss per sample
        epoch_loss = running_loss / dataset_sizes[phase]
        stats[phase] = epoch_loss

        # early stopping: save weights of the best model so far
        if phase == 'val':
            if epoch_loss < best_loss:
                print('loss improvement on epoch: %d' % (epoch + 1))
                best_loss = epoch_loss
                best_weights = copy.deepcopy(rec.state_dict())
                no_improvements = 0
            else:
                no_improvements += 1

    history.append(stats)
    print('[{epoch:03d}/{total:03d}] train: {train:.4f} - val: {val:.4f}'.format(**stats))
    if no_improvements >= patience:
        print('early stopping after epoch {epoch:03d}'.format(**stats))
        break

In [None]:
# Plot the per-epoch train and validation losses collected in `history`.
ax = pd.DataFrame(history).drop(columns='total').plot(x='epoch')

In [None]:
# Plot the first 2 * iterations_per_epoch recorded learning rates, i.e. one
# period of the cosine schedule when a single rate is logged per batch.
_ = plt.plot(lr_history[:2*iterations_per_epoch])

In [None]:
# Restore the weights saved at the lowest validation loss seen during training.
# NOTE(review): best_weights is still None if no epoch ever improved on the
# initial best_loss (e.g. a NaN loss) -- this call would then fail.
rec.load_state_dict(best_weights)

In [None]:
# Score the validation set with the restored weights and report the RMSE.
groud_truth, predictions = [], []

# Predict in eval mode so dropout is disabled; the previous mode is restored
# afterwards so re-running a training cell is unaffected.
was_training = rec.training
rec.eval()
try:
    with torch.no_grad():
        for batch in batches(*datasets['val'], shuffle=False, bs=bs):
            x_batch, y_batch = [b.to(device) for b in batch]
            # flatten each sample exactly as the training loop does
            x_batch = x_batch.view(x_batch.size(0), -1)
            outputs = rec(x_batch)
            groud_truth.extend(y_batch.tolist())
            predictions.extend(outputs.tolist())
finally:
    rec.train(was_training)

groud_truth = np.asarray(groud_truth).ravel()
predictions = np.asarray(predictions).ravel()

print(f'groud_truth: {groud_truth}')
print(f'predictions: {predictions}')
# round the sigmoid scores to 0/1 decisions
print(f'decision: {np.round(predictions).astype(int)}')
print('rmse: %.4f' % np.sqrt(mean_squared_error(groud_truth, predictions)))

In [None]:
# Persist the trained model under ./checkpoints.
checkpoint_dir = 'checkpoints'
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(checkpoint_dir, exist_ok=True)

checkpoint_path = f'{checkpoint_dir}/recmodel.pth'
# NOTE(review): this pickles the whole module object, so loading it requires
# the RecommendationModel class to be importable; saving rec.state_dict() is
# the more portable form if no existing loader depends on this file format.
torch.save(rec, checkpoint_path)

# Save model

In [None]:
# Save model as ONNX
rec.eval()  # export with dropout disabled
# Build the tracing input on the model's own device and with its configured
# input width: a CPU tensor fails once `rec` has been moved to CUDA.
export_device = next(rec.parameters()).device
dummy_input = torch.randn(1, rec.features.in_features, device=export_device)
torch.onnx.export(rec, dummy_input, 'model.onnx', opset_version=11)