In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.utils import shuffle


import pandas as pd
from scipy.sparse import lil_matrix, csr_matrix, save_npz, load_npz
from datetime import datetime
from pathlib import Path

DATA_PATH = Path('data/')

import warnings
warnings.filterwarnings('ignore')

def load_feather(filepath, **kwargs):
    '''
    input: (path to feather file)
    read feather file to pandas dataframe
    output: (pandas dataframe)
    '''
    return pd.read_feather(filepath, **kwargs)

ratings = load_feather(DATA_PATH/'ratings_explicit_clean.feather')
ratings.drop(['index'], axis=1, inplace=True)
users = load_feather(DATA_PATH/'users_clean.feather')
books = load_feather(DATA_PATH/'books_clean.feather')

  from ._conv import register_converters as _register_converters


In [2]:
u_uniq = ratings.User_ID.unique()
user2idx = {o:i for i,o in enumerate(u_uniq)}
ratings['New_User_ID'] = ratings.User_ID.apply(lambda x: user2idx[x])

m_uniq = ratings.ISBN.unique()
book2idx = {o:i for i,o in enumerate(m_uniq)}
ratings['New_Book_ID'] = ratings.ISBN.apply(lambda x: book2idx[x])

n_users = int(ratings.New_User_ID.nunique())
n_books = int(ratings.New_Book_ID.nunique())

In [3]:
def mse(x, y):
    return np.sqrt(((x-y)**2).mean())

def rmse(x, y): 
    return np.sqrt(mse(x, y))

def mae(x, y): 
    return np.abs((x-y)).mean()

# Training helpers
def get_trainable(model_params):
    return (p for p in model_params if p.requires_grad)


def get_frozen(model_params):
    return (p for p in model_params if not p.requires_grad)


def all_trainable(model_params):
    return all(p.requires_grad for p in model_params)


def all_frozen(model_params):
    return all(not p.requires_grad for p in model_params)


def freeze_all(model_params):
    for param in model_params:
        param.requires_grad = False



metrics=[mse, rmse, mae]


In [4]:
# mean and std of Year_Of_Publication for User
def get_year_of_pub(ratings):
    ratings['Year_Of_Publication'] = ratings.Year_Of_Publication.fillna(round(ratings.Year_Of_Publication.mean()))
    user_group = ratings.groupby('User_ID', as_index=False)['Year_Of_Publication'].agg(
                            {'User_Mean_Year_Of_Publication': 'mean', 'User_Std_Year_Of_Publication': 'std'})
    user_group['User_Mean_Year_Of_Publication'] = round(user_group['User_Mean_Year_Of_Publication'], 2)
    user_group['User_Std_Year_Of_Publication'] = round(user_group['User_Std_Year_Of_Publication'].fillna(0), 2)
    ratings = ratings.merge(user_group, on='User_ID', how='left')
    features = ['Year_Of_Publication', 'User_Mean_Year_Of_Publication', 'User_Std_Year_Of_Publication']
    return ratings, features

def get_age(ratings):
    ratings['Age'] = ratings.Age.fillna(round(ratings.Age.median()))
    features = ['Age']
    return ratings, features

def clean_text(ratings, col, missing_value=None):
    '''
    removes punct and lowers and joins text to single string
    '''
    ratings[col] = ratings[col].str.lower().replace(r'[^A-Za-z]', '', regex=True)
    if missing_value: ratings[col] = ratings[col].fillna(missing_value)
    return ratings, col

def numericalize_col(ratings, col):
    '''
    convert string to category codes 
    '''
    ratings[col] = ratings[col].astype('category').cat.codes
    return ratings 

In [16]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import MinMaxScaler
    
    
# class TorchDataSetMeta(Dataset):
#     def __init__(self, ratings):
#         self.X = ratings[['New_User_ID', 'New_Book_ID', 'Book_Author', 
#                           'Year_Of_Publication', 'User_Mean_Year_Of_Publication', 'User_Std_Year_Of_Publication',
#                           'Age']].values
#         self.X
# #         self.X = ratings.drop('Book_Rating', axis=1).values
#         self.y = ratings['Book_Rating'].values
#         self.N = len(self.y)
#         self.D = self.X.shape[1]
        
#     def __len__(self):
#         return len(self.y)
    
#     def __getitem__(self, idx):
#         return self.X[idx], self.y[idx]
    
class MixedInputDataSet(Dataset):
    def __init__(self, cats, conts, y):
        self.cats = np.asarray(cats, dtype=np.int64)
        self.conts = np.asarray(conts, dtype=np.float32)
        self.N_cats = self.cats.shape[1]
        self.N_conts = self.conts.shape[1]
        self.X = np.hstack((self.cats, self.conts))
        self.N = len(y)
        y = np.zeros((n,1)) if y is None else y[:,None]
        self.y = np.asarray(y, dtype=np.float32)
            
    def __len__(self): 
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    
# 

cat_features = ['New_User_ID', 'New_Book_ID', 'Book_Author', 'Publisher']
cont_features = ['Year_Of_Publication', 'User_Mean_Year_Of_Publication', 'User_Std_Year_Of_Publication', 'Age']
target = 'Book_Rating'


ratings_df = ratings.copy()
ratings_df = ratings_df.merge(books, on='ISBN', how='left')
ratings_df = ratings_df.merge(users[['Age', 'User_ID']], on='User_ID', how='left')
ratings_df, y_of_pub_features = get_year_of_pub(ratings_df)
ratings_df, age_features = get_age(ratings_df)
ratings_df, author_feature = clean_text(ratings_df, 'Book_Author', missing_value='')
ratings_df, publisher_feature = clean_text(ratings_df, 'Publisher', missing_value='')

emb_c = {n: len(c.astype('category').cat.categories)+1 for n,c in ratings_df[cat_features[2:]].items()}
emb_c['n_users'] = n_users
emb_c['n_books'] = n_books
print(emb_c)

ratings_df['Book_Author'].fillna('', inplace=True)
ratings_df['Publisher'].fillna('', inplace=True)
ratings_df = numericalize_col(ratings_df, 'Book_Author')
ratings_df = numericalize_col(ratings_df, 'Publisher')

trainset, testset = train_test_split(ratings_df, test_size=0.2, random_state=100)

# 
min_max_scaler_yop = MinMaxScaler()
trainset[y_of_pub_features] = min_max_scaler_yop.fit_transform(trainset[y_of_pub_features])
testset[y_of_pub_features] = min_max_scaler_yop.transform(testset[y_of_pub_features])

min_max_scaler_age = MinMaxScaler()
trainset[age_features] = min_max_scaler_age.fit_transform(trainset[age_features].values.reshape(-1, 1))
testset[age_features] = min_max_scaler_age.transform(testset[age_features].values.reshape(-1, 1))

# train_torch_data_set_meta = TorchDataSetMeta(trainset)
# test_torch_data_set_meta = TorchDataSetMeta(testset)

batch_size = 32
device = 'cuda'
train_torch_data_set_meta = MixedInputDataSet(trainset[cat_features], trainset[cont_features], trainset[target])
test_torch_data_set_meta = MixedInputDataSet(testset[cat_features], testset[cont_features], testset[target])

train_data_loader_meta = DataLoader(train_torch_data_set_meta, batch_size=batch_size, shuffle=True)
test_data_loader_meta = DataLoader(test_torch_data_set_meta, batch_size=batch_size, shuffle=False)

{'Book_Author': 59240, 'Publisher': 11095, 'n_users': 77805, 'n_books': 185973}


In [6]:
trainset.head(1)

Unnamed: 0,User_ID,ISBN,Book_Rating,New_User_ID,New_Book_ID,Book_Title,Book_Author,Year_Of_Publication,Publisher,language,Age,User_Mean_Year_Of_Publication,User_Std_Year_Of_Publication
362941,229551,809237849,10,64601,165195,Men and Other Reptiles,10085,0.980922,6235,en,0.242105,0.975171,0.011789


In [None]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import MinMaxScaler
    
    
# class TorchDataSetMeta(Dataset):
#     def __init__(self, ratings):
#         self.X = ratings[['New_User_ID', 'New_Book_ID', 'Book_Author', 
#                           'Year_Of_Publication', 'User_Mean_Year_Of_Publication', 'User_Std_Year_Of_Publication',
#                           'Age']].values
#         self.X
# #         self.X = ratings.drop('Book_Rating', axis=1).values
#         self.y = ratings['Book_Rating'].values
#         self.N = len(self.y)
#         self.D = self.X.shape[1]
        
#     def __len__(self):
#         return len(self.y)
    
#     def __getitem__(self, idx):
#         return self.X[idx], self.y[idx]
    
class MixedInputDataSet(Dataset):
    def __init__(self, cats, conts, y):
        self.cats = np.asarray(cats, dtype=np.int64)
        self.conts = np.asarray(conts, dtype=np.float32)
        self.N_cats = self.cats.shape[1]
        self.N_conts = self.conts.shape[1]
        self.X = np.hstack((self.cats, self.conts))
        self.N = len(y)
        y = np.zeros((n,1)) if y is None else y[:,None]
        self.y = np.asarray(y, dtype=np.float32)
            
    def __len__(self): 
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    
# 

cat_features = ['New_User_ID', 'New_Book_ID', 'Book_Author', 'Publisher']
cont_features = ['Year_Of_Publication', 'User_Mean_Year_Of_Publication', 'User_Std_Year_Of_Publication', 'Age']
target = 'Book_Rating'


ratings_df = ratings.copy()
ratings_df = ratings_df.merge(books, on='ISBN', how='left')
ratings_df = ratings_df.merge(users[['Age', 'User_ID']], on='User_ID', how='left')
ratings_df, y_of_pub_features = get_year_of_pub(ratings_df)
ratings_df, age_features = get_age(ratings_df)
ratings_df, author_feature = clean_text(ratings_df, 'Book_Author', missing_value='')
ratings_df, publisher_feature = clean_text(ratings_df, 'Publisher', missing_value='')

emb_c = {n: len(c.astype('category').cat.categories)+1 for n,c in ratings_df[cat_features[2:]].items()}
emb_c['n_users'] = n_users
emb_c['n_books'] = n_books
print(emb_c)

ratings_df['Book_Author'].fillna('', inplace=True)
ratings_df['Publisher'].fillna('', inplace=True)
ratings_df = numericalize_col(ratings_df, 'Book_Author')
ratings_df = numericalize_col(ratings_df, 'Publisher')

trainset, testset = train_test_split(ratings_df, test_size=0.2, random_state=100)


from sklearn.model_selection import KFold
# 
min_max_scaler_yop = MinMaxScaler()
trainset[y_of_pub_features] = min_max_scaler_yop.fit_transform(trainset[y_of_pub_features])
testset[y_of_pub_features] = min_max_scaler_yop.transform(testset[y_of_pub_features])

min_max_scaler_age = MinMaxScaler()
trainset[age_features] = min_max_scaler_age.fit_transform(trainset[age_features].values.reshape(-1, 1))
testset[age_features] = min_max_scaler_age.transform(testset[age_features].values.reshape(-1, 1))



batch_size = 32
device = 'cuda'

from sklearn.model_selection import KFold, StratifiedKFold

X = trainset[cat_features + cont_features]
y = trainset[target]

kfold = StratifiedKFold(3)
for train_index, val_index in skf.split(X, y):
    # need to do scaling corectly 
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]
    train_torch_data_set_meta = MixedInputDataSet(X_train[cat_features], X_train[cont_features], trainset[target])
    val_torch_data_set_meta = MixedInputDataSet(X_val[cat_features], X_val[cont_features], y_val)
    train_data_loader_meta = DataLoader(train_torch_data_set_meta, batch_size=batch_size, shuffle=True)
    val_data_loader_meta = DataLoader(val_torch_data_set_meta, batch_size=batch_size, shuffle=False)


# train_torch_data_set_meta = MixedInputDataSet(trainset[cat_features], trainset[cont_features], trainset[target])
# test_torch_data_set_meta = MixedInputDataSet(testset[cat_features], testset[cont_features], testset[target])

# train_data_loader_meta = DataLoader(train_torch_data_set_meta, batch_size=batch_size, shuffle=True)
# test_data_loader_meta = DataLoader(test_torch_data_set_meta, batch_size=batch_size, shuffle=False)

In [18]:
len(cat_features), len(cont_features)

(4, 4)

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

n_factors = 100
max_rating = float(trainset.Book_Rating.max())
min_rating = float(trainset.Book_Rating.min())


def init_embeddings(x):
    x = x.weight.data
    value = 2 / (x.size(1) + 1)
    x.uniform_(-value, value)
    

    
class MiniNetPlusMeta(nn.Module):
    def __init__(self, emb_szs, hidden=256, hidden2=100, y_range=(1, 10),
                 n_cont=train_torch_data_set_meta.N_conts, n_cat=train_torch_data_set_meta.N_cats):
#     def __init__(self, emb_szs, hidden_sizes=[256, 100], y_range=(1, 10),
#                  n_cont=train_torch_data_set_meta.N_conts, n_cat=train_torch_data_set_meta.N_cats):
    
        super().__init__()
        self.user_embedding = nn.Embedding(emb_szs['n_users'], n_factors)
        self.book_embedding = nn.Embedding(emb_szs['n_books'], n_factors)
        self.author_embedding = nn.Embedding(emb_szs['Book_Author'], n_factors)
        self.publisher_embedding = nn.Embedding(emb_szs['Publisher'], n_factors)
        self.embeddings = [self.user_embedding, self.book_embedding, self.author_embedding, self.publisher_embedding]
        
#         self.embeddings = nn.ModuleList([nn.Embedding(x, n_factors) for x in emb_szs.values()])
#         self.linears = nn.ModuleList([nn.Linear((n_factors * n_cat) + n_cont, hidden_sizes[0]), 
#                                       nn.Linear(hidden_sizes[0], hidden_sizes[1]),
#                                       nn.Linear(hidden_sizes[1], 1)])
        
        self.linear1 = nn.Linear((n_factors * n_cat) + n_cont, hidden)
        self.linear2 = nn.Linear(hidden, hidden2)
        self.linear3 = nn.Linear(hidden2, 1)
        self.linears = [self.linear1, self.linear2, self.linear3]
        self.bn = nn.BatchNorm1d(n_cont)
        self.emb_drop = nn.Dropout(0.1)
        self.drop1 = nn.Dropout(0.5)
        self.drop2 = nn.Dropout(0.5)
        
        for layer in self.embeddings:
            init_embeddings(layer)
            
        for layer in self.linears:
            nn.init.kaiming_normal(layer.weight.data)
        
        self.n_cont = n_cont
        self.n_cat = n_cat
        self.y_range = y_range
    
#     def forward(self, X):
#         cats, conts = X[:, :self.n_cat], X[:, self.n_cat:]
#         cats, conts = cats.long(), conts.float()
#         cat_list = [self.embeddings[x](cats[:,x]) for x in range(self.n_cat)]
#         X = torch.cat(cat_list, dim=1)
#         X = self.emb_drop(X)
#         if self.n_cont != 0:
#             X2 = self.bn(conts)
#             X = torch.cat([X, X2], dim=1) if self.n_cont != 0 else X2
#         X = self.drop1(X)
#         X = self.drop2(F.relu(self.linear1(X)))
#         return F.sigmoid(self.linear2(X)) * (max_rating - min_rating+1) + min_rating-0.5
    
    def forward(self, X):
        cats, conts = X[:, :self.n_cat], X[:, self.n_cat:]
        cats, conts = cats.long(), conts.float()
        cat_list = [self.embeddings[x](cats[:,x]) for x in range(self.n_cat)]
        X = torch.cat(cat_list, dim=1)
        X = self.emb_drop(X)
        if self.n_cont != 0:
            X2 = self.bn(conts)
            X = torch.cat([X, X2], dim=1) if self.n_cont != 0 else X2
#         X = self.drop1(X)
        X = F.relu(self.linear1(X))
        X = self.drop1(X)
        X = F.relu(self.linear2(X))
        X = self.drop2(X)
        X = self.linear3(X)
#         X = self.drop2(F.relu(self.linear1(X)))
#         return F.sigmoid(self.linear2(X)) * (max_rating - min_rating+1) + min_rating-0.5

        X = F.sigmoid(X)
        X = X * (self.y_range[1] - self.y_range[0])
        X = X + self.y_range[0]
        return X


In [8]:
model = MiniNetPlusMeta(emb_c).to(device)

In [9]:
model

MiniNetPlusMeta(
  (user_embedding): Embedding(77805, 100)
  (book_embedding): Embedding(185973, 100)
  (author_embedding): Embedding(59240, 100)
  (publisher_embedding): Embedding(11095, 100)
  (linear1): Linear(in_features=404, out_features=256, bias=True)
  (linear2): Linear(in_features=256, out_features=100, bias=True)
  (linear3): Linear(in_features=100, out_features=1, bias=True)
  (bn): BatchNorm1d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (emb_drop): Dropout(p=0.1)
  (drop1): Dropout(p=0.5)
  (drop2): Dropout(p=0.5)
)

In [132]:
# import ignite
# from ignite.metrics import RootMeanSquaredError, MeanAbsoluteError
# from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
# import torch
# from ignite.engine.engine import Engine, State, Events

# def apply_to_tensor(input_, func):
#     """
#     Apply a function on a tensor or mapping, or sequence of tensors.
#     """
#     return apply_to_type(input_, torch.Tensor, func)


# def apply_to_type(input_, input_type, func):
#     """
#     Apply a function on a object of `input_type` or mapping, or sequence of objects of `input_type`.
#     """
#     if isinstance(input_, input_type):
#         return func(input_)
#     elif isinstance(input_, string_classes):
#         return input_
#     elif isinstance(input_, collections.Mapping):
#         return {k: apply_to_type(sample, input_type, func) for k, sample in input_.items()}
#     elif isinstance(input_, collections.Sequence):
#         return [apply_to_type(sample, input_type, func) for sample in input_]
#     else:
#         raise TypeError(("input must contain {}, dicts or lists; found {}"
#                          .format(input_type, type(input_))))


# def convert_tensor(input_, device=None, non_blocking=False):
#     """
#     Move tensors to relevant device.
#     """
#     def _func(tensor):
#         return tensor.to(device=device, non_blocking=non_blocking) if device else tensor

#     return apply_to_tensor(input_, _func)

# def _prepare_batch(batch, device=None, non_blocking=False):
#     """
#     Prepare batch for training: pass to a device with options
#     """
#     x, y = batch
#     return (convert_tensor(x, device=device, non_blocking=non_blocking),
#             convert_tensor(y, device=device, non_blocking=non_blocking))


# all_lr = []

# def create_supervised_trainer(model, optimizer, loss_fn,
#                               device=None, non_blocking=False,
#                               prepare_batch=_prepare_batch):
#     if device:
#         model.to(device)

#     def _update(engine, batch):
#         model.train()
#         optimizer.zero_grad()
#         x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)
# #         users, books, conts = x[:,0], x[:,1], x[:,2:]
# #         users, books, conts, y = users.long(), books.long(), conts.float(), y.float()
#         y_pred = model(x)
# #         y_pred = model(users, books, conts)
# #         y_pred = y_pred.squeeze()
#         loss = loss_fn(y_pred, y)
#         loss.backward()
#         optimizer.step()
#         for param_group in optimizer.param_groups:
#             all_lr.append(param_group['lr'])
#         # scheduler
#         scheduler.step()
#         return loss.item()

#     return Engine(_update)

# n_batches = train_data_loader_meta.dataset.N // batch_size
# cycle_len = 1
# crit = nn.MSELoss()
# optimizer = torch.optim.Adam(get_trainable(model.parameters()), lr=0.001)


# import visdom

# def create_plot_window(vis, xlabel, ylabel, title):
#     return vis.line(X=np.array([1]), Y=np.array([np.nan]), opts=dict(xlabel=xlabel, ylabel=ylabel, title=title))


# vis = visdom.Visdom()


# from torch.optim.lr_scheduler import CosineAnnealingLR
# # scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*cycle_len)
# scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*cycle_len)
# PRINT_PERIOD = 2000
# trainer = create_supervised_trainer(model, optimizer, crit, device=device)
# evaluator = create_supervised_evaluator(model, metrics={'RMSE': RootMeanSquaredError(), 'MAE': MeanAbsoluteError()}, 
#                                        device=device)

# train_loss_window = create_plot_window(vis, '#Iterations', 'Loss', 'Training Loss')
# train_avg_mae_window = create_plot_window(vis, '#Iterations', 'Loss', 'Training Average MAE')
# train_avg_rmse_window = create_plot_window(vis, '#Iterations', 'Accuracy', 'Training Average RMSE')
# val_avg_mae_window = create_plot_window(vis, '#Epochs', 'Loss', 'Validation Average MAE')
# val_avg_rmse_window = create_plot_window(vis, '#Epochs', 'Accuracy', 'Validation Average RMSE')

In [10]:
import ignite
from ignite.metrics import RootMeanSquaredError, MeanAbsoluteError
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
import visdom

n_batches = train_data_loader_meta.dataset.N // batch_size
cycle_len = 1
crit = nn.MSELoss()
optimizer = torch.optim.Adam(get_trainable(model.parameters()), lr=0.001)


def create_plot_window(vis, xlabel, ylabel, title):
    return vis.line(X=np.array([1]), Y=np.array([np.nan]), opts=dict(xlabel=xlabel, ylabel=ylabel, title=title))

vis = visdom.Visdom()


from torch.optim.lr_scheduler import CosineAnnealingLR
# scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*cycle_len)
scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*cycle_len)
PRINT_PERIOD = 2000
trainer = create_supervised_trainer(model, optimizer, crit, device=device)
evaluator = create_supervised_evaluator(model, metrics={'RMSE': RootMeanSquaredError(), 'MAE': MeanAbsoluteError()}, 
                                       device=device)

train_loss_window = create_plot_window(vis, '#Iterations', 'Loss', 'Training Loss')
train_avg_mae_window = create_plot_window(vis, '#Iterations', 'Loss', 'Training Average MAE')
train_avg_rmse_window = create_plot_window(vis, '#Iterations', 'Accuracy', 'Training Average RMSE')
val_avg_mae_window = create_plot_window(vis, '#Epochs', 'Loss', 'Validation Average MAE')
val_avg_rmse_window = create_plot_window(vis, '#Epochs', 'Accuracy', 'Validation Average RMSE')


In [11]:
from tensorboardX import SummaryWriter
import datetime    

now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
summary_writer = SummaryWriter(log_dir=f"tf_log/exp_ignite_{now}")

@trainer.on(Events.ITERATION_COMPLETED)
def log_train_loss(engine):
    if (engine.state.iteration - 1) % PRINT_PERIOD == 0:
        print(f'epoch: {engine.state.epoch} -- batch: {engine.state.iteration} -- loss: {engine.state.output:.2f}')
        vis.line(X=np.array([engine.state.iteration]),
                     Y=np.array([engine.state.output]),
                     update='append', win=train_loss_window)

@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(engine):
    evaluator.run(train_data_loader_meta)
    metrics = evaluator.state.metrics
    print(f'Train Results -- Epoch: {trainer.state.epoch}, RMSE: {metrics["RMSE"]:.2f}, MAE: {metrics["MAE"]:.2f}')
    summary_writer.add_scalar('training/rmse', metrics['RMSE'])
    summary_writer.add_scalar('training/mae', metrics['MAE'])
    vis.line(X=np.array([engine.state.epoch]), Y=np.array([metrics["RMSE"]]),
                 win=train_avg_rmse_window, update='append')
    vis.line(X=np.array([engine.state.epoch]), Y=np.array([metrics['MAE']]),
                 win=train_avg_mae_window, update='append')
    

@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
    evaluator.run(test_data_loader_meta)
    metrics = evaluator.state.metrics
    print(f'Validation Results -- Epoch: {trainer.state.epoch}, RMSE: {metrics["RMSE"]:.2f}, MAE: {metrics["MAE"]:.2f}')
    summary_writer.add_scalar('validation/rmse', metrics['RMSE'])
    summary_writer.add_scalar('validation/mae', metrics['MAE'])
    vis.line(X=np.array([engine.state.epoch]), Y=np.array([metrics["RMSE"]]),
                 win=val_avg_rmse_window, update='append')
    vis.line(X=np.array([engine.state.epoch]), Y=np.array([metrics['MAE']]),
                 win=val_avg_mae_window, update='append')   

   
    

In [12]:
trainer.run(train_data_loader_meta, max_epochs=5)

epoch: 1 -- batch: 1 -- loss: 6.97


KeyboardInterrupt: 

In [36]:
import time

import torch.optim as optim

def createLossAndOptimizer(model, learning_rate=0.001):
    
    #Loss function
    loss = torch.nn.MSELoss()
    #Optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    return(loss, optimizer)


def get_data(X, y):
    # first two need to be user_id and book_id, meta after 
    user_ids, book_ids, conts = X[:, 0], X[:, 1], X[:, 2:]
    user_ids = Variable(user_ids.long()).cuda()
    book_ids = Variable(book_ids.long()).cuda()
    conts = Variable(conts.float()).cuda()
    y = Variable(y.float()).cuda()
    return user_ids, book_ids, conts, y

def trainNet(model, batch_size, n_epochs, learning_rate):
    
    #Print all of the hyperparameters of the training iteration:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)
    
    #Get training data
    train_loader = train_data_loader_meta
    n_batches = len(train_loader)
    
    #Create our loss and optimizer functions
    loss, optimizer = createLossAndOptimizer(model, learning_rate)
    
    #Time for printing
    training_start_time = time.time()
    
    #Loop for n_epochs
    for epoch in range(n_epochs):
        
        running_loss = 0.0
        print_every = n_batches // 10
        start_time = time.time()
        total_train_loss = 0
        
        for i, (data) in enumerate(train_loader, 0):
            
            #Get inputs
            X, y = data
            
            #Wrap them in a Variable object
            user_ids, book_ids, conts, y = get_data(X, y)
            
            #Set the parameter gradients to zero
            optimizer.zero_grad()
            
            #Forward pass, backward pass, optimize
            outputs = model(user_ids, book_ids, conts)
            loss_size = loss(outputs, y)
            loss_size.backward()
            optimizer.step()
            
            #Print statistics
            running_loss += loss_size.data[0]
            total_train_loss += loss_size.data[0]
            
            #Print every 10th batch of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                        epoch+1, int(100 * (i+1) / n_batches), running_loss / print_every, time.time() - start_time))
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                        epoch+1, int(100 * (i+1) / n_batches), running_loss / print_every, time.time() - start_time))
                #Reset running loss and time
                running_loss = 0.0
                start_time = time.time()
            
        #At the end of the epoch, do a pass on the validation set
        total_val_loss = 0
        for (X, y) in test_data_loader_meta:
            
            #Wrap tensors in Variables
            user_ids, book_ids, conts, y = get_data(X, y)
            
            #Forward pass
            val_outputs = model(user_ids, book_ids, conts)
            val_loss_size = loss(val_outputs, y)
            total_val_loss += val_loss_size.data[0]
            
        print("Validation loss = {:.2f}, RMSE = {:.2f}".format(total_val_loss / len(test_data_loader_meta), np.sqrt(total_val_loss / len(test_data_loader_meta))))
        
    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))
    
    
trainNet(model, batch_size=128, n_epochs=5, learning_rate=0.001)

In [37]:
# fit(model, train_data_loader_meta, loss=nn.MSELoss(), epochs=3, save=True, val_loader=test_data_loader_meta, 
#                                                     metrics=[mse, rmse, mae], cycle_len=1, print_period=100)

In [18]:
import numpy as np
from functools import partial
import pandas as pd
import os
from tqdm import tqdm_notebook, tnrange, tqdm
import sys

import torch
from torch import nn
from torch.nn.init import kaiming_normal
import torch.nn.functional as F
from torch.optim import RMSprop
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from cosine_annealing_lr import CosineAnnealingLR

def fit(model, train_loader, loss, opt_fn=None, learning_rate=1e-3, batch_size=64, epochs=1, cycle_len=1, val_loader=None, metrics=None, 
                save=False, save_path='tmp/checkpoint.pth.tar', pre_saved=False, print_period=1000):
        
    if opt_fn:
        optimizer = opt_fn(model.parameters(), lr=learning_rate)
    else:  
        optimizer = RMSprop(model.parameters(), lr=learning_rate)
    # for stepper 
    n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
    scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*cycle_len)
    global all_lr
    all_lr = []
    
    best_val_loss = np.inf
    
    if pre_saved:
        checkpoint = torch.load(save_path)
        start_epoch = checkpoint['epoch']
        best_val_loss = checkpoint['best_val_loss']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('...restoring model...')
    begin = True
    
    for epoch_ in tnrange(1, epochs+1, desc='Epoch'):
        
        if pre_saved:      
            if begin:
                epoch = start_epoch
                begin = False
        else:
            epoch = epoch_
        
        # training
        train_loss = train(model, train_loader, optimizer, scheduler, loss, print_period)
        
        print_output = [epoch, train_loss]
        
        # validation
        if val_loader:
            val_loss = validate(model, val_loader, optimizer, loss, metrics)
            if val_loss[0] < best_val_loss:
                best_val_loss = val_loss[0]
                
                # save model     
                if save:
                    if save_path:
                        ensure_dir(save_path)
                        state = {
                            'epoch': epoch,
                            'state_dict': model.state_dict(),
                            'best_val_loss': best_val_loss,
                            'optimizer': optimizer.state_dict()
                        }
                        save_checkpoint(state, save_path=save_path)
                        
            for i in val_loss: print_output.append(i)

        # epoch, train loss, val loss, metrics (optional)
        print('\n', print_output)
        # sys.stdout.write('\r' + str(print_output))

        # reset scheduler
        if epoch_ % cycle_len == 0:
            scheduler = scheduler._reset(epoch, T_max=n_batches*cycle_len)
        
        epoch += 1
    
    
def get_data(X, y):
    # first two need to be user_id and book_id, meta after 
    user_ids, book_ids, conts = X[:, 0], X[:, 1], X[:, 2:]
    user_ids = Variable(user_ids.long()).cuda()
    book_ids = Variable(book_ids.long()).cuda()
    conts = Variable(conts.float()).cuda()
    y = Variable(y.float()).cuda()
    return user_ids, book_ids, conts, y


def train(model, train_loader, optimizer, scheduler, loss, print_period=1000):

    # change this to show expontially weighted moving average
    # avg_loss = avg_loss * avg_mom + loss * (1-avg_mom)
    epoch_loss = 0.
    n_batches = int(train_loader.dataset.N / train_loader.batch_size)
    model.train()
    
    for i, (X, y) in enumerate(train_loader):
        optimizer.zero_grad()
        user_ids, book_ids, conts, y = get_data(X, y)

        y_hat = model(user_ids, book_ids, conts)
        l = loss(y_hat, y)
        epoch_loss += l.data[0]

        l.backward()
        optimizer.step()
        # scheduler
        scheduler.step()
        all_lr.append(scheduler.get_lr())

        if i != 0 and i % print_period == 0:
            # sys.stdout.write('\r' + 'iteration: {} of n_batches: {}'.format(i, n_batches))
            # sys.stdout.flush()
            # print('iteration: {} of n_batches: {}'.format(i, n_batches))
            statement = '[{}/{} ({:.0f}%)]'.format(i, n_batches, (i / n_batches)*100.)
            sys.stdout.write('\r' + statement)
            sys.stdout.flush()

#     train_loss = epoch_loss / n_batches
    train_loss = epoch_loss / train_loader.dataset.N
    
    return train_loss


def validate(model, val_loader, optimizer, loss, metrics=None):
    model.eval()
    n_batches = int(val_loader.dataset.N / val_loader.batch_size)
    total_loss = 0.
    metric_scores = {}
    if metrics:
        for metric in metrics:
            metric_scores[str(metric)] = []
            
    for i, (X_test, y_test) in enumerate(val_loader):
        user_ids, book_ids, conts, y_test = get_data(X_test, y_test)
        y_hat = model(user_ids, book_ids, conts)

        l = loss(y_hat, y_test)
        total_loss += l.data[0]

        if metrics:
            for metric in metrics:
                metric_scores[str(metric)].append(metric(y_test.data.cpu().numpy(), y_hat.data.cpu().numpy()))
    if metrics:
        final_metrics = []
        for metric in metrics:
#             final_metrics.append(np.sum(metric_scores[str(metric)]) / n_batches)
            final_metrics.append(np.sum(metric_scores[str(metric)]) / val_loader.dataset.N)
#         return total_loss / n_batches, final_metrics
        return total_loss / val_loader.dataset.N, final_metrics
    else:
#         return total_loss / n_batches
        return total_loss / val_loader.dataset.N


def save_checkpoint(state, save_path='tmp/checkpoint.pth.tar'):
    torch.save(state, save_path)

# def predict(model, df, cat_flds, cont_flds):
#     model.eval()

#     cats = np.asarray(df[cat_flds], dtype=np.int64)
#     conts = np.asarray(df[cont_flds], dtype=np.float32)
#     x_cat = Variable(torch.from_numpy(cats))
#     x_cont = Variable(torch.from_numpy(conts))
#     pred = model(x_cat, x_cont)
#     return pred.data.numpy().flatten()

def load_model(model, save_path='tmp/checkpoint.pth.tar'):
    checkpoint = torch.load(save_path)
    model.load_state_dict(checkpoint['state_dict'])
    return model

def save_model(model, save_path='tmp/checkpoint.pth.tar'):
    model.save_state_dict(save_path)

def ensure_dir(file_path):
    directory = os.path.dirname(file_path)
    if not os.path.exists(directory):
        os.makedirs(directory)

In [9]:
from sklearn.model_selection import train_test_split

u_uniq = ratings.User_ID.unique()
user2idx = {o:i for i,o in enumerate(u_uniq)}
ratings['New_User_ID'] = ratings.User_ID.apply(lambda x: user2idx[x])

m_uniq = ratings.ISBN.unique()
book2idx = {o:i for i,o in enumerate(m_uniq)}
ratings['New_Book_ID'] = ratings.ISBN.apply(lambda x: book2idx[x])

n_users = int(ratings.New_User_ID.nunique())
n_books = int(ratings.New_Book_ID.nunique())

train, test = train_test_split(ratings, test_size=0.2, random_state=100)

In [17]:
# Training helpers
def get_trainable(model_params):
    return (p for p in model_params if p.requires_grad)


def get_frozen(model_params):
    return (p for p in model_params if not p.requires_grad)


def all_trainable(model_params):
    return all(p.requires_grad for p in model_params)


def all_frozen(model_params):
    return all(not p.requires_grad for p in model_params)


def freeze_all(model_params):
    for param in model_params:
        param.requires_grad = False


In [20]:
get_trainable(model.parameters())

<generator object get_trainable.<locals>.<genexpr> at 0x7f95866617d8>

In [205]:
from torch.utils.data import DataLoader, Dataset

class TorchDataSet(Dataset):
    def __init__(self, ratings):
        self.X = ratings[['New_User_ID', 'New_Book_ID']].values
        self.y = ratings['Book_Rating'].values
        self.N = len(self.y)
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    
batch_size = 32
train_torch_data_set = TorchDataSet(train)
test_torch_data_set = TorchDataSet(test)
train_data_loader = DataLoader(train_torch_data_set, batch_size=batch_size, shuffle=True)
test_data_loader = DataLoader(test_torch_data_set, batch_size=batch_size, shuffle=False)

In [233]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

n_factors = 50
max_rating = float(train.Book_Rating.max())
min_rating = float(train.Book_Rating.min())

def mse(x, y):
    return np.sqrt(((x-y)**2).mean())

def rmse(x, y): 
    return np.sqrt(mse(x, y))

def mae(x, y): 
    return np.abs((x-y)).mean()


metrics=[mse, rmse, mae]


class MiniNet(nn.Module):
    
    def __init__(self, n_users, n_books, hidden=100):
        super().__init__()
        self.user_embedding = nn.Embedding(n_users, n_factors)
        self.book_embedding = nn.Embedding(n_books, n_factors)
        self.linear1 = nn.Linear(n_factors * 2, hidden)
        self.linear2 = nn.Linear(hidden, 1)
        self.drop1 = nn.Dropout(0.75)
        self.drop2 = nn.Dropout(0.75)
        
        self.user_embedding.weight.data.uniform_(-0.01,0.01)
        self.book_embedding.weight.data.uniform_(-0.01,0.01)       
    
    def forward(self, users, books):
        u = self.user_embedding(users)
        b = self.book_embedding(books)
        X = self.drop1(torch.cat([u, b], dim=1))
        X = self.drop2(F.relu(self.linear1(X)))
        return F.sigmoid(self.linear2(X)) * (max_rating - min_rating+1) + min_rating-0.5
    
    

    

class EmbeddingDotBias(nn.Module):
    
    def __init__(self, n_users, n_books):
        super().__init__()
        self.user_embedding = nn.Embedding(n_users, n_factors)
        self.book_embedding = nn.Embedding(n_books, n_factors)
        self.user_bias = nn.Embedding(n_users, 1)
        self.book_bias = nn.Embedding(n_books, 1)
        
        self.user_embedding.weight.data.uniform_(-0.01,0.01)
        self.book_embedding.weight.data.uniform_(-0.01,0.01)       
        self.user_bias.weight.data.uniform_(-0.01,0.01)
        self.book_bias.weight.data.uniform_(-0.01,0.01)
    
    def forward(self, users, books):
        u = self.user_embedding(users)
        b = self.book_embedding(books)
#         print(self.user_bias(users).size())
        u_b = self.user_bias(users).squeeze()
#         print(u.size(), u_b.size())
        b_b = self.book_bias(books).squeeze()
        X = ( (u * b).sum(1) ) + u_b + b_b
        X = F.sigmoid(X) * (max_rating - min_rating) + max_rating
        return X.view(-1, 1)
    
    
class EmbeddingDot(nn.Module):
    
    def __init__(self, n_users, n_books):
        super().__init__()
        self.user_embedding = nn.Embedding(n_users, n_factors)
        self.book_embedding = nn.Embedding(n_books, n_factors)
        self.user_embedding.weight.data.uniform_(0, 0.05)
        self.book_embedding.weight.data.uniform_(0, 0.05)
    
    def forward(self, users, books):
        u = self.user_embedding(users)
        b = self.book_embedding(books)
        return (u * b).sum(1)

In [234]:
from torch.utils.data import DataLoader, Dataset

class TorchDataSet(Dataset):
    def __init__(self, ratings):
        self.X = ratings[['New_User_ID', 'New_Book_ID']].values
        self.y = ratings['Book_Rating'].astype(np.float32).values
        self.N = len(self.y)
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    
train_torch_data_set = TorchDataSet(train)
test_torch_data_set = TorchDataSet(test)
train_data_loader = DataLoader(train_torch_data_set, batch_size=32, shuffle=True)
test_data_loader = DataLoader(test_torch_data_set, batch_size=32, shuffle=False)

class EmbeddingDotBias(nn.Module):
    
    def __init__(self, n_users, n_books):
        super().__init__()
        self.user_embedding = nn.Embedding(n_users, n_factors)
        self.book_embedding = nn.Embedding(n_books, n_factors)
        self.user_bias = nn.Embedding(n_users, 1)
        self.book_bias = nn.Embedding(n_books, 1)
        
        self.user_embedding.weight.data.uniform_(-0.01,0.01)
        self.book_embedding.weight.data.uniform_(-0.01,0.01)       
        self.user_bias.weight.data.uniform_(-0.01,0.01)
        self.book_bias.weight.data.uniform_(-0.01,0.01)
    
    def forward(self, X):
        users, books = X[:, 0], X[:, 1]
        u = self.user_embedding(users)
        b = self.book_embedding(books)
        u_b = self.user_bias(users).squeeze()
        b_b = self.book_bias(books).squeeze()
        X = ( (u * b).sum(1) ) + u_b + b_b
        X = F.sigmoid(X) * (max_rating - min_rating) + max_rating
        return X.view(-1, 1)

In [235]:
device = 'cuda'
model = EmbeddingDotBias(n_users, n_books).to(device)

In [236]:
import ignite
from ignite.metrics import RootMeanSquaredError, MeanAbsoluteError
from ignite.engine import Engine, create_supervised_evaluator, Events

import torch

from ignite.engine.engine import Engine, State, Events

def apply_to_tensor(input_, func):
    """Apply a function on a tensor or mapping, or sequence of tensors.
    """
    return apply_to_type(input_, torch.Tensor, func)


def apply_to_type(input_, input_type, func):
    """Apply a function on a object of `input_type` or mapping, or sequence of objects of `input_type`.
    """
    if isinstance(input_, input_type):
        return func(input_)
    elif isinstance(input_, string_classes):
        return input_
    elif isinstance(input_, collections.Mapping):
        return {k: apply_to_type(sample, input_type, func) for k, sample in input_.items()}
    elif isinstance(input_, collections.Sequence):
        return [apply_to_type(sample, input_type, func) for sample in input_]
    else:
        raise TypeError(("input must contain {}, dicts or lists; found {}"
                         .format(input_type, type(input_))))


def convert_tensor(input_, device=None, non_blocking=False):
    """Move tensors to relevant device."""
    def _func(tensor):
        return tensor.to(device=device, non_blocking=non_blocking) if device else tensor

    return apply_to_tensor(input_, _func)

def _prepare_batch(batch, device=None, non_blocking=False):
    """Prepare batch for training: pass to a device with options
    """
    x, y = batch
    return (convert_tensor(x, device=device, non_blocking=non_blocking),
            convert_tensor(y, device=device, non_blocking=non_blocking))


all_lr = []

def create_supervised_trainer(model, optimizer, loss_fn,
                              device=None, non_blocking=False,
                              prepare_batch=_prepare_batch):
    if device:
        model.to(device)

    def _update(engine, batch):
        model.train()
        optimizer.zero_grad()
        x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)
        y_pred = model(x)
        y_pred = y_pred.squeeze()
        loss = loss_fn(y_pred, y)
        loss.backward()
        optimizer.step()
        for param_group in optimizer.param_groups:
            all_lr.append(param_group['lr'])
        # scheduler
        scheduler.step()
        return loss.item()

    return Engine(_update)

n_batches = train_data_loader.dataset.N // batch_size
cycle_len = 1
crit = nn.MSELoss()
optimizer = torch.optim.Adam(get_trainable(model.parameters()), lr=0.001)


import visdom

def create_plot_window(vis, xlabel, ylabel, title):
    return vis.line(X=np.array([1]), Y=np.array([np.nan]), opts=dict(xlabel=xlabel, ylabel=ylabel, title=title))


vis = visdom.Visdom()


from torch.optim.lr_scheduler import CosineAnnealingLR
# scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*cycle_len)
scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*cycle_len)
PRINT_PERIOD = 2000
trainer = create_supervised_trainer(model, optimizer, crit, device=device)
evaluator = create_supervised_evaluator(model, metrics={'RMSE': RootMeanSquaredError(), 'MAE': MeanAbsoluteError()}, 
                                       device=device)

train_loss_window = create_plot_window(vis, '#Iterations', 'Loss', 'Training Loss')
train_avg_mae_window = create_plot_window(vis, '#Iterations', 'Loss', 'Training Average MAE')
train_avg_rmse_window = create_plot_window(vis, '#Iterations', 'Accuracy', 'Training Average RMSE')
val_avg_mae_window = create_plot_window(vis, '#Epochs', 'Loss', 'Validation Average MAE')
val_avg_rmse_window = create_plot_window(vis, '#Epochs', 'Accuracy', 'Validation Average RMSE')

In [237]:
from tensorboardX import SummaryWriter
import datetime    

now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
summary_writer = SummaryWriter(log_dir=f"tf_log/exp_ignite_{now}")

@trainer.on(Events.ITERATION_COMPLETED)
def log_train_loss(engine):
    if (engine.state.iteration - 1) % PRINT_PERIOD == 0:
        print(f'epoch: {engine.state.epoch} -- batch: {engine.state.iteration} -- loss: {engine.state.output:.2f}')
        vis.line(X=np.array([engine.state.iteration]),
                     Y=np.array([engine.state.output]),
                     update='append', win=train_loss_window)
        
@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(engine):
    evaluator.run(train_data_loader)
    metrics = evaluator.state.metrics
    print(f'Train Results -- Epoch: {trainer.state.epoch}, RMSE: {metrics["RMSE"]:.2f}, MAE: {metrics["MAE"]:.2f}')
    summary_writer.add_scalar('training/rmse', metrics['RMSE'])
    summary_writer.add_scalar('training/mae', metrics['MAE'])
    vis.line(X=np.array([engine.state.epoch]), Y=np.array([metrics["RMSE"]]),
                 win=val_avg_rmse_window, update='append')
    vis.line(X=np.array([engine.state.epoch]), Y=np.array([metrics['MAE']]),
                 win=val_avg_mae_window, update='append')
    

@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
    evaluator.run(test_data_loader)
    metrics = evaluator.state.metrics
    print(f'Validation Results -- Epoch: {trainer.state.epoch}, RMSE: {metrics["RMSE"]:.2f}, MAE: {metrics["MAE"]:.2f}')
    summary_writer.add_scalar('validation/rmse', metrics['RMSE'])
    summary_writer.add_scalar('validation/mae', metrics['MAE'])
    vis.line(X=np.array([engine.state.epoch]), Y=np.array([metrics["RMSE"]]),
                 win=train_avg_rmse_window, update='append')
    vis.line(X=np.array([engine.state.epoch]), Y=np.array([metrics['MAE']]),
                 win=train_avg_mae_window, update='append')            

In [238]:
trainer.run(train_data_loader, max_epochs=3)

epoch: 1 -- batch: 1 -- loss: 55.54
epoch: 1 -- batch: 2001 -- loss: 48.73
epoch: 1 -- batch: 4001 -- loss: 44.13
epoch: 1 -- batch: 6001 -- loss: 33.47
epoch: 1 -- batch: 8001 -- loss: 45.13
epoch: 1 -- batch: 10001 -- loss: 39.71
Train Results -- Epoch: 1, RMSE: 6.22, MAE: 5.85
Validation Results -- Epoch: 1, RMSE: 6.32, MAE: 5.95
epoch: 2 -- batch: 12001 -- loss: 38.95
epoch: 2 -- batch: 14001 -- loss: 36.80
epoch: 2 -- batch: 16001 -- loss: 38.10
epoch: 2 -- batch: 18001 -- loss: 32.88
epoch: 2 -- batch: 20001 -- loss: 32.83
Train Results -- Epoch: 2, RMSE: 5.31, MAE: 4.75
Validation Results -- Epoch: 2, RMSE: 5.64, MAE: 5.10
epoch: 3 -- batch: 22001 -- loss: 29.66
epoch: 3 -- batch: 24001 -- loss: 24.86
epoch: 3 -- batch: 26001 -- loss: 20.27
epoch: 3 -- batch: 28001 -- loss: 19.02
epoch: 3 -- batch: 30001 -- loss: 26.78
epoch: 3 -- batch: 32001 -- loss: 31.84
Train Results -- Epoch: 3, RMSE: 4.63, MAE: 3.95
Validation Results -- Epoch: 3, RMSE: 5.20, MAE: 4.55


<ignite.engine.engine.State at 0x7f9516e0d828>

In [241]:
import visdom
import numpy as np
vis = visdom.Visdom()
# vis.text('Hello, world!')
vis.image(np.ones((3, 10, 10)))

'window_36be1b775885d2'

In [None]:
"schedule": [
          {"learning_rate": 0.1, "epochs": 1},
          {"learning_rate": 0.2, "epochs": 1},
          {"learning_rate": 0.3, "epochs": 1},
          {"learning_rate": 0.4, "epochs": 17},
          {"learning_rate": 0.04, "epochs": 14},
          {"learning_rate": 0.004, "epochs": 8},
          {"learning_rate": 0.0004, "epochs": 3}
        ]

In [106]:
from torch.optim.optimizer import Optimizer


class _LRScheduler(object):
    def __init__(self, optimizer, last_epoch=-1):
        if not isinstance(optimizer, Optimizer):
            raise TypeError('{} is not an Optimizer'.format(
                type(optimizer).__name__))
        self.optimizer = optimizer
        if last_epoch == -1:
            for group in optimizer.param_groups:
                group.setdefault('initial_lr', group['lr'])
        else:
            for i, group in enumerate(optimizer.param_groups):
                if 'initial_lr' not in group:
                    raise KeyError("param 'initial_lr' is not specified "
                                   "in param_groups[{}] when resuming an optimizer".format(i))
        self.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups))
        self.step(last_epoch + 1)
        self.last_epoch = last_epoch

    def get_lr(self):
        raise NotImplementedError

    def step(self, epoch=None):
        if epoch is None:
            epoch = self.last_epoch + 1
        self.last_epoch = epoch
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

class CosineAnnealingLR(_LRScheduler):
    def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1):
        self.T_max = T_max
        self.eta_min = eta_min
        self.optimizer = optimizer
        super(CosineAnnealingLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        return [self.eta_min + (base_lr - self.eta_min) *
                (1 + np.cos(np.pi * self.last_epoch / self.T_max)) / 2
                for base_lr in self.base_lrs]
    
    def _reset(self, epoch, T_max):
        """
        Resets cycle iterations.
        Optional boundary/step size adjustment.
        """
        return CosineAnnealingLR(self.optimizer, self.T_max, self.eta_min, last_epoch=epoch)


In [None]:
OPTIM = Adam(
    params=[
        {"params": MODEL.features.parameters(), 'lr': 0.001},
        {"params": MODEL.classifier.parameters(), 'lr': 0.001},
        {"params": MODEL.final_classifiers.parameters(), 'lr': 0.001},
    ],
)

def lambda_lr_features(epoch):
    if epoch < 5:
        return 0.001
    else:
        return 0.1 * (0.75 ** (epoch - 3))

def lambda_lr_classifier(epoch):
    if epoch < 5:
        return 0.01
    else:
        return 0.75 ** (epoch - 3)

def lambda_lr_final_classifiers(epoch):
    if epoch < 5:
        return 1.0
    else:
        return 0.88 ** (epoch - 3)

LR_SCHEDULERS = [
    LambdaLR(OPTIM, lr_lambda=[lambda_lr_features, lambda_lr_classifier, lambda_lr_final_classifiers])
]