In [13]:
from __future__ import print_function
import argparse

import torch
import torch.optim as optim

from utils.optimizer import AdamNormGrad

import os
import numpy as np
import datetime

from utils.load_data import load_dataset


In [2]:
# Training settings
# All hyper-parameters are declared through argparse so that the same
# definitions work for a command-line script; in the notebook they are parsed
# from an empty argument list (see below), i.e. every option takes its default.
parser = argparse.ArgumentParser(description='VAE+VampPrior')

# arguments for optimization
parser.add_argument('--batch_size', type=int, default=200, metavar='BStrain',
                    help='input batch size for training (default: 200)')
parser.add_argument('--test_batch_size', type=int, default=1000, metavar='BStest',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=400, metavar='E',
                    help='number of epochs to train (default: 400)')
parser.add_argument('--lr', type=float, default=0.0005, metavar='LR',
                    help='learning rate (default: 0.0005)')
parser.add_argument('--early_stopping_epochs', type=int, default=50, metavar='ES',
                    help='number of epochs for early stopping')

parser.add_argument('--warmup', type=int, default=100, metavar='WU',
                    help='number of epochs for warm-up')
parser.add_argument('--max_beta', type=float, default=1., metavar='B',
                    help='maximum value of beta for training')

# cuda
# The flag *disables* CUDA: `args.cuda` below is True only when the flag is
# absent and a GPU is available, so the help text must say "disables".
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
# random seed
parser.add_argument('--seed', type=int, default=14, metavar='S',
                    help='random seed (default: 14)')

# model: latent size, input_size, so on
parser.add_argument('--num_layers', type=int, default=1, metavar='NL',
                    help='number of layers')

parser.add_argument('--z1_size', type=int, default=200, metavar='M1',
                    help='latent size')
parser.add_argument('--z2_size', type=int, default=200, metavar='M2',
                    help='latent size')
parser.add_argument('--hidden_size', type=int, default=600, metavar="H",
                    help='the width of hidden layers')
# NOTE: `type=int` only converts values given on the command line; the list
# default is stored as-is.
parser.add_argument('--input_size', type=int, default=[1, 28, 28], metavar='D',
                    help='input size')

parser.add_argument('--activation', type=str, default=None, metavar='ACT',
                    help='activation function')

parser.add_argument('--number_components', type=int, default=1000, metavar='NC',
                    help='number of pseudo-inputs')
parser.add_argument('--pseudoinputs_mean', type=float, default=0.05, metavar='PM',
                    help='mean for init pseudo-inputs')
parser.add_argument('--pseudoinputs_std', type=float, default=0.01, metavar='PS',
                    help='std for init pseudo-inputs')

parser.add_argument('--use_training_data_init', action='store_true', default=False,
                    help='initialize pseudo-inputs with randomly chosen training data')

# model: model name, prior
parser.add_argument('--model_name', type=str, default='vamp', metavar='MN',
                    help='model name: baseline, vamp, hvamp, hvamp1')

parser.add_argument('--input_type', type=str, default='binary', metavar='IT',
                    help='type of the input: binary, gray, continuous, multinomial')

parser.add_argument('--gated', action='store_true', default=False,
                    help='use gating mechanism')

# experiment
parser.add_argument('--S', type=int, default=5000, metavar='SLL',
                    help='number of samples used for approximating log-likelihood')
parser.add_argument('--MB', type=int, default=100, metavar='MBLL',
                    help='size of a mini-batch used for approximating log-likelihood')

# dataset
parser.add_argument('--dataset_name', type=str, default='netflix', metavar='DN',
                    help='name of the dataset:  ml20m, netflix, pinterest')

parser.add_argument('--dynamic_binarization', action='store_true', default=False,
                    help='allow dynamic binarization')

# note
parser.add_argument('--note', type=str, default="none", metavar='NT',
                    help='additional note on the experiment')
parser.add_argument('--no_log', action='store_true', default=False,
                    help='print log to log_dir')

# Parse an explicit empty list so the kernel's own sys.argv is ignored.
args = parser.parse_args([])
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Seed every random number generator used here for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Extra keyword arguments forwarded to `load_dataset` (GPU runs only).
kwargs = {'num_workers': 0, 'pin_memory': True} if args.cuda else {}  #! Changed num_workers: 1->0 because of error


In [3]:
# Input shape as parsed from the argparse defaults (shown before `load_dataset` is called).
args.input_size

[1, 28, 28]

In [5]:
# Sanity check: which dataset / model combination this run is configured for.
args.dataset_name,args.model_name

('netflix', 'vamp')

In [6]:
# Run signature: the first ten characters of the timestamp, i.e. the date part.
args.model_signature = str(datetime.datetime.now())[:10]

# Descriptive run name encoding the dataset, model and main hyper-parameters.
model_name = ''.join([
    args.dataset_name, '_', args.model_name, '_',
    '(K_', str(args.number_components), ')',
    '_', args.input_type, '_beta(', str(args.max_beta), ')',
    '_layers(', str(args.num_layers), ')', '_hidden(', str(args.hidden_size), ')',
    '_z1(', str(args.z1_size), ')', '_z2(', str(args.z2_size), ')',
])

# Snapshot directory for this run (created on first use).
# NOTE(review): the name `dir` shadows the builtin, but later cells read it.
snapshots_path = 'snapshots/'
dir = ''.join([snapshots_path, args.model_signature, '_', model_name, '/'])

if not os.path.exists(dir):
    os.makedirs(dir)

# Data loading: `load_dataset` also hands back `args`, which is rebound here.
print('load data')

train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)


load data


In [7]:
# Model creation: pick the VAE implementation matching `args.model_name`,
# instantiate it, and attach the optimizer.
print('create model')

# Reject unknown names up front, then import the matching implementation.
if args.model_name not in ('baseline', 'vamp', 'hvamp', 'hvamp1'):
    raise Exception('Wrong name of the model!')

if args.model_name == 'baseline':
    from models.Baseline import VAE
elif args.model_name == 'vamp':
    from models.Vamp import VAE
elif args.model_name == 'hvamp':
    from models.HVamp import VAE
else:  # 'hvamp1'
    from models.HVamp_1layer import VAE

model = VAE(args)
if args.cuda:
    model.cuda()

optimizer = AdamNormGrad(model.parameters(), lr=args.lr)

# Show the effective configuration and make sure the log file exists.
print(args)
log_dir = "vae_experiment_log_" + str(os.getenv("COMPUTERNAME")) + ".txt"

# Opening in append mode creates the file if missing without truncating it.
with open(log_dir, 'a'):
    pass


create model
Namespace(MB=100, S=5000, activation=None, batch_size=200, cuda=False, dataset_name='netflix', dynamic_binarization=False, early_stopping_epochs=50, epochs=400, gated=False, hidden_size=600, input_size=[1, 1, 7738], input_type='binary', lr=0.0005, max_beta=1.0, model_name='vamp', model_signature='2023-05-12', no_cuda=False, no_log=False, note='none', num_layers=1, number_components=1000, pseudoinputs_mean=0.05, pseudoinputs_std=0.01, seed=14, test_batch_size=1000, use_training_data_init=False, warmup=100, z1_size=200, z2_size=200)


In [14]:
from utils.training import train_vae as train
from utils.evaluation import evaluate_vae as evaluate

In [9]:

# Experiment bookkeeping: persist the configuration and reset the state that
# the training loop updates.
print('perform experiment')
model_name, dataset_name = args.model_name, args.dataset_name

# Store the run configuration next to the model snapshots.
torch.save(args, dir + args.model_name + '.config')

# Early-stopping state: best validation NDCG so far, epochs without
# improvement (`e`) and the last epoch that was run.
best_ndcg = 0.
e = 0
last_epoch = 0

# Per-epoch histories (training, validation, ranking quality, wall-clock time).
train_loss_history, train_re_history, train_kl_history = [], [], []
val_loss_history, val_re_history, val_kl_history = [], [], []
val_ndcg_history = []
time_history = []


perform experiment


In [10]:
# Input shape after `load_dataset` returned `args` (compare with the parser default shown earlier).
args.input_size

[1, 1, 7738]

In [11]:
# Display the layer structure of the instantiated model.
model

VAE(
  (q_z_layers): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): NonLinear(
      (activation): Tanh()
      (linear): Linear(in_features=7738, out_features=600, bias=True)
    )
  )
  (q_z_mean): Linear(in_features=600, out_features=200, bias=True)
  (q_z_logvar): NonLinear(
    (activation): Hardtanh(min_val=-12.0, max_val=4.0)
    (linear): Linear(in_features=600, out_features=200, bias=True)
  )
  (p_x_layers): Sequential(
    (0): NonLinear(
      (activation): Tanh()
      (linear): Linear(in_features=200, out_features=600, bias=True)
    )
  )
  (p_x_mean): NonLinear(
    (activation): Sigmoid()
    (linear): Linear(in_features=600, out_features=7738, bias=True)
  )
  (means): NonLinear(
    (activation): Hardtanh(min_val=0.0, max_val=1.0)
    (linear): Linear(in_features=1000, out_features=7738, bias=False)
  )
)

In [34]:
import time
import math
# Short run: override the parser default for the number of epochs.
args.epochs = 2

# NOTE(review): `evaluate_vae` is defined in a cell located further down in this
# notebook; on a fresh kernel that cell has to be executed before this one.
for epoch in range(1, args.epochs + 1):
    # --- one epoch of training followed by validation, timed together ---
    time_start = time.time()
    model, train_loss_epoch, train_re_epoch, train_kl_epoch = train(
        epoch, args, train_loader, model, optimizer)

    val_loss_epoch, val_re_epoch, val_kl_epoch, val_ndcg_epoch = evaluate_vae(
        args, model, train_loader, val_loader, epoch, dir, mode='validation')
    time_end = time.time()

    time_elapsed = time_end - time_start

    # --- record the histories ---
    train_loss_history.append(train_loss_epoch)
    train_re_history.append(train_re_epoch)
    train_kl_history.append(train_kl_epoch)
    val_loss_history.append(val_loss_epoch)
    val_re_history.append(val_re_epoch)
    val_kl_history.append(val_kl_epoch)
    val_ndcg_history.append(val_ndcg_epoch)
    time_history.append(time_elapsed)

    # --- report (early-stopping counter and best NDCG are the values from
    #     *before* this epoch's update) ---
    print('Epoch: {}/{}, Time elapsed: {:.2f}s\n'
          '* Train loss: {:.2f}   (RE: {:.2f}, KL: {:.2f})\n'
          'o Val.  loss: {:.2f}   (RE: {:.2f}, KL: {:.2f}, NDCG: {:.5f})\n'
          '--> Early stopping: {}/{} (BEST: {:.5f})\n'.format(
              epoch, args.epochs, time_elapsed,
              train_loss_epoch, train_re_epoch, train_kl_epoch,
              val_loss_epoch, val_re_epoch, val_kl_epoch, val_ndcg_epoch,
              e, args.early_stopping_epochs, best_ndcg))

    # --- early stopping on validation NDCG ---
    last_epoch = epoch
    if val_ndcg_epoch > best_ndcg:
        # New best model: reset the patience counter and snapshot the model.
        e, best_ndcg = 0, val_ndcg_epoch
        print('->model saved<-')
        torch.save(model, dir + args.model_name + '.model')
    else:
        # During warm-up the patience counter is held at zero.
        e = 0 if epoch < args.warmup else e + 1
        if e > args.early_stopping_epochs:
            break

    # --- abort when the validation loss has become NaN ---
    if math.isnan(val_loss_epoch):
        break


beta: 0.01
Epoch: 1/2, Time elapsed: 124.47s
* Train loss: 202.76   (RE: 202.70, KL: 5.50)
o Val.  loss: 105.93   (RE: 106.55, KL: -0.61, NDCG: 0.13753)
--> Early stopping: 0/50 (BEST: 0.00000)

->model saved<-
beta: 0.02
Epoch: 2/2, Time elapsed: 106.37s
* Train loss: 114.04   (RE: 113.88, KL: 7.85)
o Val.  loss: 94.55   (RE: 91.85, KL: 2.70, NDCG: 0.14973)
--> Early stopping: 0/50 (BEST: 0.13753)

->model saved<-


In [31]:
import torch
from torch.autograd import Variable
import bottleneck as bn
def evaluate_vae(args, model, train_loader, data_loader, epoch, dir, mode):
    """Evaluate ``model`` on ``data_loader`` and compute ranking metrics.

    Every batch of ``data_loader`` is unpacked as ``(train, heldout)``. The loss
    is computed on ``train``; the scores returned by
    ``model.reconstruct_x(train)`` are then ranked against ``heldout`` after all
    positions that are non-zero in ``train`` have been masked with ``-inf``.

    Users without any held-out item yield NaN for a metric (0/0). Such entries
    are ignored in all reported statistics, in validation *and* test mode.

    Args:
        args: namespace-like object; only ``args.cuda`` is read (when true the
            batches are moved to the GPU before being fed to the model).
        model: object providing ``eval()``, ``calculate_loss(x, average=True)``
            (returning ``loss, RE, KL``) and ``reconstruct_x(x)``.
        train_loader: unused, kept for interface compatibility.
        data_loader: sized iterable of ``(train, heldout)`` tensor pairs.
        epoch: unused, kept for interface compatibility.
        dir: unused, kept for interface compatibility.
        mode: ``'test'`` reports the extended metric set; any other value
            reports NDCG@100 only.

    Returns:
        ``(loss, re, kl, ndcg)`` with the NaN-ignoring mean NDCG@100 as a float,
        or, for ``mode == 'test'``,
        ``(loss, re, kl, ndcg100, ndcg20, ndcg10, recall50, recall20, recall10,
        recall5, recall1)`` where every metric is a ``"mean(sem)"`` string.
        ``loss``/``re``/``kl`` are averaged over batches; ``re`` is the negated
        ``RE`` value returned by the model.
    """
    # Keep using bottleneck when it is installed; otherwise fall back to
    # NumPy's argpartition, which is called with the same arguments.
    try:
        import bottleneck as bn
        argpartition = bn.argpartition
    except ImportError:
        argpartition = np.argpartition

    def NDCG_binary_at_k_batch(X_pred, heldout_batch, k=100):
        '''
        normalized discounted cumulative gain@k for binary relevance
        ASSUMPTIONS: all the 0's in heldout_data indicate 0 relevance
        '''
        batch_users = X_pred.shape[0]
        rows = np.arange(batch_users)[:, np.newaxis]
        # Unordered top-k candidates first, then sort only those k scores.
        idx_topk_part = argpartition(-X_pred, k, axis=1)
        topk_part = X_pred[rows, idx_topk_part[:, :k]]
        idx_part = np.argsort(-topk_part, axis=1)
        # X_pred[rows, idx_topk] is the sorted top-k predicted score
        idx_topk = idx_topk_part[rows, idx_part]
        # discount template 1 / log2(rank + 1)
        tp = torch.tensor(1. / np.log2(np.arange(2, k + 2)), dtype=torch.float)

        # All metric arithmetic happens on the CPU so GPU batches work as well.
        heldout_cpu = heldout_batch.detach().cpu()
        gains = heldout_cpu.gather(1, torch.as_tensor(idx_topk, dtype=torch.long))
        DCG = (gains * tp).sum(dim=1)
        n_relevant = (heldout_cpu != 0).sum(dim=1).tolist()
        IDCG = torch.tensor([tp[:min(n, k)].sum().item() for n in n_relevant],
                            dtype=torch.float)
        # 0 / 0 -> NaN for users without held-out items (filtered out later).
        return DCG / IDCG

    def Recall_at_k_batch(X_pred, heldout_batch, k=100):
        '''Recall@k, normalised by min(k, number of held-out items).'''
        batch_users = X_pred.shape[0]

        idx = argpartition(-X_pred, k, axis=1)
        X_pred_binary = np.zeros_like(X_pred, dtype=bool)
        X_pred_binary[np.arange(batch_users)[:, np.newaxis], idx[:, :k]] = True

        X_true_binary = heldout_batch.detach().cpu() > 0
        hits = (X_true_binary & torch.from_numpy(X_pred_binary)).sum(dim=1).float()
        n_true = X_true_binary.sum(dim=1).float()
        return hits / torch.clamp(n_true, max=k)

    def mean_sem(values):
        '''Format "mean(standard error)" over the non-NaN entries of values.'''
        valid = values[~torch.isnan(values)]
        sem = valid.std().item() / np.sqrt(len(valid))
        return "{:.5f}({:.4f})".format(valid.mean().item(), sem)

    # (name, metric function, cut-off); NDCG@100 is always computed.
    metric_specs = [('ndcg_100', NDCG_binary_at_k_batch, 100)]
    if mode == 'test':
        metric_specs += [
            ('ndcg_20', NDCG_binary_at_k_batch, 20),
            ('ndcg_10', NDCG_binary_at_k_batch, 10),
            ('recall_50', Recall_at_k_batch, 50),
            ('recall_20', Recall_at_k_batch, 20),
            ('recall_10', Recall_at_k_batch, 10),
            ('recall_5', Recall_at_k_batch, 5),
            ('recall_1', Recall_at_k_batch, 1),
        ]
    per_user = dict((name, []) for name, _, _ in metric_specs)

    # set loss to 0
    evaluate_loss = 0
    evaluate_re = 0
    evaluate_kl = 0

    # set model to evaluation mode
    model.eval()

    # evaluate
    for batch_idx, (train, test) in enumerate(data_loader):
        if args.cuda:
            train, test = train.cuda(), test.cuda()

        x = train

        with torch.no_grad():
            # calculate loss function
            loss, RE, KL = model.calculate_loss(x, average=True)

            evaluate_loss += loss.item()
            evaluate_re += -RE.item()
            evaluate_kl += KL.item()

            # Scores as a NumPy copy on the host; np.array() on a CUDA tensor
            # would raise, hence the explicit .cpu().
            pred_val = np.array(model.reconstruct_x(x).detach().cpu())
            x_np = np.array(x.detach().cpu())
            # Items already present in the input must never be recommended.
            pred_val[x_np.nonzero()] = -np.inf

            for name, metric_fn, k in metric_specs:
                per_user[name].append(metric_fn(pred_val, test, k=k))

    # calculate final loss
    n_batches = len(data_loader)
    evaluate_loss /= n_batches  # loss function already averages over batch size
    evaluate_re /= n_batches  # re already averages over batch size
    evaluate_kl /= n_batches  # kl already averages over batch size

    scores = dict((name, torch.cat(chunks)) for name, chunks in per_user.items())
    evaluate_ndcg = scores['ndcg_100'].nanmean().item()

    if mode == 'test':
        summaries = tuple(mean_sem(scores[name]) for name, _, _ in metric_specs)
        return (evaluate_loss, evaluate_re, evaluate_kl) + summaries
    else:
        return evaluate_loss, evaluate_re, evaluate_kl, evaluate_ndcg


In [32]:
# Stand-alone validation pass with the `evaluate_vae` defined in this notebook.
# NOTE: relies on `epoch` (and `model`) still being defined in the kernel from the training loop.
val_loss_epoch, val_re_epoch, val_kl_epoch, val_ndcg_epoch = evaluate_vae(args, model, train_loader, val_loader, epoch, dir, mode='validation')
    

In [33]:
# Validation NDCG@100 returned by the stand-alone `evaluate_vae` call.
val_ndcg_epoch

0.02525036782026291

In [6]:
from scipy.special import logsumexp