# Variational Autoencoder for Collaborative Filtering - single GPU, mixed precision (AMP)

### Imports

In [1]:
try:
    autoreload_guard
except:
    %load_ext autoreload
    %autoreload 2
    autoreload_guard = None


import os
import shutil
import sys


from functools import partial
import numpy as np
from scipy import sparse

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sn
sn.set()

import pandas as pd

import bottleneck as bn

from vae.models.Mult_VAE_training import VAE
from vae.load.generic import load_dataset

from vae.metrics.ndcg import ndcg
from vae.metrics.recall import recall
from vae.utils.round import round_8

### Loading data and splitting it into train, validation and test sets

In [9]:
# Split ratio that is only referenced by the commented-out alternative below.
ratio_10k_items_ml20m = 0.0731651997

# Alternative: build the splits from raw "ml-20m" with explicit ratios/filters.
# train_data, validation_data_input, validation_data_true, test_data_input, test_data_true = load_dataset(
#     "ml-20m", val_ratio=ratio_10k_items_ml20m, test_ratio=ratio_10k_items_ml20m, min_items=5, min_users=0)

# Load the five matrices (train, validation input/true, test input/true).
(train_data,
 validation_data_input, validation_data_true,
 test_data_input, test_data_true) = load_dataset("ml-20m-paper")

[VAE| INFO]: Parsing movielens.
[VAE| INFO]: Done


### Enabling AMP

In [3]:
# Switch on automatic mixed precision through TensorFlow's environment flag.
# NOTE(review): presumably has to be set before the model/session is built - confirm.
amp_env_var = 'TF_ENABLE_AUTO_MIXED_PRECISION'
os.environ[amp_env_var] = '1'

In [4]:
# Make sure all dims are divisible by 8 (batch sizes are rounded the same way
# in a later cell); without this, tensor cores can't be used in AMP.

# Index the shape instead of unpacking into `_`: assigning to `_` in IPython
# shadows the "last output" variable for the rest of the session.
n_items = round_8(train_data.shape[1])

# Resize every split in place so they all share the rounded column count;
# the row count of each matrix is kept as is.
for matrix in (train_data, validation_data_input, validation_data_true,
               test_data_input, test_data_true):
    matrix.resize(matrix.shape[0], n_items)

### Preparing batch sizes

In [5]:
# Training and validation use the same nominal batch size; each one goes
# through round_8 so it is divisible by 8 like the model dimensions.
base_batch_size = 10000
batch_size_train = round_8(base_batch_size)
batch_size_validation = round_8(base_batch_size)

### Configuring annealing

In [6]:
# Annealing schedule handed to the VAE constructor.
anneal_cap = 0.2                # upper bound of the annealing parameter
total_anneal_steps = int(2e5)   # gradient updates over which annealing runs

### Setting NN layers

In [7]:
# Encoder layer sizes, starting from the number of item columns in the
# training matrix; the hidden sizes go through round_8 so that they are
# divisible by 8 as well.
# The shape is indexed rather than unpacked into `_`, because assigning to `_`
# in IPython shadows the "last output" variable for the rest of the session.
nitems = train_data.shape[1]
encoder_dims = [nitems, round_8(600), round_8(200)]

### Initializing model

In [8]:
# Re-running this cell would otherwise leave the previous model's session
# dangling, so close it first; on a fresh kernel `vae` does not exist yet and
# the resulting NameError is ignored.
try:
    vae.close_session()
except NameError:
    pass

# Keyword arguments for the VAE constructor, gathered in one place.
vae_kwargs = dict(
    total_anneal_steps=total_anneal_steps,
    anneal_cap=anneal_cap,
    batch_size_train=batch_size_train,
    batch_size_validation=batch_size_validation,
    lam=1e-2,
    lr=1e-3,
    # NOTE(review): GPU:1 is presumably the second GPU - confirm this is
    # intended, given that the notebook title says "single GPU".
    device='/device:GPU:1',
)

vae = VAE(train_data, encoder_dims, **vae_kwargs)

AttributeError: module 'tensorflow._api.v1.sparse' has no attribute 'sparse_dense_matmul'

### Choosing number of epochs

In [None]:
# Number of epochs passed to vae.train() in the "Running model" section.
n_epochs = 20

### Metrics

In [None]:
# Evaluation metrics keyed by display name; every entry is ndcg/recall with
# its R argument already bound through functools.partial.
metrics = {'ndcg@100': partial(ndcg, R=100)}
for cutoff in (20, 50):
    metrics['recall@{}'.format(cutoff)] = partial(recall, R=cutoff)

### Running model

In [None]:
# Train the model and evaluate `metrics` during training on the validation
# split. The validation matrices (not the test matrices) are passed here so
# that the test split stays unseen until the "Testing" section.
vae.train(
    n_epochs=n_epochs,
    train_data=train_data,
    validation_data_input=validation_data_input,
    validation_data_true=validation_data_true,
    batch_size_train=batch_size_train,
    batch_size_validation=batch_size_validation,
    metrics=metrics,
    # NOTE(review): presumably the interval between validation runs - confirm units.
    validation_step=20,
)

### Saving the model

In [None]:
# from tensorflow.python.saved_model.simple_save import simple_save
# simple_save(vae.session,
#                     "export_dir",
#                     inputs={"inputs_validation": vae.inputs_validation},
#                     outputs={"logits_validation": vae.logits_validation})

### Testing

In [None]:
# Evaluate the trained model on the test split with the same `metrics` dict;
# the result is iterated as a mapping in the next cell.
test_results = vae.test(metrics=metrics,
                        test_data_input=test_data_input,
                        test_data_true=test_data_true)

In [None]:
# Print one "<metric name>:<TAB><value>" line per entry of the test results.
row_template = "{}:\t{}"
for metric_name, score in test_results.items():
    print(row_template.format(metric_name, score))