In [2]:
import os
GPU_ID = 0
os.environ['CUDA_VISIBLE_DEVICES'] = str(GPU_ID)

import sys
sys.path.insert(1, os.path.join(sys.path[0], '..'))

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm, notebook

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np

import random
import time
import sys
import csv
import pickle
import pandas as pd

from tensorboardX import SummaryWriter
from datetime import datetime
from transformers import *
from torchtext.data import Field

In [3]:
import params as pm
import paths_args as ag
from recsys_lib20 import get_nn_embeddings, acquire_dataloader, ESCOFILT, count_parameters, epoch_time
from general_codes import train_NCF_only, evaluate_NCF_only

In [4]:
torch.manual_seed(pm.SEED_CONST)
torch.cuda.manual_seed(pm.SEED_CONST)
np.random.seed(pm.SEED_CONST)
random.seed(pm.SEED_CONST)
torch.backends.cudnn.deterministic = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print (device)

cuda


In [5]:

input_trn_file = ag.CSV_PATH.format(pm.DOMAIN, "train")
input_vld_file = ag.CSV_PATH.format(pm.DOMAIN, "valid")
input_tst_file = ag.CSV_PATH.format(pm.DOMAIN, "test")

print ("T/T/V dataset splits will be read from the ff.:")
print (" --- ", input_trn_file)
print (" --- ", input_vld_file)
print (" --- ", input_tst_file)


T/T/V dataset splits will be read from the ff.:
 ---  ../shared_datasets/auto_dataset/converted_csv/train.csv
 ---  ../shared_datasets/auto_dataset/converted_csv/valid.csv
 ---  ../shared_datasets/auto_dataset/converted_csv/test.csv


In [6]:
in_user_pk_path = ag.OUT_PICKLE_PATH.format(pm.DOMAIN, pm.EMB_TYPE, "user", pm.USER_RATIO)
in_item_pk_path = ag.OUT_PICKLE_PATH.format(pm.DOMAIN, pm.EMB_TYPE, "item", pm.ITEM_RATIO)

print ("Initial embeddings will be read from: ")
print (" --- ", in_user_pk_path)
print (" --- ", in_item_pk_path)

with open(in_user_pk_path, "rb") as handle1:
    user_embeddings = pickle.loads(handle1.read())

with open(in_item_pk_path, "rb") as handle2:
    item_embeddings = pickle.loads(handle2.read())
    
# Check counts:
print ("\nQuick sanity check: ")
print ("  Number of users: ", len(user_embeddings))
print ("  Number of items: ", len(item_embeddings))
print ("  Type of users: ", type(user_embeddings))
print ("  type of items: ", type(item_embeddings))

user_nn_embeddings = get_nn_embeddings(user_embeddings)
item_nn_embeddings = get_nn_embeddings(item_embeddings)



Initial embeddings will be read from: 
 ---  ../shared_datasets/auto_dataset/exsumm_embedding_mod_punc/user/ratio-0.4.pkl
 ---  ../shared_datasets/auto_dataset/exsumm_embedding_mod_punc/item/ratio-0.4.pkl

Quick sanity check: 
  Number of users:  2928
  Number of items:  1835
  Type of users:  <class 'dict'>
  type of items:  <class 'dict'>


In [7]:
train_df = pd.read_csv(input_trn_file)
valid_df = pd.read_csv(input_vld_file)
test_df = pd.read_csv(input_tst_file)

train_iterator = acquire_dataloader(train_df, pm.B_SIZE, to_random=True)
valid_iterator = acquire_dataloader(valid_df, pm.B_SIZE, to_random=False)
test_iterator = acquire_dataloader(test_df, pm.B_SIZE, to_random=False)

print ("\nQuick sanity check: ")
print ("  Number of train rows (batch): ", len(train_iterator))
print ("  Number of valid rows (batch): ", len(valid_iterator))
print ("  Number of test rows (batch): ", len(test_iterator))
print ()
print ("  Number of train rows (DF): ", len(train_df))
print ("  Number of valid rows (DF): ", len(valid_df))
print ("  Number of test rows (DF): ", len(test_df))




Quick sanity check: 
  Number of train rows (batch):  128
  Number of valid rows (batch):  16
  Number of test rows (batch):  16

  Number of train rows (DF):  16378
  Number of valid rows (DF):  2044
  Number of test rows (DF):  2045



### __NOTE:__ Initialization of various neural components.

In [9]:
'''
# Initialize the CF model here.
'''

ace_model = ESCOFILT(user_nn_embeddings, item_nn_embeddings, pm.MLP_SIZE, pm.EMB_SIZE,
            len(user_embeddings), len(item_embeddings), pm.REDUCE_DIM).to(device)

loss_fxn = nn.MSELoss()

print(f'The AceCF model has {count_parameters(ace_model):,} trainable parameters.')
optimizer = optim.Adam(ace_model.parameters(), lr=pm.CF_LRATE)


trad_user_embeddings.weight
trad_item_embeddings.weight
compress_u.weight
compress_u.bias
compress_i.weight
compress_i.bias
mlp.mlp0.weight
mlp.mlp0.bias
mlp.mlp1.weight
mlp.mlp1.bias
mlp.mlp2.weight
mlp.mlp2.bias
mlp.mlp3.weight
mlp.mlp3.bias
mlp.last_dense.weight
mlp.last_dense.bias
The AceCF model has 915,841 trainable parameters.



### __NOTE:__ Ohhhh, the training proper commences below!

In [10]:
best_epoch = 0
best_valid_loss = float('inf')

for epoch in range(pm.N_EPOCHS):
    
    start_time = time.time()
    
    train_rmse_loss = train_NCF_only(ace_model, train_iterator, optimizer, loss_fxn, pm.B_SIZE, device)
    valid_rmse_loss = evaluate_NCF_only(ace_model, valid_iterator, pm.B_SIZE, device)
    
    train_loss = train_rmse_loss
    valid_loss = valid_rmse_loss
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        best_epoch = epoch
        torch.save(ace_model.state_dict(), ag.TMP_CF_PATH)
    
    print("\n")
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    
    print(f'\t Train RMSE: {train_rmse_loss:.4f}')
    print ("")
    print(f'\t Val. RMSE: {valid_rmse_loss:.4f}')
    print("\n")
    
    if pm.RECORD_TB:
        tra_writer.add_scalar('stats/RMSE', train_rmse_loss, epoch)   
        tra_writer.flush()
        print ("")
        val_writer.add_scalar('stats/RMSE', valid_rmse_loss, epoch) 
        val_writer.flush()


Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 142.92it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 553.83it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 99.90it/s]



Epoch: 01 | Time: 0m 0s
	 Train RMSE: 1.2964

	 Val. RMSE: 1.0007




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 132.07it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 164.21it/s]
Training Iteration :   9%|▊         | 11/128 [00:00<00:01, 105.95it/s]



Epoch: 02 | Time: 0m 1s
	 Train RMSE: 0.9958

	 Val. RMSE: 0.9421




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 134.25it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 107.12it/s]
Training Iteration :  18%|█▊        | 23/128 [00:00<00:00, 229.57it/s]



Epoch: 03 | Time: 0m 1s
	 Train RMSE: 0.9636

	 Val. RMSE: 0.9689




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 143.63it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 557.52it/s]
Training Iteration :   9%|▊         | 11/128 [00:00<00:01, 100.66it/s]



Epoch: 04 | Time: 0m 0s
	 Train RMSE: 0.9699

	 Val. RMSE: 0.9259




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 133.46it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 148.18it/s]
Training Iteration :  18%|█▊        | 23/128 [00:00<00:00, 229.16it/s]



Epoch: 05 | Time: 0m 1s
	 Train RMSE: 0.9337

	 Val. RMSE: 0.9576




Training Iteration : 100%|██████████| 128/128 [00:01<00:00, 111.96it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 110.27it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 99.63it/s]



Epoch: 06 | Time: 0m 1s
	 Train RMSE: 0.9110

	 Val. RMSE: 0.9806




Training Iteration : 100%|██████████| 128/128 [00:01<00:00, 100.75it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 109.26it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 99.08it/s]



Epoch: 07 | Time: 0m 1s
	 Train RMSE: 0.8995

	 Val. RMSE: 0.9910




Training Iteration : 100%|██████████| 128/128 [00:01<00:00, 100.70it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 112.04it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 99.01it/s]



Epoch: 08 | Time: 0m 1s
	 Train RMSE: 0.9033

	 Val. RMSE: 1.0278




Training Iteration : 100%|██████████| 128/128 [00:01<00:00, 100.79it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 109.67it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 99.40it/s]



Epoch: 09 | Time: 0m 1s
	 Train RMSE: 0.8873

	 Val. RMSE: 1.0252




Training Iteration : 100%|██████████| 128/128 [00:01<00:00, 100.88it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 109.39it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 99.42it/s]



Epoch: 10 | Time: 0m 1s
	 Train RMSE: 0.8679

	 Val. RMSE: 0.9852




Training Iteration : 100%|██████████| 128/128 [00:01<00:00, 100.60it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 112.08it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 98.66it/s]



Epoch: 11 | Time: 0m 1s
	 Train RMSE: 0.8511

	 Val. RMSE: 0.9290




Training Iteration : 100%|██████████| 128/128 [00:01<00:00, 100.76it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 109.98it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 98.93it/s]



Epoch: 12 | Time: 0m 1s
	 Train RMSE: 0.8692

	 Val. RMSE: 1.0042




Training Iteration : 100%|██████████| 128/128 [00:01<00:00, 100.79it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 109.21it/s]
Training Iteration :   9%|▊         | 11/128 [00:00<00:01, 100.87it/s]



Epoch: 13 | Time: 0m 1s
	 Train RMSE: 0.8455

	 Val. RMSE: 0.9452




Training Iteration : 100%|██████████| 128/128 [00:01<00:00, 100.90it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 109.38it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 98.81it/s]



Epoch: 14 | Time: 0m 1s
	 Train RMSE: 0.8293

	 Val. RMSE: 0.9362




Training Iteration : 100%|██████████| 128/128 [00:01<00:00, 100.81it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 108.46it/s]
Training Iteration :   9%|▊         | 11/128 [00:00<00:01, 104.82it/s]



Epoch: 15 | Time: 0m 1s
	 Train RMSE: 0.8159

	 Val. RMSE: 0.9255




Training Iteration : 100%|██████████| 128/128 [00:01<00:00, 101.19it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 111.47it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 98.38it/s]



Epoch: 16 | Time: 0m 1s
	 Train RMSE: 0.8123

	 Val. RMSE: 0.9868




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 132.95it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 131.86it/s]
Training Iteration :  20%|█▉        | 25/128 [00:00<00:00, 234.21it/s]



Epoch: 17 | Time: 0m 1s
	 Train RMSE: 0.8219

	 Val. RMSE: 0.9296




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 144.68it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 563.76it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 99.30it/s]



Epoch: 18 | Time: 0m 0s
	 Train RMSE: 0.8097

	 Val. RMSE: 0.9362




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 131.97it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 109.49it/s]
Training Iteration :  19%|█▉        | 24/128 [00:00<00:00, 238.11it/s]



Epoch: 19 | Time: 0m 1s
	 Train RMSE: 0.7935

	 Val. RMSE: 0.9762




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 142.27it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 558.48it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 99.13it/s]



Epoch: 20 | Time: 0m 0s
	 Train RMSE: 0.7909

	 Val. RMSE: 0.9925




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 128.67it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 116.35it/s]
Training Iteration :  19%|█▉        | 24/128 [00:00<00:00, 234.11it/s]



Epoch: 21 | Time: 0m 1s
	 Train RMSE: 0.8037

	 Val. RMSE: 0.9313




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 144.07it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 562.95it/s]
Training Iteration :   9%|▊         | 11/128 [00:00<00:01, 100.82it/s]



Epoch: 22 | Time: 0m 0s
	 Train RMSE: 0.7776

	 Val. RMSE: 0.9794




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 130.71it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 112.90it/s]
Training Iteration :  17%|█▋        | 22/128 [00:00<00:00, 218.06it/s]



Epoch: 23 | Time: 0m 1s
	 Train RMSE: 0.7888

	 Val. RMSE: 0.9505




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 139.99it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 180.23it/s]
Training Iteration :   7%|▋         | 9/128 [00:00<00:01, 83.49it/s]



Epoch: 24 | Time: 0m 1s
	 Train RMSE: 0.7656

	 Val. RMSE: 0.9521




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 130.62it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 205.02it/s]
Training Iteration :  18%|█▊        | 23/128 [00:00<00:00, 212.19it/s]



Epoch: 25 | Time: 0m 1s
	 Train RMSE: 0.7649

	 Val. RMSE: 0.9673




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 145.31it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 195.51it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 98.86it/s]



Epoch: 26 | Time: 0m 0s
	 Train RMSE: 0.7647

	 Val. RMSE: 0.9432




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 131.22it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 188.93it/s]
Training Iteration :  18%|█▊        | 23/128 [00:00<00:00, 215.09it/s]



Epoch: 27 | Time: 0m 1s
	 Train RMSE: 0.7629

	 Val. RMSE: 0.9447




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 144.57it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 195.78it/s]
Training Iteration :   8%|▊         | 10/128 [00:00<00:01, 98.03it/s]



Epoch: 28 | Time: 0m 0s
	 Train RMSE: 0.7429

	 Val. RMSE: 0.9879




Training Iteration : 100%|██████████| 128/128 [00:00<00:00, 133.03it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 169.99it/s]
Training Iteration :   7%|▋         | 9/128 [00:00<00:01, 89.01it/s]



Epoch: 29 | Time: 0m 1s
	 Train RMSE: 0.7635

	 Val. RMSE: 0.9209




Training Iteration : 100%|██████████| 128/128 [00:01<00:00, 125.10it/s]
Validation Iteration : 100%|██████████| 16/16 [00:00<00:00, 161.53it/s]



Epoch: 30 | Time: 0m 1s
	 Train RMSE: 0.7591

	 Val. RMSE: 0.9526







In [11]:
print (f"Validation's best epoch # {best_epoch} with RMSE of {best_valid_loss: .4f}")

Validation's best epoch # 28 with RMSE of  0.9209


In [12]:
model_state_dict = torch.load(ag.TMP_CF_PATH)
ace_model.load_state_dict(model_state_dict)

test_rmse_loss = evaluate_NCF_only(ace_model, test_iterator, pm.B_SIZE, device, mode="Test")

print (f"\n\nFinally, Test RMSE is {test_rmse_loss: .4f}")

Test Iteration : 100%|██████████| 16/16 [00:00<00:00, 108.02it/s]



Finally, Test RMSE is  0.9041





In [13]:
print ("[INFO to remember]")
print (" > Dataset/domain: ", pm.DOMAIN)
print (" > Item ratio: ", pm.ITEM_RATIO)
print (" > User ratio: ", pm.USER_RATIO)
print (" > LR: ", pm.CF_LRATE)
print (" > Batch size: ", pm.B_SIZE)
print (" > Reduce DIM: ", pm.REDUCE_DIM)

[INFO to remember]
 > Dataset/domain:  auto
 > Item ratio:  0.4
 > User ratio:  0.4
 > LR:  0.004
 > Batch size:  128
 > Reduce DIM:  128
