In [29]:
import numpy as np
from openrec.tf1.legacy import ImplicitModelTrainer
from openrec.tf1.legacy.utils import ImplicitDataset
from openrec.tf1.legacy.recommenders import CML, BPR
from openrec.tf1.legacy.utils.evaluators import AUC
from openrec.tf1.legacy.utils.samplers import PairwiseSampler

In [30]:
import numpy as np
import pandas as pd
import scipy.sparse as sps
import os

# --- Experiment configuration ------------------------------------------------
# Selects which family of data splits is used:
#   True  -> "./Pre-Process-Split"
#   False -> "./Post-Process-Split"
SPLIT_UNBIASED_PREPROCESS = True

if SPLIT_UNBIASED_PREPROCESS:
    folder_name = "./Pre-Process-Split"
else:
    folder_name = "./Post-Process-Split"

# One RNG seed per split; the entry at index `split` seeds NumPy below.
seed = [76424236, 35427387, 90786253, 84636361, 61510520, 76632867, 27038789, 13359944, 28580255, 44670607]
split = 1

# Fail before touching the filesystem: a negative index would otherwise
# silently pick a seed from the end of the list and create a bogus folder.
if not 0 <= split < len(seed):
    raise IndexError(f"split must be in [0, {len(seed) - 1}], got {split}")

# Build the split directory with os.path.join so no doubled separators appear.
# The trailing empty component keeps a trailing separator, because later cells
# build file paths via `folder_name + "<file name>"`.
folder_name = os.path.join(folder_name, "Split_data", f"split_{split}", "")

# exist_ok=True replaces the separate "exists?" check and is safe to re-run.
os.makedirs(folder_name, exist_ok=True)

np.random.seed(seed=seed[split])

In [31]:
# Batch / logging settings reused by the model, sampler and trainer cells below.
batch_size = 8000
test_batch_size = 1000
display_itr = 1000

# Load the training and validation arrays stored in the split folder.
raw_data = {
    'train_data': np.load(folder_name + "training_arr.npy"),
    'val_data': np.load(folder_name + "validation_arr.npy"),
    # Hard-coded sizes handed to ImplicitDataset
    # (presumably upper bounds on user / item ids -- TODO confirm against the data).
    'max_user': 15401,
    'max_item': 1001,
}

# Wrap both arrays in ImplicitDataset objects that share the same id-space sizes.
train_dataset, val_dataset = (
    ImplicitDataset(raw_data[key], raw_data['max_user'], raw_data['max_item'], name=label)
    for key, label in (('train_data', 'Train'), ('val_data', 'Val'))
)

# Define Model

In [32]:
# Reset TensorFlow's default graph before the model is built in the next cell.
# Intent (per the original note): keep TF from reusing cached embeddings when
# the model-definition cell is re-run in the same kernel.
import tensorflow as tf
tf.compat.v1.reset_default_graph()

In [33]:
# CML recommender; user / item counts are taken from the training dataset.
cml_model = CML(
    batch_size=batch_size,
    max_user=train_dataset.max_user(),
    max_item=train_dataset.max_item(),
    dim_embed=50,
    l2_reg=0.001,
    opt='Adam',
    sess_config=None,
)

# Pairwise sampler over the training data, using 4 worker processes.
sampler = PairwiseSampler(
    batch_size=batch_size,
    dataset=train_dataset,
    num_process=4,
)

# Trainer wiring model, sampler and dataset together; files written by the
# trainer use the prefix "<folder_name>yahoo".
model_trainer = ImplicitModelTrainer(
    batch_size=batch_size,
    test_batch_size=test_batch_size,
    train_dataset=train_dataset,
    model=cml_model,
    sampler=sampler,
    eval_save_prefix=folder_name + "yahoo",
    item_serving_size=500,
)

# Metric reported on the evaluation datasets during training.
auc_evaluator = AUC()

# Train Model

In [34]:
# Train the model and evaluate AUC on the validation set along the way.
# The recorded output shows a loss / evaluation report every `display_itr`
# iterations and a sampled evaluation with sample size 200 (num_negatives).
model_trainer.train(
    num_itr=10001,  # presumably 10001 so iteration 10000 is still reported -- TODO confirm
    display_itr=display_itr,
    eval_datasets=[val_dataset],
    evaluators=[auc_evaluator],
    num_negatives=200,
)

[Subsampling negative items]


                                                      

== Start training with sampled evaluation, sample size: 200 ==
[Itr 100] Finished
[Itr 200] Finished
[Itr 300] Finished
[Itr 400] Finished
[Itr 500] Finished
[Itr 600] Finished
[Itr 700] Finished
[Itr 800] Finished
[Itr 900] Finished
[Itr 1000] Finished
INFO:tensorflow:./Split_data/split_4/yahoo-1000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 1000] loss: 2140.002342
..(dataset: Val) evaluation


100%|██████████| 7396/7396 [00:03<00:00, 2263.02it/s]


..(dataset: Val) AUC 0.8766426740384301
[Itr 1100] Finished
[Itr 1200] Finished
[Itr 1300] Finished
[Itr 1400] Finished
[Itr 1500] Finished
[Itr 1600] Finished
[Itr 1700] Finished
[Itr 1800] Finished
[Itr 1900] Finished
[Itr 2000] Finished
INFO:tensorflow:./Split_data/split_4/yahoo-2000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 2000] loss: 739.115833
..(dataset: Val) evaluation


100%|██████████| 7396/7396 [00:03<00:00, 2262.96it/s]


..(dataset: Val) AUC 0.8896092955077521
[Itr 2100] Finished
[Itr 2200] Finished
[Itr 2300] Finished
[Itr 2400] Finished
[Itr 2500] Finished
[Itr 2600] Finished
[Itr 2700] Finished
[Itr 2800] Finished
[Itr 2900] Finished
[Itr 3000] Finished
INFO:tensorflow:./Split_data/split_4/yahoo-3000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 3000] loss: 629.088245
..(dataset: Val) evaluation


100%|██████████| 7396/7396 [00:03<00:00, 2167.01it/s]


..(dataset: Val) AUC 0.893163032365882
[Itr 3100] Finished
[Itr 3200] Finished
[Itr 3300] Finished
[Itr 3400] Finished
[Itr 3500] Finished
[Itr 3600] Finished
[Itr 3700] Finished
[Itr 3800] Finished
[Itr 3900] Finished
[Itr 4000] Finished
INFO:tensorflow:./Split_data/split_4/yahoo-4000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 4000] loss: 580.290444
..(dataset: Val) evaluation


100%|██████████| 7396/7396 [00:03<00:00, 2123.15it/s]


..(dataset: Val) AUC 0.8943051789625215
[Itr 4100] Finished
[Itr 4200] Finished
[Itr 4300] Finished
[Itr 4400] Finished
[Itr 4500] Finished
[Itr 4600] Finished
[Itr 4700] Finished
[Itr 4800] Finished
[Itr 4900] Finished
[Itr 5000] Finished
INFO:tensorflow:./Split_data/split_4/yahoo-5000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 5000] loss: 557.005479
..(dataset: Val) evaluation


100%|██████████| 7396/7396 [00:03<00:00, 2165.93it/s]


..(dataset: Val) AUC 0.8943385569089678
[Itr 5100] Finished
[Itr 5200] Finished
[Itr 5300] Finished
[Itr 5400] Finished
[Itr 5500] Finished
[Itr 5600] Finished
[Itr 5700] Finished
[Itr 5800] Finished
[Itr 5900] Finished
[Itr 6000] Finished
INFO:tensorflow:./Split_data/split_4/yahoo-6000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 6000] loss: 542.407909
..(dataset: Val) evaluation


100%|██████████| 7396/7396 [00:03<00:00, 2165.90it/s]


..(dataset: Val) AUC 0.8944346348883112
[Itr 6100] Finished
[Itr 6200] Finished
[Itr 6300] Finished
[Itr 6400] Finished
[Itr 6500] Finished
[Itr 6600] Finished
[Itr 6700] Finished
[Itr 6800] Finished
[Itr 6900] Finished
[Itr 7000] Finished
INFO:tensorflow:./Split_data/split_4/yahoo-7000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 7000] loss: 533.051666
..(dataset: Val) evaluation


100%|██████████| 7396/7396 [00:03<00:00, 1952.54it/s]


..(dataset: Val) AUC 0.8941038410273942
[Itr 7100] Finished
[Itr 7200] Finished
[Itr 7300] Finished
[Itr 7400] Finished
[Itr 7500] Finished
[Itr 7600] Finished
[Itr 7700] Finished
[Itr 7800] Finished
[Itr 7900] Finished
[Itr 8000] Finished
INFO:tensorflow:./Split_data/split_4/yahoo-8000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 8000] loss: 528.532815
..(dataset: Val) evaluation


100%|██████████| 7396/7396 [00:03<00:00, 2169.12it/s]


..(dataset: Val) AUC 0.8940128108151667
[Itr 8100] Finished
[Itr 8200] Finished
[Itr 8300] Finished
[Itr 8400] Finished
[Itr 8500] Finished
[Itr 8600] Finished
[Itr 8700] Finished
[Itr 8800] Finished
[Itr 8900] Finished
[Itr 9000] Finished
INFO:tensorflow:./Split_data/split_4/yahoo-9000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 9000] loss: 524.719920
..(dataset: Val) evaluation


100%|██████████| 7396/7396 [00:03<00:00, 2197.06it/s]


..(dataset: Val) AUC 0.8935377325592568
[Itr 9100] Finished
[Itr 9200] Finished
[Itr 9300] Finished
[Itr 9400] Finished
[Itr 9500] Finished
[Itr 9600] Finished
[Itr 9700] Finished
[Itr 9800] Finished
[Itr 9900] Finished
[Itr 10000] Finished
INFO:tensorflow:./Split_data/split_4/yahoo-10000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 10000] loss: 521.369804
..(dataset: Val) evaluation


100%|██████████| 7396/7396 [00:03<00:00, 2055.84it/s]


..(dataset: Val) AUC 0.8935025940971733


In [35]:
# Save the trained model under "<folder_name>cml-yahoo".
# The second positional argument is passed as None
# (presumably the checkpoint's global step -- TODO confirm against the OpenRec API).
cml_model.save(folder_name + "cml-yahoo", None)

INFO:tensorflow:./Split_data/split_4/cml-yahoo is not in all_model_checkpoint_paths. Manually adding it.
