# **SET UP**

## Import

In [1]:
import numpy as np
from openrec.tf1.legacy import ImplicitModelTrainer
from openrec.tf1.legacy.utils import ImplicitDataset
from openrec.tf1.legacy.recommenders import CML, BPR
from openrec.tf1.legacy.utils.evaluators import AUC
from openrec.tf1.legacy.utils.samplers import PairwiseSampler

## Init

In [2]:
import numpy as np
import pandas as pd
import scipy.sparse as sps
import os

# Seed NumPy's global RNG so NumPy-driven randomness in this kernel is repeatable.
seed = 76424236
np.random.seed(seed=seed)

# Directory the input arrays are read from and checkpoints are written to.
# Keep the trailing slash: other cells build paths by plain string concatenation.
folder_name = "./Dataset/"

# exist_ok=True creates the directory only when it is missing, so the cell can be
# re-run safely and there is no gap between "check" and "create" as with a
# separate os.path.exists() test.
os.makedirs(folder_name, exist_ok=True)

## Load training and validation datasets

In [3]:
# Interaction arrays plus the user/item bounds handed to ImplicitDataset below.
# NOTE(review): 15401 / 1001 are presumably upper bounds on the user and item ids
# in the saved arrays - confirm against the preprocessing that produced them.
raw_data = {
    'train_data': np.load(folder_name + "training_arr.npy"),
    'val_data': np.load(folder_name + "validation_arr.npy"),
    'max_user': 15401,
    'max_item': 1001,
}

# Sizes shared by the model, sampler and trainer cells further down, and the
# reporting interval passed to the training call.
batch_size, test_batch_size, display_itr = 8000, 1000, 1000

# Wrap each split in an ImplicitDataset; both splits share the same bounds.
train_dataset = ImplicitDataset(
    raw_data['train_data'],
    raw_data['max_user'],
    raw_data['max_item'],
    name='Train',
)
val_dataset = ImplicitDataset(
    raw_data['val_data'],
    raw_data['max_user'],
    raw_data['max_item'],
    name='Val',
)

# **TRAIN MODEL**

## Define model

In [4]:
import tensorflow as tf

# Clear the default graph first so that re-running this cell builds the model
# from scratch instead of on top of graph state left over from an earlier run.
tf.compat.v1.reset_default_graph()

# CML recommender sized from the training dataset's user/item bounds.
cml_model = CML(
    batch_size=batch_size,
    max_user=train_dataset.max_user(),
    max_item=train_dataset.max_item(),
    dim_embed=50,
    l2_reg=0.001,
    opt='Adam',
    sess_config=None,
)

# Pairwise sampler that draws training batches from the training split using
# four worker processes. Built after the model, as in the original cell.
sampler = PairwiseSampler(
    batch_size=batch_size,
    dataset=train_dataset,
    num_process=4,
)

# Trainer tying model, sampler and training data together; checkpoints written
# during training are prefixed with "<folder_name>yahoo".
model_trainer = ImplicitModelTrainer(
    batch_size=batch_size,
    test_batch_size=test_batch_size,
    train_dataset=train_dataset,
    model=cml_model,
    sampler=sampler,
    eval_save_prefix=folder_name + "yahoo",
    item_serving_size=500,
)

# Metric reported for the validation split during training.
auc_evaluator = AUC()


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
keep_dims is deprecated, use keepdims instead

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where





2024-02-15 16:48:25.872622: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA
2024-02-15 16:48:25.886383: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2419200000 Hz
2024-02-15 16:48:25.890953: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5631e117ac90 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2024-02-15 16:48:25.891006: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version





## Train Model

In [5]:
# Run training. As the log below shows, every `display_itr` iterations a
# checkpoint is written, the loss is printed and the validation split is scored
# with AUC using 200 sampled negatives.
# NOTE(review): num_itr=10001 is presumably chosen so that iteration 10000 is
# still reached and evaluated - confirm against ImplicitModelTrainer.train.
model_trainer.train(
    num_itr=10001,
    display_itr=display_itr,
    eval_datasets=[val_dataset],
    evaluators=[auc_evaluator],
    num_negatives=200,
)

[Subsampling negative items]


                                                     

== Start training with sampled evaluation, sample size: 200 ==
[Itr 100] Finished
[Itr 200] Finished
[Itr 300] Finished
[Itr 400] Finished
[Itr 500] Finished
[Itr 600] Finished
[Itr 700] Finished
[Itr 800] Finished
[Itr 900] Finished
[Itr 1000] Finished
INFO:tensorflow:./Dataset/yahoo-1000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 1000] loss: 2132.264733
..(dataset: Val) evaluation


100%|██████████| 7328/7328 [00:05<00:00, 1364.45it/s]


..(dataset: Val) AUC 0.8785219033144833
[Itr 1100] Finished
[Itr 1200] Finished
[Itr 1300] Finished
[Itr 1400] Finished
[Itr 1500] Finished
[Itr 1600] Finished
[Itr 1700] Finished
[Itr 1800] Finished
[Itr 1900] Finished
[Itr 2000] Finished
INFO:tensorflow:./Dataset/yahoo-2000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 2000] loss: 737.583823
..(dataset: Val) evaluation


100%|██████████| 7328/7328 [00:04<00:00, 1517.59it/s]


..(dataset: Val) AUC 0.8910914633395196
[Itr 2100] Finished
[Itr 2200] Finished
[Itr 2300] Finished
[Itr 2400] Finished
[Itr 2500] Finished
[Itr 2600] Finished
[Itr 2700] Finished
[Itr 2800] Finished
[Itr 2900] Finished
[Itr 3000] Finished
INFO:tensorflow:./Dataset/yahoo-3000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 3000] loss: 628.045095
..(dataset: Val) evaluation


100%|██████████| 7328/7328 [00:05<00:00, 1396.58it/s]


..(dataset: Val) AUC 0.8951388031680569
[Itr 3100] Finished
[Itr 3200] Finished
[Itr 3300] Finished
[Itr 3400] Finished
[Itr 3500] Finished
[Itr 3600] Finished
[Itr 3700] Finished
[Itr 3800] Finished
[Itr 3900] Finished
[Itr 4000] Finished
INFO:tensorflow:./Dataset/yahoo-4000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 4000] loss: 581.388277
..(dataset: Val) evaluation


100%|██████████| 7328/7328 [00:07<00:00, 940.36it/s] 


..(dataset: Val) AUC 0.8962030982781471
[Itr 4100] Finished
[Itr 4200] Finished
[Itr 4300] Finished
[Itr 4400] Finished
[Itr 4500] Finished
[Itr 4600] Finished
[Itr 4700] Finished
[Itr 4800] Finished
[Itr 4900] Finished
[Itr 5000] Finished
INFO:tensorflow:./Dataset/yahoo-5000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 5000] loss: 557.894164
..(dataset: Val) evaluation


100%|██████████| 7328/7328 [00:04<00:00, 1525.70it/s]


..(dataset: Val) AUC 0.8969368878137339
[Itr 5100] Finished
[Itr 5200] Finished
[Itr 5300] Finished
[Itr 5400] Finished
[Itr 5500] Finished
[Itr 5600] Finished
[Itr 5700] Finished
[Itr 5800] Finished
[Itr 5900] Finished
[Itr 6000] Finished
INFO:tensorflow:./Dataset/yahoo-6000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 6000] loss: 542.519586
..(dataset: Val) evaluation


100%|██████████| 7328/7328 [00:04<00:00, 1560.48it/s]


..(dataset: Val) AUC 0.8970892273843377
[Itr 6100] Finished
[Itr 6200] Finished
[Itr 6300] Finished
[Itr 6400] Finished
[Itr 6500] Finished
[Itr 6600] Finished
[Itr 6700] Finished
[Itr 6800] Finished
[Itr 6900] Finished
[Itr 7000] Finished
INFO:tensorflow:./Dataset/yahoo-7000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 7000] loss: 534.255741
..(dataset: Val) evaluation


100%|██████████| 7328/7328 [00:04<00:00, 1671.64it/s]


..(dataset: Val) AUC 0.8968870370503513
[Itr 7100] Finished
[Itr 7200] Finished
[Itr 7300] Finished
[Itr 7400] Finished
[Itr 7500] Finished
[Itr 7600] Finished
[Itr 7700] Finished
[Itr 7800] Finished
[Itr 7900] Finished
[Itr 8000] Finished
INFO:tensorflow:./Dataset/yahoo-8000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 8000] loss: 528.727392
..(dataset: Val) evaluation


100%|██████████| 7328/7328 [00:04<00:00, 1612.54it/s]


..(dataset: Val) AUC 0.8967568938090202
[Itr 8100] Finished
[Itr 8200] Finished
[Itr 8300] Finished
[Itr 8400] Finished
[Itr 8500] Finished
[Itr 8600] Finished
[Itr 8700] Finished
[Itr 8800] Finished
[Itr 8900] Finished
[Itr 9000] Finished
INFO:tensorflow:./Dataset/yahoo-9000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 9000] loss: 522.831909
..(dataset: Val) evaluation


100%|██████████| 7328/7328 [00:04<00:00, 1627.17it/s]


..(dataset: Val) AUC 0.8966660143131612
[Itr 9100] Finished
[Itr 9200] Finished
[Itr 9300] Finished
[Itr 9400] Finished
[Itr 9500] Finished
[Itr 9600] Finished
[Itr 9700] Finished
[Itr 9800] Finished
[Itr 9900] Finished
[Itr 10000] Finished
INFO:tensorflow:./Dataset/yahoo-10000 is not in all_model_checkpoint_paths. Manually adding it.
[Itr 10000] loss: 520.152143
..(dataset: Val) evaluation


100%|██████████| 7328/7328 [00:04<00:00, 1638.36it/s]


..(dataset: Val) AUC 0.8964089462055704


# **SAVE MODEL**

In [6]:
# Persist the final model as "<folder_name>cml-yahoo".
# NOTE(review): the second positional argument is presumably the checkpoint step;
# passing None yields a path without a step suffix (see the log line below) -
# confirm against the OpenRec `save` signature.
cml_model.save(folder_name + "cml-yahoo", None)

INFO:tensorflow:./Dataset/cml-yahoo is not in all_model_checkpoint_paths. Manually adding it.
