# Session-based Recommendation with XLNet

> following [this tutorial](https://github.com/NVIDIA-Merlin/Transformers4Rec/blob/main/examples/getting-started-session-based/02-session-based-XLNet-with-PyT.ipynb)

In [25]:
import os

os.environ["CUDA_VISIBLE_DEVICES"]="0"

import glob
import torch 

from transformers4rec import torch as tr
from transformers4rec.torch.ranking_metric import NDCGAt, AvgPrecisionAt, RecallAt
from transformers4rec.torch.utils.examples_utils import wipe_memory

import nvtabular as nvt

In [None]:
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
PROJECT_NUM = !gcloud projects list --filter="$PROJECT_ID" --format="value(PROJECT_NUMBER)"
PROJECT_NUM = PROJECT_NUM[0]
LOCATION = 'us-central1'
REGION = "us-central1"

# VERTEX_SA = '934903580331-compute@developer.gserviceaccount.com'
VERTEX_SA = 'jt-vertex-sa@hybrid-vertex.iam.gserviceaccount.com'

print(f"PROJECT_ID: {PROJECT_ID}")
print(f"PROJECT_NUM: {PROJECT_NUM}")
print(f"LOCATION: {LOCATION}")
print(f"REGION: {REGION}")
print(f"VERTEX_SA: {VERTEX_SA}")

## Set the schema object

In [2]:
# Directory layout used by the rest of the notebook (all paths are relative).
REPO_WORKSPACE = 'workspace'
DATA_DIR = 'data'

# Root data directory: <workspace>/<data>.
INPUT_DATA_DIR = '/'.join((REPO_WORKSPACE, DATA_DIR))
# Directory that holds schema.pbtxt.
TRANSFORMED_WORKFLOW = INPUT_DATA_DIR + '/processed_nvt'
# Directory the NVTabular workflow is loaded from.
OUTPUT_WORKFLOW_DIR = INPUT_DATA_DIR + '/workflow_etl'
# Per-day folders, each with train.parquet / valid.parquet.
OUTPUT_DIR = INPUT_DATA_DIR + '/sessions_by_day'
# Folder of day 1 (the train.parquet file itself is not appended here).
TRAIN_PATHS = OUTPUT_DIR + '/1'

# Echo the resolved paths, one per line.
print(
    f"INPUT_DATA_DIR: {INPUT_DATA_DIR}",
    f"TRANSFORMED_WORKFLOW: {TRANSFORMED_WORKFLOW}",
    f"OUTPUT_WORKFLOW_DIR: {OUTPUT_WORKFLOW_DIR}",
    f"OUTPUT_DIR: {OUTPUT_DIR}",
    f"TRAIN_PATHS: {TRAIN_PATHS}",
    sep="\n",
)

INPUT_DATA_DIR: workspace/data
TRANSFORMED_WORKFLOW: workspace/data/processed_nvt
OUTPUT_WORKFLOW_DIR: workspace/data/workflow_etl
OUTPUT_DIR: workspace/data/sessions_by_day
TRAIN_PATHS: workspace/data/sessions_by_day/1


In [3]:
from merlin_standard_lib import Schema

# The schema file sits inside the TRANSFORMED_WORKFLOW directory.
# (An earlier variant read the location from the INPUT_SCHEMA_PATH environment
# variable, defaulting to /workspace/data/processed_nvt/schema.pbtxt.)
SCHEMA_PATH = TRANSFORMED_WORKFLOW + '/schema.pbtxt'

# Parse the text-format schema into a Schema object.
schema = Schema().from_proto_text(SCHEMA_PATH)

In [4]:
# Show the first 20 lines of the schema file as a quick sanity check.
!head -20 $SCHEMA_PATH

feature {
  name: "session_id"
  type: INT
  int_domain {
    name: "session_id"
    max: 19867
    is_categorical: true
  }
  annotation {
    tag: "categorical"
    extra_metadata {
      type_url: "type.googleapis.com/google.protobuf.Struct"
      value: "\n\021\n\013num_buckets\022\002\010\000\n\033\n\016freq_threshold\022\t\021\000\000\000\000\000\000\000\000\n\025\n\010max_size\022\t\021\000\000\000\000\000\000\000\000\n\030\n\013start_index\022\t\021\000\000\000\000\000\000\000\000\n5\n\010cat_path\022)\032\'.//categories/unique.session_id.parquet\nG\n\017embedding_sizes\0224*2\n\030\n\013cardinality\022\t\021\000\000\000\000\000g\323@\n\026\n\tdimension\022\t\021\000\000\000\000\000\200y@\n\034\n\017dtype_item_size\022\t\021\000\000\000\000\000\000P@\n\r\n\007is_list\022\002 \000\n\017\n\tis_ragged\022\002 \000"
    }
  }
}
feature {
  name: "day-first"
  type: INT
  annotation {


In [5]:
# Narrow the schema to the four list features used for training.
# Note: `schema` is rebound here, so later cells see only this subset.
schema = schema.select_by_name(
    ['item_id-list', 'category-list', 'weekday_sin-list', 'age_days-list']
)

In [7]:
# Confirm the schema file exists at SCHEMA_PATH.
!ls $SCHEMA_PATH

workspace/data/processed_nvt/schema.pbtxt


## Define the sequential input module

In [8]:
# Build the sequential input block from the selected schema.
# max_sequence_length (20) is the same value passed to the XLNet config and the
# training arguments elsewhere in this notebook; keep the three in sync.
inputs = tr.TabularSequenceFeatures.from_schema(
    schema,
    d_output=100,
    masking="mlm",
    continuous_projection=64,
    max_sequence_length=20,
)

## Define the Transformer block

In [9]:
# XLNet hyperparameters; `build` supplies defaults for the remaining HF XLNet settings.
transformer_config = tr.XLNetConfig.build(
    d_model=64,
    n_head=4,
    n_layer=2,
    total_seq_length=20,
)

# Model body: input block -> MLP projection (64 units, matching d_model) ->
# transformer block, which reuses the masking scheme configured on the inputs.
body = tr.SequentialBlock(
    inputs,
    tr.MLPBlock([64]),
    tr.TransformerBlock(transformer_config, masking=inputs.masking),
)

# Top-N evaluation metrics, each computed at cut-offs 20 and 40.
metrics = [
    metric_cls(top_ks=[20, 40], labels_onehot=True)
    for metric_cls in (NDCGAt, RecallAt)
]

# Head wrapping the body with the next-item prediction task.
head = tr.Head(
    body,
    tr.NextItemPredictionTask(weight_tying=True, metrics=metrics),
    inputs=inputs,
)

# End-to-end model object that is handed to the Trainer.
model = tr.Model(head)

In [None]:
# Alternative way to get the end-to-end model: let the transformer config assemble
# it from the input block and a prediction task. Reference:
# https://nvidia-merlin.github.io/Transformers4Rec/main/examples/end-to-end-session-based/02-End-to-end-session-based-with-Yoochoose-PyT.html
#
# `prediction_task` was not defined anywhere in this notebook, so this cell raised
# a NameError on a fresh kernel. It is defined here with the same settings as the
# task wrapped by `head`, using fresh metric instances so no metric objects are
# shared between the two models.
prediction_task = tr.NextItemPredictionTask(
    weight_tying=True,
    metrics=[
        NDCGAt(top_ks=[20, 40], labels_onehot=True),
        RecallAt(top_ks=[20, 40], labels_onehot=True),
    ],
)

# NOTE(review): `inputs` is the same module instance already used by `model`;
# presumably both models then share those parameters. `model_test` is only
# displayed in this notebook, never trained - confirm before reusing it.
model_test = transformer_config.to_torch_model(inputs, prediction_task)
model_test

# Train Model

## Set training args

In [10]:
# Batch sizes; each can be overridden through an environment variable of the
# same name (read as a string, converted to int).
per_device_train_batch_size = int(os.getenv("per_device_train_batch_size", '128'))
per_device_eval_batch_size = int(os.getenv("per_device_eval_batch_size", '32'))

In [11]:
from transformers4rec.config.trainer import T4RecTrainingArguments
from transformers4rec.torch import Trainer

# Training hyperparameters, grouped by concern.
train_args = T4RecTrainingArguments(
    # Data loading
    data_loader_engine='merlin',
    dataloader_drop_last=True,
    max_sequence_length=20,  # same value used for the input block and XLNet config
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    # Optimisation schedule
    num_train_epochs=5,
    learning_rate=0.0005,
    lr_scheduler_type='cosine',
    learning_rate_num_cosine_cycles_by_epoch=1.5,
    gradient_accumulation_steps=1,
    # Output, logging and device
    output_dir="./tmp",
    report_to=[],
    logging_steps=50,
    no_cuda=False,
)

## Daily Fine-Tuning: Training over a time window

In [12]:
# Create the T4Rec Trainer (PyTorch API) that runs training and evaluation for
# `model`; compute_metrics=True turns on metric computation during evaluation.
trainer = Trainer(model=model, args=train_args, schema=schema, compute_metrics=True)

In [13]:
# INPUT_DATA_DIR = os.environ.get("INPUT_DATA_DIR", "/workspace/data")
# OUTPUT_DIR = os.environ.get("OUTPUT_DIR", f"{INPUT_DATA_DIR}/sessions_by_day")

# Day-window bounds for the fine-tuning loop; each can be overridden through an
# environment variable of the same name.
start_window_index = int(os.getenv("start_window_index", '1'))
final_window_index = int(os.getenv("final_window_index", '8'))

In [14]:
start_time_window_index = start_window_index
final_time_window_index = final_window_index

# Incremental fine-tuning over consecutive days: for each day N in
# [start, final) train on day N's train.parquet, then evaluate on day N + 1's
# valid.parquet. The same `trainer` (and therefore the same model) is reused, so
# each day continues from the previous day's weights.
for time_index in range(start_time_window_index, final_time_window_index):
    # Set data
    time_index_train = time_index
    time_index_eval = time_index + 1
    train_paths = glob.glob(os.path.join(OUTPUT_DIR, f"{time_index_train}/train.parquet"))
    eval_paths = glob.glob(os.path.join(OUTPUT_DIR, f"{time_index_eval}/valid.parquet"))

    # glob.glob returns an empty list (not an error) when the file is missing.
    # Fail here with a clear message instead of handing the trainer an empty
    # dataset, and do it before spending time on training.
    if not train_paths:
        raise FileNotFoundError(
            f"No train.parquet found for day {time_index_train} under {OUTPUT_DIR}"
        )
    if not eval_paths:
        raise FileNotFoundError(
            f"No valid.parquet found for day {time_index_eval} under {OUTPUT_DIR}"
        )
    print(train_paths)

    # Train on day related to time_index
    print('*'*20)
    print("Launch training for day %s are:" %time_index)
    print('*'*20 + '\n')
    trainer.train_dataset_or_path = train_paths
    # Restart the learning-rate schedule for every day window.
    trainer.reset_lr_scheduler()
    trainer.train()
    # Kept from the upstream tutorial; presumably keeps step counters of
    # consecutive windows from colliding - TODO confirm.
    trainer.state.global_step +=1
    print('finished')

    # Evaluate on the following day
    trainer.eval_dataset_or_path = eval_paths
    # NOTE: despite its name, `train_metrics` holds the *evaluation* metrics of
    # the following day; the name is kept so other cells that read it still work.
    train_metrics = trainer.evaluate(metric_key_prefix='eval')
    print('*'*20)
    print("Eval results for day %s are:\t" %time_index_eval)
    print('\n' + '*'*20 + '\n')
    for key in sorted(train_metrics.keys()):
        print(" %s = %s" % (key, str(train_metrics[key])))
    # Helper from the T4Rec example utilities, called once per day window.
    wipe_memory()

['workspace/data/sessions_by_day/1/train.parquet']
********************
Launch training for day 1 are:
********************



***** Running training *****
  Num examples = 1536
  Num Epochs = 5
  Instantaneous batch size per device = 128
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 1
  Total optimization steps = 60


Step,Training Loss
50,5.8508




Training completed. Do not forget to share your model on huggingface.co/models =)




finished


********************
Eval results for day 2 are:	

********************

 eval_/loss = 5.17643404006958
 eval_/next-item/ndcg_at_20 = 0.17471647262573242
 eval_/next-item/ndcg_at_40 = 0.22482089698314667
 eval_/next-item/recall_at_20 = 0.4322916865348816
 eval_/next-item/recall_at_40 = 0.6822916865348816
 eval_runtime = 0.1466
 eval_samples_per_second = 1309.324
 eval_steps_per_second = 40.916


***** Running training *****
  Num examples = 1792
  Num Epochs = 5
  Instantaneous batch size per device = 128
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 1
  Total optimization steps = 70


['workspace/data/sessions_by_day/2/train.parquet']
********************
Launch training for day 2 are:
********************



Step,Training Loss
50,4.9337




Training completed. Do not forget to share your model on huggingface.co/models =)




finished
********************
Eval results for day 3 are:	

********************

 eval_/loss = 4.593666076660156
 eval_/next-item/ndcg_at_20 = 0.21282625198364258
 eval_/next-item/ndcg_at_40 = 0.2544132471084595
 eval_/next-item/recall_at_20 = 0.53125
 eval_/next-item/recall_at_40 = 0.734375
 eval_runtime = 0.1513
 eval_samples_per_second = 1269.03
 eval_steps_per_second = 39.657


***** Running training *****
  Num examples = 1664
  Num Epochs = 5
  Instantaneous batch size per device = 128
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 1
  Total optimization steps = 65


['workspace/data/sessions_by_day/3/train.parquet']
********************
Launch training for day 3 are:
********************



Step,Training Loss
50,4.5851




Training completed. Do not forget to share your model on huggingface.co/models =)




finished
********************
Eval results for day 4 are:	

********************

 eval_/loss = 4.511531352996826
 eval_/next-item/ndcg_at_20 = 0.16087283194065094
 eval_/next-item/ndcg_at_40 = 0.23196697235107422
 eval_/next-item/recall_at_20 = 0.4322916865348816
 eval_/next-item/recall_at_40 = 0.7760416865348816
 eval_runtime = 0.1432
 eval_samples_per_second = 1340.964
 eval_steps_per_second = 41.905


***** Running training *****
  Num examples = 1536
  Num Epochs = 5
  Instantaneous batch size per device = 128
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 1
  Total optimization steps = 60


['workspace/data/sessions_by_day/4/train.parquet']
********************
Launch training for day 4 are:
********************



Step,Training Loss
50,4.5132




Training completed. Do not forget to share your model on huggingface.co/models =)




finished
********************
Eval results for day 5 are:	

********************

 eval_/loss = 4.298824787139893
 eval_/next-item/ndcg_at_20 = 0.22139111161231995
 eval_/next-item/ndcg_at_40 = 0.2713688910007477
 eval_/next-item/recall_at_20 = 0.5520833730697632
 eval_/next-item/recall_at_40 = 0.796875
 eval_runtime = 0.1409
 eval_samples_per_second = 1362.796
 eval_steps_per_second = 42.587


***** Running training *****
  Num examples = 1664
  Num Epochs = 5
  Instantaneous batch size per device = 128
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 1
  Total optimization steps = 65


['workspace/data/sessions_by_day/5/train.parquet']
********************
Launch training for day 5 are:
********************



Step,Training Loss
50,4.4954




Training completed. Do not forget to share your model on huggingface.co/models =)




finished
********************
Eval results for day 6 are:	

********************

 eval_/loss = 4.3619303703308105
 eval_/next-item/ndcg_at_20 = 0.18418318033218384
 eval_/next-item/ndcg_at_40 = 0.2430708408355713
 eval_/next-item/recall_at_20 = 0.5104166865348816
 eval_/next-item/recall_at_40 = 0.796875
 eval_runtime = 0.1489
 eval_samples_per_second = 1289.264
 eval_steps_per_second = 40.289


***** Running training *****
  Num examples = 1664
  Num Epochs = 5
  Instantaneous batch size per device = 128
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 1
  Total optimization steps = 65


['workspace/data/sessions_by_day/6/train.parquet']
********************
Launch training for day 6 are:
********************



Step,Training Loss
50,4.4933




Training completed. Do not forget to share your model on huggingface.co/models =)




finished
********************
Eval results for day 7 are:	

********************

 eval_/loss = 4.410634517669678
 eval_/next-item/ndcg_at_20 = 0.2138700783252716
 eval_/next-item/ndcg_at_40 = 0.2573198974132538
 eval_/next-item/recall_at_20 = 0.5572916865348816
 eval_/next-item/recall_at_40 = 0.7708333730697632
 eval_runtime = 0.1427
 eval_samples_per_second = 1345.179
 eval_steps_per_second = 42.037


***** Running training *****
  Num examples = 1664
  Num Epochs = 5
  Instantaneous batch size per device = 128
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 1
  Total optimization steps = 65


['workspace/data/sessions_by_day/7/train.parquet']
********************
Launch training for day 7 are:
********************



Step,Training Loss
50,4.4827




Training completed. Do not forget to share your model on huggingface.co/models =)




finished
********************
Eval results for day 8 are:	

********************

 eval_/loss = 4.469639301300049
 eval_/next-item/ndcg_at_20 = 0.1843894124031067
 eval_/next-item/ndcg_at_40 = 0.23753295838832855
 eval_/next-item/recall_at_20 = 0.5052083730697632
 eval_/next-item/recall_at_40 = 0.765625
 eval_runtime = 0.1455
 eval_samples_per_second = 1319.515
 eval_steps_per_second = 41.235


# Re-compute evaluation metrics of the validation data

In [16]:
# Resolve the validation file of the last evaluated day.
# `time_index_eval` is the loop variable left over from the fine-tuning loop, so
# this cell only works after that loop has run at least once in this kernel.
eval_data_paths = glob.glob(os.path.join(OUTPUT_DIR, f"{time_index_eval}/valid.parquet"))

# eval_data_paths = f'{OUTPUT_DIR}/{time_index_eval}/valid.parquet'
print(f"eval_data_paths: {eval_data_paths}")

eval_data_paths: ['workspace/data/sessions_by_day/8/valid.parquet']


In [17]:
# Re-run evaluation on the validation file(s) in `eval_data_paths`, i.e. the
# last day evaluated by the fine-tuning loop (day 8 in the recorded run).
eval_metrics = trainer.evaluate(eval_dataset=eval_data_paths, metric_key_prefix='eval')

# Print the metrics in alphabetical key order.
for key in sorted(eval_metrics):
    print("  %s = %s" % (key, str(eval_metrics[key])))

  eval_/loss = 4.469639301300049
  eval_/next-item/ndcg_at_20 = 0.1843894124031067
  eval_/next-item/ndcg_at_40 = 0.23753295838832855
  eval_/next-item/recall_at_20 = 0.5052083730697632
  eval_/next-item/recall_at_40 = 0.765625
  eval_runtime = 0.1484
  eval_samples_per_second = 1294.091
  eval_steps_per_second = 40.44


## Save the model

In [18]:
# model_path= os.environ.get("OUTPUT_DIR", f"{INPUT_DATA_DIR}/saved_model")
# Save the trained model to <INPUT_DATA_DIR>/saved_model.
MODEL_PATH = f'{INPUT_DATA_DIR}/saved_model'
model.save(MODEL_PATH)

In [22]:
# Display the directory the NVTabular workflow is loaded from.
OUTPUT_WORKFLOW_DIR

'workspace/data/workflow_etl'

In [53]:
# Inspect the first parameter tensor registered on the model.
# NOTE: despite the name, `input_shape` is the shape of that parameter tensor,
# not the shape of a model input batch.
first_parameter = next(iter(model.parameters()))
input_shape = first_parameter.shape
input_shape

torch.Size([64, 2])

In [54]:
# model

### Save method 2
* see [here](https://nvidia-merlin.github.io/Transformers4Rec/main/examples/end-to-end-session-based/02-End-to-end-session-based-with-Yoochoose-PyT.html) for more details

Per Ronnay: do not use `export_pytorch_ensemble`.

In [26]:
# Load the saved NVTabular workflow from OUTPUT_WORKFLOW_DIR.
# In the visible cells `workflow` is only referenced by the commented-out export.
workflow = nvt.Workflow.load(OUTPUT_WORKFLOW_DIR)

In [39]:
# from nvtabular.inference.triton import export_pytorch_ensemble

# export_pytorch_ensemble(
#     model,
#     workflow,
#     sparse_max=trainer.get_train_dataloader().dataset.sparse_max,
#     name= "t4r_pytorch",
#     model_path= "./torch_ensemble/models/",
#     label_columns =[],
# )

In [37]:
# List the exported model directory tree.
# NOTE(review): this inspects ./workspace_v2, not the REPO_WORKSPACE ('workspace')
# used by the rest of the notebook - confirm this is the intended location.
!tree ./workspace_v2/data/models

[01;34m./workspace_v2/data/models[00m
├── [01;34mt4r_pytorch[00m
│   ├── [01;34m1[00m
│   └── config.pbtxt
├── [01;34mt4r_pytorch_nvt[00m
│   ├── [01;34m1[00m
│   │   ├── model.py
│   │   └── [01;34mworkflow[00m
│   │       ├── [01;34mcategories[00m
│   │       │   ├── unique.category.parquet
│   │       │   ├── unique.item_id.parquet
│   │       │   └── unique.session_id.parquet
│   │       ├── metadata.json
│   │       └── workflow.pkl
│   └── config.pbtxt
└── [01;34mt4r_pytorch_pt[00m
    ├── [01;34m1[00m
    │   ├── model.pkl
    │   ├── model.pth
    │   ├── model.py
    │   └── model_info.json
    └── config.pbtxt

8 directories, 13 files


In [38]:
# List the model directory tree under the notebook's own workspace.
!tree ./workspace/data/models

[01;34m./workspace/data/models[00m
├── [01;34m0_predictpytorchtriton[00m
│   ├── [01;34m1[00m
│   │   └── model.pt
│   └── config.pbtxt
└── [01;34mensemble_model[00m
    ├── [01;34m1[00m
    └── config.pbtxt

4 directories, 3 files
