In [None]:
!pip install transformers4rec[pytorch,nvtabular] -U

In [None]:
!pip install cudf-cu11==22.12 rmm-cu11==22.12 --extra-index-url=https://pypi.ngc.nvidia.com
!pip install cugraph-cu11==22.12 dask-cuda==22.12 dask-cudf-cu11==22.12  pylibcugraph-cu11==22.12 --extra-index-url=https://pypi.ngc.nvidia.com/
!pip install cuml-cu11==22.12 raft_dask_cu11==22.12 dask-cudf-cu11==22.12  pylibraft_cu11==22.12 ucx-py-cu11==0.29.0 --extra-index-url=https://pypi.ngc.nvidia.com


In [None]:
import os
import glob

import torch 
import transformers4rec.torch as tr

from transformers4rec.torch.ranking_metric import NDCGAt, RecallAt
from transformers4rec.torch.utils.examples_utils import wipe_memory

  warn(f"Triton dtype mappings did not load successfully due to an error: {exc.msg}")


In [None]:
from merlin_standard_lib import Schema

# Names of the sequence columns that are fed to the training model, grouped by kind.
cat_features = [
    'product_id-list_seq',
    'category_id-list_seq',
]

cont_features = [
    'price_log_norm-list_seq',
    'relative_price_to_avg_categ_id-list_seq',
]

# Day-of-week and product-recency columns (going by the column names).
temp_features = [
    'et_dayofweek_sin-list_seq',
    'et_dayofweek_cos-list_seq',
    'product_recency_days_log_norm-list_seq',
]

# Schema file describing the processed dataset. The SCHEMA_PATH environment
# variable overrides the Google Drive default, in the same way OUTPUT_DIR can be
# overridden for the per-day session files.
SCHEMA_PATH = os.environ.get(
    "SCHEMA_PATH",
    '/content/drive/MyDrive/dataset_rees46/processed_nvt/schema.pbtxt',
)

# Load the schema and keep only the selected columns; the result is passed to
# TabularSequenceFeatures and to the Trainer.
schema = Schema().from_proto_text(SCHEMA_PATH)
schema = schema.select_by_name(cat_features + cont_features + temp_features)

In [None]:
# Display the filtered schema to check which columns (and which metadata) were kept.
schema

[{'name': 'product_id-list_seq', 'value_count': {'min': '2', 'max': '20'}, 'type': 'INT', 'int_domain': {'name': 'product_id', 'max': '166795', 'is_categorical': True}, 'annotation': {'tag': ['list', 'item_id', 'id', 'categorical', 'item'], 'comment': ['{"is_ragged": true, "embedding_sizes": {"dimension": 512.0, "cardinality": 166796.0}, "max_size": 0.0, "is_list": true, "start_index": 1.0, "num_buckets": null, "freq_threshold": 0.0, "cat_path": ".//categories/unique.product_id.parquet", "dtype_item_size": 64.0}']}}, {'name': 'category_id-list_seq', 'value_count': {'min': '2', 'max': '20'}, 'type': 'INT', 'int_domain': {'name': 'category_id', 'max': '625', 'is_categorical': True}, 'annotation': {'tag': ['categorical', 'list'], 'comment': ['{"dtype_item_size": 64.0, "num_buckets": null, "is_list": true, "embedding_sizes": {"dimension": 59.0, "cardinality": 626.0}, "freq_threshold": 0.0, "max_size": 0.0, "start_index": 1.0, "cat_path": ".//categories/unique.category_id.parquet", "is_ragg

In [None]:
# Model dimensions shared by the input module, the MLP and the transformer.
sequence_length = 20
d_model = 320

# Input module built from the schema. It concatenates the tabular features,
# projects them to `d_model`, and applies the "mlm" masking scheme so that
# masked inputs are prepared for training.
inputs = tr.TabularSequenceFeatures.from_schema(
    schema,
    masking="mlm",
    aggregation="concat",
    max_sequence_length=sequence_length,
    continuous_projection=d_model,
    d_output=d_model,
)

In [None]:
# Transformer configuration (ALBERT variant).
transformer_config = tr.AlbertConfig.build(
    d_model=d_model,
    n_head=8,
    n_layer=2,
    total_seq_length=sequence_length,
    dropout=0.0,  # regularization
    num_hidden_groups=1,
    inner_group_num=1,
    # NOTE(review): the printed model summary shows 64-dimensional embedding
    # tables even though item_embedding_dim=320 is passed here, so some of the
    # keyword arguments below may not be consumed by AlbertConfig.build.
    # Confirm which of them take effect.
    item_embedding_dim=320,
    stochastic_shared_embeddings_replacement_prob=0.06,  # regularization
    input_dropout=0.1,
    label_smoothing=0.2,  # regularization (proved to be useful in train/val accuracy)
    weight_decay=9.565968888623912e-05,  # regularization
    item_id_embeddings_init_std=0.11,
    mlm_probability=0.6,
    eval_on_last_item_seq_only=True,
    mf_constrained_embeddings=True,
    layer_norm_featurewise=True,
)

# Model body: input module (with masking), an MLP projection, then the transformer block.
body = tr.SequentialBlock(
    inputs,
    tr.MLPBlock([d_model]),
    tr.TransformerBlock(transformer_config, masking=inputs.masking),
)

# Ranking metrics reported for the next-item prediction task at cut-offs 10 and 20.
ranking_metrics = [
    NDCGAt(top_ks=[10, 20], labels_onehot=True),
    RecallAt(top_ks=[10, 20], labels_onehot=True),
]
next_item_task = tr.NextItemPredictionTask(weight_tying=True, metrics=ranking_metrics)

# Head for the next-item prediction task, attached to the body.
head = tr.Head(body, [next_item_task])

# End-to-end model.
model = tr.Model(head)



In [None]:
# Print the module tree to inspect the architecture that was assembled.
model 

Model(
  (heads): ModuleList(
    (0): Head(
      (body): SequentialBlock(
        (0): TabularSequenceFeatures(
          (_aggregation): ConcatFeatures()
          (to_merge): ModuleDict(
            (continuous_module): SequentialBlock(
              (0): ContinuousFeatures(
                (filter_features): FilterFeatures()
                (_aggregation): ConcatFeatures()
              )
              (1): SequentialBlock(
                (0): DenseBlock(
                  (0): Linear(in_features=5, out_features=320, bias=True)
                  (1): ReLU(inplace=True)
                )
              )
              (2): AsTabular()
            )
            (categorical_module): SequenceEmbeddingFeatures(
              (filter_features): FilterFeatures()
              (embedding_tables): ModuleDict(
                (product_id-list_seq): Embedding(166796, 64, padding_idx=0)
                (category_id-list_seq): Embedding(626, 64, padding_idx=0)
              )
            )
  

In [None]:
from transformers4rec.config.trainer import T4RecTrainingArguments
from transformers4rec.torch import Trainer
from transformers4rec.torch.utils.data_utils import MerlinDataLoader

# Arguments for the T4Rec trainer.
training_args = T4RecTrainingArguments(
    # Checkpoints are written below this folder.
    output_dir="/content/drive/MyDrive/dataset_rees46/bert_cat+price+temp",
    # Reuse the sequence length the input module and the transformer were built
    # with, so the settings cannot drift apart.
    max_sequence_length=sequence_length,
    data_loader_engine='merlin',
    num_train_epochs=10,
    dataloader_drop_last=True,
    compute_metrics_each_n_steps=1,
    per_device_train_batch_size=192,
    per_device_eval_batch_size=512,
    gradient_accumulation_steps=1,
    learning_rate=0.0004904752786458524,
    report_to=[],  # no external experiment trackers
    logging_steps=200,
)

In [None]:
# T4Rec Trainer: drives both the training and the evaluation runs for `model`,
# using the arguments and the schema defined in the cells above.
trainer = Trainer(
    args=training_args,
    model=model,
    schema=schema,
    compute_metrics=True,
)

In [None]:
# Root folder containing one sub-folder per day with the session parquet files.
# Despite its name it is read from during training; the OUTPUT_DIR environment
# variable takes precedence over the Google Drive default.
OUTPUT_DIR = os.getenv(
    "OUTPUT_DIR",
    "/content/drive/MyDrive/dataset_rees46/sessions_by_day",
)

In [None]:
%%time
start_time_window_index = 1
final_time_window_index = 30
for time_index in range(start_time_window_index, final_time_window_index):
    # Set data 
    time_index_train = time_index
    time_index_eval = time_index + 1
    train_paths = glob.glob(os.path.join(OUTPUT_DIR, f"{time_index_train}/train.parquet"))
    eval_paths = glob.glob(os.path.join(OUTPUT_DIR, f"{time_index_eval}/test.parquet"))
    # Train on day related to time_index 
    print('*'*20)
    print("Launch training for day %s are:" %time_index)
    print('*'*20 + '\n')
    trainer.train_dataset_or_path = train_paths
    trainer.reset_lr_scheduler()
    trainer.train()
    trainer.state.global_step +=1
    # Evaluate on the following day
    trainer.eval_dataset_or_path = eval_paths
    train_metrics = trainer.evaluate(metric_key_prefix='eval')
    print('*'*20)
    print("Eval results for day %s are:\t" %time_index_eval)
    print('\n' + '*'*20 + '\n')
    for key in sorted(train_metrics.keys()):
        print(" %s = %s" % (key, str(train_metrics[key]))) 
    wipe_memory()

********************
Launch training for day 1 are:
********************



***** Running training *****
  Num examples = 111936
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 5830


Step,Training Loss
200,10.2405
400,9.1439
600,8.7013
800,8.5265
1000,8.3834
1200,8.1035
1400,8.0483
1600,7.9479
1800,7.6976
2000,7.7473


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 2 are:	

********************

 eval_/loss = 7.696527004241943
 eval_/next-item/ndcg_at_10 = 0.11714424192905426
 eval_/next-item/ndcg_at_20 = 0.13906420767307281
 eval_/next-item/recall_at_10 = 0.2135937511920929
 eval_/next-item/recall_at_20 = 0.3005468547344208
 eval_runtime = 2.6474
 eval_samples_per_second = 4834.921
 eval_steps_per_second = 9.443
********************
Launch training for day 2 are:
********************



***** Running training *****
  Num examples = 105984
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 5520


Step,Training Loss
200,7.4316
400,7.38
600,7.2932
800,7.1351
1000,7.0484
1200,6.9648
1400,6.8363
1600,6.8495
1800,6.708
2000,6.6632


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 3 are:	

********************

 eval_/loss = 7.2178874015808105
 eval_/next-item/ndcg_at_10 = 0.13277243077754974
 eval_/next-item/ndcg_at_20 = 0.15664348006248474
 eval_/next-item/recall_at_10 = 0.24575407803058624
 eval_/next-item/recall_at_20 = 0.3402683436870575
 eval_runtime = 2.3384
 eval_samples_per_second = 5035.958
 eval_steps_per_second = 9.836
********************
Launch training for day 3 are:
********************



***** Running training *****
  Num examples = 97728
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 5090


Step,Training Loss
200,6.8741
400,6.8352
600,6.7407
800,6.6862
1000,6.6489
1200,6.4919
1400,6.5313
1600,6.4824
1800,6.383
2000,6.3674


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 4 are:	

********************

 eval_/loss = 6.922041893005371
 eval_/next-item/ndcg_at_10 = 0.14837785065174103
 eval_/next-item/ndcg_at_20 = 0.17536403238773346
 eval_/next-item/recall_at_10 = 0.2728515863418579
 eval_/next-item/recall_at_20 = 0.3798828423023224
 eval_runtime = 3.0275
 eval_samples_per_second = 5073.549
 eval_steps_per_second = 9.909
********************
Launch training for day 4 are:
********************



***** Running training *****
  Num examples = 124416
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6480


Step,Training Loss
200,6.5856
400,6.5292
600,6.5514
800,6.4794
1000,6.3507
1200,6.3464
1400,6.3136
1600,6.2246
1800,6.2355
2000,6.1518


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 5 are:	

********************

 eval_/loss = 6.7588114738464355
 eval_/next-item/ndcg_at_10 = 0.1558503359556198
 eval_/next-item/ndcg_at_20 = 0.1834534853696823
 eval_/next-item/recall_at_10 = 0.28587964177131653
 eval_/next-item/recall_at_20 = 0.3949652910232544
 eval_runtime = 2.8111
 eval_samples_per_second = 4917.639
 eval_steps_per_second = 9.605
********************
Launch training for day 5 are:
********************



***** Running training *****
  Num examples = 114432
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 5960


Step,Training Loss
200,6.4556
400,6.3842
600,6.4254
800,6.2619
1000,6.2748
1200,6.2687
1400,6.1894
1600,6.1389
1800,6.1607
2000,6.0948


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 6 are:	

********************

 eval_/loss = 6.514852523803711
 eval_/next-item/ndcg_at_10 = 0.16850867867469788
 eval_/next-item/ndcg_at_20 = 0.19693148136138916
 eval_/next-item/recall_at_10 = 0.30714699625968933
 eval_/next-item/recall_at_20 = 0.41956019401550293
 eval_runtime = 2.8985
 eval_samples_per_second = 4769.337
 eval_steps_per_second = 9.315
********************
Launch training for day 6 are:
********************



***** Running training *****
  Num examples = 112704
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 5870


Step,Training Loss
200,6.2459
400,6.2679
600,6.3085
800,6.1782
1000,6.1245
1200,6.1637
1400,6.0538
1600,6.0761
1800,6.0495
2000,5.9762


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 7 are:	

********************

 eval_/loss = 6.73734188079834
 eval_/next-item/ndcg_at_10 = 0.16585998237133026
 eval_/next-item/ndcg_at_20 = 0.19271935522556305
 eval_/next-item/recall_at_10 = 0.3043749928474426
 eval_/next-item/recall_at_20 = 0.41062498092651367
 eval_runtime = 2.5383
 eval_samples_per_second = 5042.818
 eval_steps_per_second = 9.849
********************
Launch training for day 7 are:
********************



***** Running training *****
  Num examples = 105600
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 5500


Step,Training Loss
200,6.3924
400,6.3865
600,6.3271
800,6.2358
1000,6.2753
1200,6.2064
1400,6.1388
1600,6.1242
1800,6.0741
2000,6.0542


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 8 are:	

********************

 eval_/loss = 6.7682061195373535
 eval_/next-item/ndcg_at_10 = 0.16510945558547974
 eval_/next-item/ndcg_at_20 = 0.1920033097267151
 eval_/next-item/recall_at_10 = 0.3053385615348816
 eval_/next-item/recall_at_20 = 0.41165366768836975
 eval_runtime = 3.1664
 eval_samples_per_second = 4850.93
 eval_steps_per_second = 9.474
********************
Launch training for day 8 are:
********************



***** Running training *****
  Num examples = 124992
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6510


Step,Training Loss
200,6.4322
400,6.3922
600,6.4206
800,6.3469
1000,6.2517
1200,6.2692
1400,6.2393
1600,6.1604
1800,6.1809
2000,6.1634


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 9 are:	

********************

 eval_/loss = 6.707037448883057
 eval_/next-item/ndcg_at_10 = 0.17292611300945282
 eval_/next-item/ndcg_at_20 = 0.1998969465494156
 eval_/next-item/recall_at_10 = 0.3137122690677643
 eval_/next-item/recall_at_20 = 0.42046067118644714
 eval_runtime = 3.0234
 eval_samples_per_second = 4911.031
 eval_steps_per_second = 9.592
********************
Launch training for day 9 are:
********************



***** Running training *****
  Num examples = 120768
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6290


Step,Training Loss
200,6.4187
400,6.365
600,6.4559
800,6.2824
1000,6.2622
1200,6.2757
1400,6.2398
1600,6.1577
1800,6.1323
2000,6.1461


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 10 are:	

********************

 eval_/loss = 6.752028942108154
 eval_/next-item/ndcg_at_10 = 0.17180365324020386
 eval_/next-item/ndcg_at_20 = 0.19976353645324707
 eval_/next-item/recall_at_10 = 0.31090855598449707
 eval_/next-item/recall_at_20 = 0.4212239682674408
 eval_runtime = 2.746
 eval_samples_per_second = 5034.208
 eval_steps_per_second = 9.832
********************
Launch training for day 10 are:
********************



***** Running training *****
  Num examples = 112320
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 5850


Step,Training Loss
200,6.3908
400,6.3705
600,6.4132
800,6.2395
1000,6.234
1200,6.2676
1400,6.1539
1600,6.1383
1800,6.1507
2000,6.1186


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 11 are:	

********************

 eval_/loss = 6.769430160522461
 eval_/next-item/ndcg_at_10 = 0.1695411205291748
 eval_/next-item/ndcg_at_20 = 0.1964038908481598
 eval_/next-item/recall_at_10 = 0.30731201171875
 eval_/next-item/recall_at_20 = 0.4132080078125
 eval_runtime = 3.2522
 eval_samples_per_second = 5037.864
 eval_steps_per_second = 9.84
********************
Launch training for day 11 are:
********************



***** Running training *****
  Num examples = 132480
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6900


Step,Training Loss
200,6.3925
400,6.406
600,6.4064
800,6.3433
1000,6.2792
1200,6.2444
1400,6.2913
1600,6.1595
1800,6.1265
2000,6.2008


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 12 are:	

********************

 eval_/loss = 6.545329570770264
 eval_/next-item/ndcg_at_10 = 0.1797453612089157
 eval_/next-item/ndcg_at_20 = 0.20627903938293457
 eval_/next-item/recall_at_10 = 0.32591983675956726
 eval_/next-item/recall_at_20 = 0.43094757199287415
 eval_runtime = 3.2045
 eval_samples_per_second = 4953.067
 eval_steps_per_second = 9.674
********************
Launch training for day 12 are:
********************



***** Running training *****
  Num examples = 128640
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6700


Step,Training Loss
200,6.2598
400,6.3046
600,6.3345
800,6.3097
1000,6.1387
1200,6.134
1400,6.2208
1600,6.0696
1800,6.0824
2000,6.1335


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 13 are:	

********************

 eval_/loss = 6.5599822998046875
 eval_/next-item/ndcg_at_10 = 0.1782379150390625
 eval_/next-item/ndcg_at_20 = 0.20687375962734222
 eval_/next-item/recall_at_10 = 0.32042738795280457
 eval_/next-item/recall_at_20 = 0.43370863795280457
 eval_runtime = 3.4632
 eval_samples_per_second = 5026.613
 eval_steps_per_second = 9.818
********************
Launch training for day 13 are:
********************



***** Running training *****
  Num examples = 141312
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 7360


Step,Training Loss
200,6.2185
400,6.1875
600,6.1965
800,6.1688
1000,6.1003
1200,6.071
1400,6.0721
1600,6.0279
1800,6.0225
2000,6.0115


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 14 are:	

********************

 eval_/loss = 6.580650329589844
 eval_/next-item/ndcg_at_10 = 0.18118862807750702
 eval_/next-item/ndcg_at_20 = 0.20855626463890076
 eval_/next-item/recall_at_10 = 0.3265625238418579
 eval_/next-item/recall_at_20 = 0.4345703423023224
 eval_runtime = 3.0824
 eval_samples_per_second = 4983.118
 eval_steps_per_second = 9.733
********************
Launch training for day 14 are:
********************



***** Running training *****
  Num examples = 127296
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6630


Step,Training Loss
200,6.221
400,6.2354
600,6.2921
800,6.1996
1000,6.0834
1200,6.0951
1400,6.173
1600,6.003
1800,6.0094
2000,6.0937


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 15 are:	

********************

 eval_/loss = 6.633407115936279
 eval_/next-item/ndcg_at_10 = 0.18199607729911804
 eval_/next-item/ndcg_at_20 = 0.20925942063331604
 eval_/next-item/recall_at_10 = 0.3272705078125
 eval_/next-item/recall_at_20 = 0.43524169921875
 eval_runtime = 3.692
 eval_samples_per_second = 4437.686
 eval_steps_per_second = 8.667
********************
Launch training for day 15 are:
********************



***** Running training *****
  Num examples = 136320
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 7100


Step,Training Loss
200,6.2632
400,6.2691
600,6.2625
800,6.23
1000,6.1539
1200,6.1118
1400,6.1777
1600,6.0768
1800,6.0256
2000,6.0366


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 16 are:	

********************

 eval_/loss = 6.580458164215088
 eval_/next-item/ndcg_at_10 = 0.17989075183868408
 eval_/next-item/ndcg_at_20 = 0.20800766348838806
 eval_/next-item/recall_at_10 = 0.32470703125
 eval_/next-item/recall_at_20 = 0.43603515625
 eval_runtime = 3.4135
 eval_samples_per_second = 4799.703
 eval_steps_per_second = 9.374
********************
Launch training for day 16 are:
********************



***** Running training *****
  Num examples = 133632
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6960


Step,Training Loss
200,6.2275
400,6.2098
600,6.271
800,6.2311
1000,6.1056
1200,6.0914
1400,6.2351
1600,6.0112
1800,6.0233
2000,6.0866


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 17 are:	

********************

 eval_/loss = 6.587253093719482
 eval_/next-item/ndcg_at_10 = 0.18366508185863495
 eval_/next-item/ndcg_at_20 = 0.212590754032135
 eval_/next-item/recall_at_10 = 0.3310883641242981
 eval_/next-item/recall_at_20 = 0.44564923644065857
 eval_runtime = 2.9866
 eval_samples_per_second = 4971.604
 eval_steps_per_second = 9.71
********************
Launch training for day 17 are:
********************



***** Running training *****
  Num examples = 122304
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6370


Step,Training Loss
200,6.2587
400,6.2337
600,6.3169
800,6.207
1000,6.1187
1200,6.151
1400,6.1746
1600,6.0172
1800,6.0546
2000,6.1045


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 18 are:	

********************

 eval_/loss = 6.684413909912109
 eval_/next-item/ndcg_at_10 = 0.18324542045593262
 eval_/next-item/ndcg_at_20 = 0.2101973444223404
 eval_/next-item/recall_at_10 = 0.3276839554309845
 eval_/next-item/recall_at_20 = 0.43441277742385864
 eval_runtime = 3.2037
 eval_samples_per_second = 4954.236
 eval_steps_per_second = 9.676
********************
Launch training for day 18 are:
********************



***** Running training *****
  Num examples = 129216
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6730


Step,Training Loss
200,6.3697
400,6.2765
600,6.3501
800,6.2804
1000,6.1827
1200,6.1703
1400,6.2487
1600,6.0839
1800,6.078
2000,6.1487


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 19 are:	

********************

 eval_/loss = 6.485246658325195
 eval_/next-item/ndcg_at_10 = 0.18738935887813568
 eval_/next-item/ndcg_at_20 = 0.21495039761066437
 eval_/next-item/recall_at_10 = 0.3351966440677643
 eval_/next-item/recall_at_20 = 0.44410020112991333
 eval_runtime = 3.0092
 eval_samples_per_second = 4934.183
 eval_steps_per_second = 9.637
********************
Launch training for day 19 are:
********************



***** Running training *****
  Num examples = 122304
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6370


Step,Training Loss
200,6.1894
400,6.1576
600,6.269
800,6.1646
1000,6.0661
1200,6.0762
1400,6.1118
1600,5.9939
1800,5.9807
2000,6.0465


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 20 are:	

********************

 eval_/loss = 6.373951435089111
 eval_/next-item/ndcg_at_10 = 0.1924278289079666
 eval_/next-item/ndcg_at_20 = 0.22018948197364807
 eval_/next-item/recall_at_10 = 0.3414713740348816
 eval_/next-item/recall_at_20 = 0.4511719048023224
 eval_runtime = 3.0986
 eval_samples_per_second = 4957.023
 eval_steps_per_second = 9.682
********************
Launch training for day 20 are:
********************



***** Running training *****
  Num examples = 126528
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6590


Step,Training Loss
200,6.0515
400,6.0259
600,6.1184
800,6.0687
1000,5.9523
1200,5.963
1400,6.0076
1600,5.8939
1800,5.8491
2000,5.9838


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 21 are:	

********************

 eval_/loss = 6.326855659484863
 eval_/next-item/ndcg_at_10 = 0.1972464621067047
 eval_/next-item/ndcg_at_20 = 0.22470349073410034
 eval_/next-item/recall_at_10 = 0.3483297526836395
 eval_/next-item/recall_at_20 = 0.45662716031074524
 eval_runtime = 3.0428
 eval_samples_per_second = 4879.651
 eval_steps_per_second = 9.531
********************
Launch training for day 21 are:
********************



***** Running training *****
  Num examples = 120960
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6300


Step,Training Loss
200,6.0374
400,6.0608
600,6.1045
800,6.004
1000,5.925
1200,5.979
1400,5.9476
1600,5.8527
1800,5.8727
2000,5.8837


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 22 are:	

********************

 eval_/loss = 6.429937839508057
 eval_/next-item/ndcg_at_10 = 0.19326630234718323
 eval_/next-item/ndcg_at_20 = 0.2204214334487915
 eval_/next-item/recall_at_10 = 0.34031519293785095
 eval_/next-item/recall_at_20 = 0.44760236144065857
 eval_runtime = 3.0061
 eval_samples_per_second = 4939.211
 eval_steps_per_second = 9.647
********************
Launch training for day 22 are:
********************



***** Running training *****
  Num examples = 122496
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6380


Step,Training Loss
200,6.0443
400,6.0421
600,6.1232
800,6.0212
1000,5.9275
1200,5.977
1400,6.0029
1600,5.8306
1800,5.8645
2000,5.9921


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 23 are:	

********************

 eval_/loss = 6.416492462158203
 eval_/next-item/ndcg_at_10 = 0.19466844201087952
 eval_/next-item/ndcg_at_20 = 0.22228966653347015
 eval_/next-item/recall_at_10 = 0.3415948152542114
 eval_/next-item/recall_at_20 = 0.45083513855934143
 eval_runtime = 3.1619
 eval_samples_per_second = 4695.866
 eval_steps_per_second = 9.172
********************
Launch training for day 23 are:
********************



***** Running training *****
  Num examples = 120384
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6270


Step,Training Loss
200,6.0368
400,6.0261
600,6.1197
800,5.9895
1000,5.8965
1200,6.0039
1400,5.9737
1600,5.8629
1800,5.8734
2000,5.9082


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 24 are:	

********************

 eval_/loss = 6.343203067779541
 eval_/next-item/ndcg_at_10 = 0.19529472291469574
 eval_/next-item/ndcg_at_20 = 0.2236960083246231
 eval_/next-item/recall_at_10 = 0.34577545523643494
 eval_/next-item/recall_at_20 = 0.45818865299224854
 eval_runtime = 2.8604
 eval_samples_per_second = 4832.862
 eval_steps_per_second = 9.439
********************
Launch training for day 24 are:
********************



***** Running training *****
  Num examples = 113280
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 5900


Step,Training Loss
200,6.0109
400,5.963
600,6.104
800,5.9047
1000,5.9353
1200,5.9629
1400,5.866
1600,5.8195
1800,5.8815
2000,5.757


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 25 are:	

********************

 eval_/loss = 6.44890832901001
 eval_/next-item/ndcg_at_10 = 0.19576214253902435
 eval_/next-item/ndcg_at_20 = 0.22342072427272797
 eval_/next-item/recall_at_10 = 0.34680992364883423
 eval_/next-item/recall_at_20 = 0.4560547173023224
 eval_runtime = 3.1862
 eval_samples_per_second = 4820.766
 eval_steps_per_second = 9.416
********************
Launch training for day 25 are:
********************



***** Running training *****
  Num examples = 125376
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6530


Step,Training Loss
200,6.0237
400,6.0169
600,6.1085
800,6.0044
1000,5.902
1200,5.9186
1400,6.0029
1600,5.793
1800,5.8087
2000,5.9214


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 26 are:	

********************

 eval_/loss = 6.290228843688965
 eval_/next-item/ndcg_at_10 = 0.20443524420261383
 eval_/next-item/ndcg_at_20 = 0.23329918086528778
 eval_/next-item/recall_at_10 = 0.356987863779068
 eval_/next-item/recall_at_20 = 0.47099247574806213
 eval_runtime = 2.9607
 eval_samples_per_second = 4669.196
 eval_steps_per_second = 9.12
********************
Launch training for day 26 are:
********************



***** Running training *****
  Num examples = 114432
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 5960


Step,Training Loss
200,5.9055
400,5.9126
600,6.0659
800,5.8563
1000,5.8152
1200,5.9322
1400,5.7545
1600,5.7615
1800,5.8548
2000,5.6889


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 27 are:	

********************

 eval_/loss = 6.302270889282227
 eval_/next-item/ndcg_at_10 = 0.20006254315376282
 eval_/next-item/ndcg_at_20 = 0.22765673696994781
 eval_/next-item/recall_at_10 = 0.3539496660232544
 eval_/next-item/recall_at_20 = 0.46317997574806213
 eval_runtime = 2.891
 eval_samples_per_second = 4781.654
 eval_steps_per_second = 9.339
********************
Launch training for day 27 are:
********************



***** Running training *****
  Num examples = 115776
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 6030


Step,Training Loss
200,5.8729
400,5.8813
600,6.0385
800,5.798
1000,5.7822
1200,5.8924
1400,5.7209
1600,5.714
1800,5.8009
2000,5.6764


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 28 are:	

********************

 eval_/loss = 6.258840084075928
 eval_/next-item/ndcg_at_10 = 0.20423176884651184
 eval_/next-item/ndcg_at_20 = 0.232116237282753
 eval_/next-item/recall_at_10 = 0.3579477369785309
 eval_/next-item/recall_at_20 = 0.4678485691547394
 eval_runtime = 2.9433
 eval_samples_per_second = 4522.842
 eval_steps_per_second = 8.834
********************
Launch training for day 28 are:
********************



***** Running training *****
  Num examples = 110016
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 5730


Step,Training Loss
200,5.9218
400,5.9528
600,6.01
800,5.8122
1000,5.8342
1200,5.8898
1400,5.7475
1600,5.7663
1800,5.7734
2000,5.6726


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 29 are:	

********************

 eval_/loss = 6.339688301086426
 eval_/next-item/ndcg_at_10 = 0.20399215817451477
 eval_/next-item/ndcg_at_20 = 0.2319708913564682
 eval_/next-item/recall_at_10 = 0.3536718785762787
 eval_/next-item/recall_at_20 = 0.46421873569488525
 eval_runtime = 2.6365
 eval_samples_per_second = 4854.909
 eval_steps_per_second = 9.482
********************
Launch training for day 29 are:
********************



***** Running training *****
  Num examples = 107328
  Num Epochs = 10
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 5590


Step,Training Loss
200,6.0085
400,5.9545
600,5.9953
800,5.869
1000,5.8347
1200,5.8938
1400,5.7848
1600,5.7827
1800,5.8071
2000,5.7028


Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to /content/drive/MyDrive/dataset_rees46/bert_cat+price+temp/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, onl

********************
Eval results for day 30 are:	

********************

 eval_/loss = 6.278590202331543
 eval_/next-item/ndcg_at_10 = 0.2014462798833847
 eval_/next-item/ndcg_at_20 = 0.2297505885362625
 eval_/next-item/recall_at_10 = 0.3546093702316284
 eval_/next-item/recall_at_20 = 0.4665624797344208
 eval_runtime = 2.6784
 eval_samples_per_second = 4778.903
 eval_steps_per_second = 9.334
CPU times: user 2h 54min 35s, sys: 2min 46s, total: 2h 57min 21s
Wall time: 3h 6min 1s


In [None]:
# Show the final ranking metrics accumulated by the model, one per line.
print("Results:")
final_metrics = model.compute_metrics()
for metric_name in final_metrics:
  # Each metric is a single-element tensor; .item() unwraps it to a Python number.
  print('%s: %s ' % (metric_name, final_metrics[metric_name].item()))

In [None]:
# Append this experiment's final metrics to the shared results file.
# Metrics are computed *before* the file is opened so that a failure in
# compute_metrics() does not leave a dangling header in the append-only log.
final_metrics = {key: value.item() for key, value in model.compute_metrics().items()}

with open("/content/drive/MyDrive/dataset_rees46/results.txt", 'a') as f: 
    f.write('\n')
    f.write('Bert with categ feat + price feat + temp feat results:')
    f.write('\n')
    for key, value in final_metrics.items():
        # Terminate every entry with a newline; without it all metrics (and the
        # next appended run) are concatenated onto a single unreadable line.
        f.write('%s: %s\n' % (key, value))