# GRU4REC 

In [None]:
!pip install transformers4rec[pytorch,nvtabular]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers4rec[nvtabular,pytorch]
  Downloading transformers4rec-0.1.16.tar.gz (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting betterproto<2.0.0
  Downloading betterproto-1.2.5.tar.gz (26 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting transformers<4.19
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.0/4.0 MB[0m [31m25.5 MB/s[0m eta [36m0:00:00[0m
Collecting torchmetrics>=0.10.0
  Downloading tor

In [None]:
# Attach Google Drive to the Colab filesystem at a named mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import os
import glob

import torch 
import transformers4rec.torch as tr

from transformers4rec.torch.ranking_metric import NDCGAt, RecallAt
from transformers4rec.torch.utils.examples_utils import wipe_memory

In [None]:
from merlin_standard_lib import Schema

# Schema object handed to the TabularSequenceFeatures class: read the
# protobuf-text file, then keep only the features named in SELECTED_FEATURES.
SCHEMA_PATH = '/content/drive/MyDrive/dataset_rees46/processed_nvt/schema.pbtxt'
SELECTED_FEATURES = ['product_id-list_seq']

schema = Schema().from_proto_text(SCHEMA_PATH).select_by_name(SELECTED_FEATURES)

In [None]:
# Display the schema object so its contents can be inspected in the cell output.
schema

[{'name': 'product_id-list_seq', 'value_count': {'min': '2', 'max': '20'}, 'type': 'INT', 'int_domain': {'name': 'product_id', 'max': '166795', 'is_categorical': True}, 'annotation': {'tag': ['list', 'item_id', 'id', 'categorical', 'item'], 'comment': ['{"is_ragged": true, "embedding_sizes": {"dimension": 512.0, "cardinality": 166796.0}, "is_list": true, "freq_threshold": 0.0, "dtype_item_size": 64.0, "start_index": 1.0, "cat_path": ".//categories/unique.product_id.parquet", "max_size": 0.0, "num_buckets": null}']}}]

In [None]:
# Value passed as max_sequence_length to the input module (and reused by later cells).
sequence_length = 20

# Input module built from the schema, with 'causal' masking requested.
inputs = tr.TabularSequenceFeatures.from_schema(
    schema, max_sequence_length=sequence_length, masking='causal'
)

In [None]:
d_model = 128  # width of the projection MLP output and of the GRU input/hidden state

# torch.nn.GRU defaults to batch_first=False, i.e. it reads its input as
# (seq, batch, feature). The output size declared for this block below is
# (batch, seq, feature), so the flag is set explicitly; without it the
# recurrence would run along the batch axis instead of along each session.
# NOTE(review): this changes what the model learns - re-run the notebook to
# refresh any previously recorded metrics.
gru_layer = torch.nn.GRU(
    input_size=d_model,
    hidden_size=d_model,
    num_layers=1,
    dropout=0.0,  # regularization
    batch_first=True,
)

body = tr.SequentialBlock(
    inputs,
    tr.MLPBlock([d_model]),  # projection MLP layer
    # Declared output size reuses sequence_length rather than repeating the literal 20.
    tr.Block(gru_layer, [None, sequence_length, d_model]),  # GRU model
)

In [None]:
# Ranking metrics evaluated at cut-offs 10 and 20.
ranking_metrics = [
    NDCGAt(top_ks=[10, 20], labels_onehot=True),
    RecallAt(top_ks=[10, 20], labels_onehot=True),
]

# Next-item prediction task with weight tying enabled, attached to the body through a Head.
prediction_task = tr.NextItemPredictionTask(weight_tying=True, metrics=ranking_metrics)
head = tr.Head(body, prediction_task)

# End-to-end model wrapping the head.
model = tr.Model(head)



In [None]:
# import NVTabular dependencies
from transformers4rec.torch.utils.data_utils import MerlinDataLoader

# Feature columns handed to the dataloader: categorical names, then continuous names (none here).
x_cat_names = ['product_id-list_seq']
x_cont_names = []

# {column_name: max sequence length} for every listed column - needed by the dataloader function.
sparse_features_max = dict.fromkeys(x_cat_names + x_cont_names, sequence_length)

# Define a `get_dataloader` function to call in the training loop
def get_dataloader(path, batch_size=32):
    """Build a MerlinDataLoader for ``path`` from the notebook-level schema.

    Args:
        path: data location(s), forwarded unchanged to
            ``MerlinDataLoader.from_schema``.
        batch_size: number of samples to yield at each iteration.

    Returns:
        The object returned by ``MerlinDataLoader.from_schema``.
    """
    sequence_columns = x_cat_names + x_cont_names
    loader = MerlinDataLoader.from_schema(
        schema,
        path,
        batch_size,
        max_sequence_length=sequence_length,
        sparse_names=sequence_columns,  # columns that should be represented as sparse tensors
        sparse_max=sparse_features_max,  # {column_name: max sequence length} per column
    )
    return loader
    

In [None]:
from transformers4rec.config.trainer import T4RecTrainingArguments
from transformers4rec.torch import Trainer

#Set arguments for training 
# Keyword arguments grouped by concern, then expanded into the arguments object.
training_config = dict(
    # Run set-up and logging
    output_dir="./gru",
    local_rank=-1,
    report_to=[],  # empty list avoids logging metrics to Weights&Biases
    logging_steps=200,
    # Batching
    per_device_train_batch_size=448,
    per_device_eval_batch_size=512,
    gradient_accumulation_steps=1,
    dataloader_drop_last=False,
    max_sequence_length=sequence_length,
    # Optimisation
    num_train_epochs=10,
    learning_rate=0.0007107976722774954,
    weight_decay=4.0070030423993165e-06,
)
train_args = T4RecTrainingArguments(**training_config)

In [None]:
# The T4Rec Trainer manages both training and evaluation. No dataloaders are
# passed here; the training loop assigns them on the trainer before each run.
trainer = Trainer(model=model, args=train_args, schema=schema, compute_metrics=True)

In [None]:
# Root folder holding one sub-folder per day; the OUTPUT_DIR environment
# variable, when set, takes precedence over the Drive default.
_DEFAULT_SESSIONS_DIR = "/content/drive/MyDrive/dataset_rees46/sessions_by_day"
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", _DEFAULT_SESSIONS_DIR)

In [None]:
import time


start_time_window_index = 1
final_time_window_index = 30

# Incremental training: fit on day t, then evaluate on day t + 1.
# range() excludes its stop value, so the last iteration trains on day
# final_time_window_index - 1 and evaluates on day final_time_window_index.
for time_index in range(start_time_window_index, final_time_window_index):
    # Set data
    time_index_train = time_index
    time_index_eval = time_index + 1
    train_pattern = os.path.join(OUTPUT_DIR, f"{time_index_train}/train.parquet")
    eval_pattern = os.path.join(OUTPUT_DIR, f"{time_index_eval}/test.parquet")
    train_paths = glob.glob(train_pattern)
    eval_paths = glob.glob(eval_pattern)

    # glob.glob returns an empty list (it does not raise) when nothing matches.
    # Stop here with the offending path instead of handing an empty list to
    # the dataloader.
    if not train_paths:
        raise FileNotFoundError("No training data found at %s" % train_pattern)
    if not eval_paths:
        raise FileNotFoundError("No evaluation data found at %s" % eval_pattern)

    # Initialize dataloaders
    trainer.train_dataloader = get_dataloader(train_paths, train_args.per_device_train_batch_size)
    trainer.eval_dataloader = get_dataloader(eval_paths, train_args.per_device_eval_batch_size)

    # Train on day related to time_index
    print('*'*20)
    print("Launch training for day %s are:" %time_index)
    print('*'*20 + '\n')
    trainer.reset_lr_scheduler()
    trainer.train()
    trainer.state.global_step +=1

    # Evaluate on the following day.
    # Despite its name, train_metrics holds the evaluation metrics; the name is
    # kept because it is a notebook-level variable other cells may read.
    train_metrics = trainer.evaluate(metric_key_prefix='eval')
    print('*'*20)
    print("Eval results for day %s are:\t" %time_index_eval)
    print('\n' + '*'*20 + '\n')
    for key in sorted(train_metrics):
        print(" %s = %s" % (key, str(train_metrics[key])))

    # Release memory between days, then pause before starting the next one.
    wipe_memory()
    time.sleep(10)

***** Running training *****
  Num examples = 112448
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2510


********************
Launch training for day 1 are:
********************



Step,Training Loss
200,10.0304
400,9.0336
600,8.8335
800,8.6725
1000,8.5121
1200,8.3599
1400,8.2422
1600,8.164
1800,8.09
2000,8.0294


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 2 are:	

********************

 eval_/loss = 8.556951522827148
 eval_/next-item/ndcg_at_10 = 0.06896374374628067
 eval_/next-item/ndcg_at_20 = 0.08415067195892334
 eval_/next-item/recall_at_10 = 0.12966495752334595
 eval_/next-item/recall_at_20 = 0.1898038536310196
 eval_runtime = 2.2688
 eval_samples_per_second = 5867.406
 eval_steps_per_second = 11.46


***** Running training *****
  Num examples = 106176
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2370


********************
Launch training for day 2 are:
********************



Step,Training Loss
200,8.5269
400,8.1975
600,8.0033
800,7.8244
1000,7.665
1200,7.5456
1400,7.4221
1600,7.3137
1800,7.2501
2000,7.2015


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 3 are:	

********************

 eval_/loss = 8.200451850891113
 eval_/next-item/ndcg_at_10 = 0.10441578179597855
 eval_/next-item/ndcg_at_20 = 0.12525855004787445
 eval_/next-item/recall_at_10 = 0.19518917798995972
 eval_/next-item/recall_at_20 = 0.2776246666908264
 eval_runtime = 2.0975
 eval_samples_per_second = 5858.312
 eval_steps_per_second = 11.442


***** Running training *****
  Num examples = 98112
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2190


********************
Launch training for day 3 are:
********************



Step,Training Loss
200,7.8994
400,7.5501
600,7.3241
800,7.1369
1000,7.0223
1200,6.8911
1400,6.8176
1600,6.731
1800,6.6769
2000,6.6164


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 4 are:	

********************

 eval_/loss = 7.667614936828613
 eval_/next-item/ndcg_at_10 = 0.13246335089206696
 eval_/next-item/ndcg_at_20 = 0.1555863469839096
 eval_/next-item/recall_at_10 = 0.23896509408950806
 eval_/next-item/recall_at_20 = 0.3308197259902954
 eval_runtime = 2.5084
 eval_samples_per_second = 6327.482
 eval_steps_per_second = 12.358


***** Running training *****
  Num examples = 124544
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2780


********************
Launch training for day 4 are:
********************



Step,Training Loss
200,7.409
400,7.1059
600,6.9638
800,6.8378
1000,6.7048
1200,6.6174
1400,6.5569
1600,6.4684
1800,6.4162
2000,6.3642


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 5 are:	

********************

 eval_/loss = 7.268002033233643
 eval_/next-item/ndcg_at_10 = 0.14750072360038757
 eval_/next-item/ndcg_at_20 = 0.1712849885225296
 eval_/next-item/recall_at_10 = 0.269091933965683
 eval_/next-item/recall_at_20 = 0.36343690752983093
 eval_runtime = 2.2245
 eval_samples_per_second = 6444.576
 eval_steps_per_second = 12.587


***** Running training *****
  Num examples = 114688
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2560


********************
Launch training for day 5 are:
********************



Step,Training Loss
200,7.1118
400,6.8533
600,6.6795
800,6.5694
1000,6.4734
1200,6.378
1400,6.3126
1600,6.2646
1800,6.214
2000,6.1586


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 6 are:	

********************

 eval_/loss = 6.962644577026367
 eval_/next-item/ndcg_at_10 = 0.16212917864322662
 eval_/next-item/ndcg_at_20 = 0.18853668868541718
 eval_/next-item/recall_at_10 = 0.2926005721092224
 eval_/next-item/recall_at_20 = 0.39683908224105835
 eval_runtime = 2.1952
 eval_samples_per_second = 6530.634
 eval_steps_per_second = 12.755


***** Running training *****
  Num examples = 112896
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2520


********************
Launch training for day 6 are:
********************



Step,Training Loss
200,6.8637
400,6.6357
600,6.4989
800,6.3935
1000,6.3216
1200,6.2264
1400,6.1741
1600,6.107
1800,6.086
2000,6.0413


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 7 are:	

********************

 eval_/loss = 7.230011940002441
 eval_/next-item/ndcg_at_10 = 0.1603071689605713
 eval_/next-item/ndcg_at_20 = 0.18604101240634918
 eval_/next-item/recall_at_10 = 0.2810012102127075
 eval_/next-item/recall_at_20 = 0.3828077018260956
 eval_runtime = 2.1082
 eval_samples_per_second = 6314.475
 eval_steps_per_second = 12.333


***** Running training *****
  Num examples = 106176
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2370


********************
Launch training for day 7 are:
********************



Step,Training Loss
200,6.9221
400,6.6404
600,6.4786
800,6.387
1000,6.3064
1200,6.2304
1400,6.1678
1600,6.0954
1800,6.06
2000,6.0346


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 8 are:	

********************

 eval_/loss = 7.191165924072266
 eval_/next-item/ndcg_at_10 = 0.16094809770584106
 eval_/next-item/ndcg_at_20 = 0.18656374514102936
 eval_/next-item/recall_at_10 = 0.2880041301250458
 eval_/next-item/recall_at_20 = 0.3891545832157135
 eval_runtime = 2.7165
 eval_samples_per_second = 5842.721
 eval_steps_per_second = 11.412


***** Running training *****
  Num examples = 125440
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2800


********************
Launch training for day 8 are:
********************



Step,Training Loss
200,6.9307
400,6.6662
600,6.5377
800,6.3971
1000,6.2913
1200,6.2262
1400,6.1761
1600,6.0907
1800,6.0345
2000,6.0174


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 9 are:	

********************

 eval_/loss = 7.055368900299072
 eval_/next-item/ndcg_at_10 = 0.16505153477191925
 eval_/next-item/ndcg_at_20 = 0.19078698754310608
 eval_/next-item/recall_at_10 = 0.2921694815158844
 eval_/next-item/recall_at_20 = 0.3941405117511749
 eval_runtime = 2.352
 eval_samples_per_second = 6530.719
 eval_steps_per_second = 12.755


***** Running training *****
  Num examples = 120960
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2700


********************
Launch training for day 9 are:
********************



Step,Training Loss
200,6.8338
400,6.6091
600,6.45
800,6.3652
1000,6.2418
1200,6.1792
1400,6.1451
1600,6.0771
1800,6.0166
2000,5.9886


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 10 are:	

********************

 eval_/loss = 7.052510738372803
 eval_/next-item/ndcg_at_10 = 0.16450147330760956
 eval_/next-item/ndcg_at_20 = 0.1904015839099884
 eval_/next-item/recall_at_10 = 0.29021787643432617
 eval_/next-item/recall_at_20 = 0.3928004801273346
 eval_runtime = 2.2077
 eval_samples_per_second = 6493.743
 eval_steps_per_second = 12.683


***** Running training *****
  Num examples = 112448
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2510


********************
Launch training for day 10 are:
********************



Step,Training Loss
200,6.8148
400,6.5642
600,6.4169
800,6.3155
1000,6.2471
1200,6.1559
1400,6.0791
1600,6.0641
1800,6.0179
2000,5.9797


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 11 are:	

********************

 eval_/loss = 7.06924295425415
 eval_/next-item/ndcg_at_10 = 0.16303104162216187
 eval_/next-item/ndcg_at_20 = 0.18931087851524353
 eval_/next-item/recall_at_10 = 0.286636620759964
 eval_/next-item/recall_at_20 = 0.3904251456260681
 eval_runtime = 2.6776
 eval_samples_per_second = 6310.149
 eval_steps_per_second = 12.325


***** Running training *****
  Num examples = 133056
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2970


********************
Launch training for day 11 are:
********************



Step,Training Loss
200,6.8417
400,6.6464
600,6.5162
800,6.3806
1000,6.3364
1200,6.2866
1400,6.1974
1600,6.1568
1800,6.1391
2000,6.0757


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 12 are:	

********************

 eval_/loss = 6.8026628494262695
 eval_/next-item/ndcg_at_10 = 0.17655861377716064
 eval_/next-item/ndcg_at_20 = 0.2028297632932663
 eval_/next-item/recall_at_10 = 0.308700829744339
 eval_/next-item/recall_at_20 = 0.4125211536884308
 eval_runtime = 2.5368
 eval_samples_per_second = 6458.644
 eval_steps_per_second = 12.615


***** Running training *****
  Num examples = 129024
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2880


********************
Launch training for day 12 are:
********************



Step,Training Loss
200,6.7019
400,6.5056
600,6.403
800,6.2718
1000,6.1969
1200,6.1657
1400,6.1038
1600,6.0452
1800,6.0097
2000,5.9916


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 13 are:	

********************

 eval_/loss = 6.782118797302246
 eval_/next-item/ndcg_at_10 = 0.17487448453903198
 eval_/next-item/ndcg_at_20 = 0.20216841995716095
 eval_/next-item/recall_at_10 = 0.3064239025115967
 eval_/next-item/recall_at_20 = 0.41469138860702515
 eval_runtime = 2.7744
 eval_samples_per_second = 6459.141
 eval_steps_per_second = 12.616


***** Running training *****
  Num examples = 141568
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 3160


********************
Launch training for day 13 are:
********************



Step,Training Loss
200,6.627
400,6.4851
600,6.3586
800,6.2437
1000,6.2165
1200,6.1389
1400,6.092
1600,6.0584
1800,6.0002
2000,5.9883


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 14 are:	

********************

 eval_/loss = 6.873451232910156
 eval_/next-item/ndcg_at_10 = 0.17688722908496857
 eval_/next-item/ndcg_at_20 = 0.20279911160469055
 eval_/next-item/recall_at_10 = 0.3109941780567169
 eval_/next-item/recall_at_20 = 0.4137078523635864
 eval_runtime = 2.519
 eval_samples_per_second = 6300.862
 eval_steps_per_second = 12.306


***** Running training *****
  Num examples = 127680
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2850


********************
Launch training for day 14 are:
********************



Step,Training Loss
200,6.6871
400,6.4827
600,6.3791
800,6.2574
1000,6.1681
1200,6.1561
1400,6.0763
1600,6.0249
1800,5.9895
2000,5.9813


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 15 are:	

********************

 eval_/loss = 6.913034915924072
 eval_/next-item/ndcg_at_10 = 0.17673413455486298
 eval_/next-item/ndcg_at_20 = 0.20286160707473755
 eval_/next-item/recall_at_10 = 0.3078886568546295
 eval_/next-item/recall_at_20 = 0.41146790981292725
 eval_runtime = 2.7392
 eval_samples_per_second = 6168.323
 eval_steps_per_second = 12.048


***** Running training *****
  Num examples = 136640
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 3050


********************
Launch training for day 15 are:
********************



Step,Training Loss
200,6.7127
400,6.5207
600,6.42
800,6.274
1000,6.2197
1200,6.1847
1400,6.0851
1600,6.0823
1800,6.0347
2000,5.9731


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 16 are:	

********************

 eval_/loss = 6.816497802734375
 eval_/next-item/ndcg_at_10 = 0.17237703502178192
 eval_/next-item/ndcg_at_20 = 0.20014719665050507
 eval_/next-item/recall_at_10 = 0.3005322217941284
 eval_/next-item/recall_at_20 = 0.41066890954971313
 eval_runtime = 2.629
 eval_samples_per_second = 6426.799
 eval_steps_per_second = 12.552


***** Running training *****
  Num examples = 133952
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2990


********************
Launch training for day 16 are:
********************



Step,Training Loss
200,6.6986
400,6.5498
600,6.4187
800,6.2882
1000,6.2414
1200,6.2131
1400,6.1178
1600,6.0984
1800,6.0611
2000,6.0172


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 17 are:	

********************

 eval_/loss = 6.8630051612854
 eval_/next-item/ndcg_at_10 = 0.17371462285518646
 eval_/next-item/ndcg_at_20 = 0.2008541077375412
 eval_/next-item/recall_at_10 = 0.3070470988750458
 eval_/next-item/recall_at_20 = 0.4142394959926605
 eval_runtime = 2.4097
 eval_samples_per_second = 6374.352
 eval_steps_per_second = 12.45


***** Running training *****
  Num examples = 122752
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2740


********************
Launch training for day 17 are:
********************



Step,Training Loss
200,6.7261
400,6.5056
600,6.4032
800,6.277
1000,6.1879
1200,6.1375
1400,6.1009
1600,6.0424
1800,5.9784
2000,5.9844


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 18 are:	

********************

 eval_/loss = 6.938830375671387
 eval_/next-item/ndcg_at_10 = 0.17115545272827148
 eval_/next-item/ndcg_at_20 = 0.19789279997348785
 eval_/next-item/recall_at_10 = 0.29710325598716736
 eval_/next-item/recall_at_20 = 0.40292173624038696
 eval_runtime = 2.7856
 eval_samples_per_second = 5881.63
 eval_steps_per_second = 11.488


***** Running training *****
  Num examples = 129472
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2890


********************
Launch training for day 18 are:
********************



Step,Training Loss
200,6.7793
400,6.577
600,6.4499
800,6.3476
1000,6.2557
1200,6.218
1400,6.1683
1600,6.0948
1800,6.1
2000,6.046


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 19 are:	

********************

 eval_/loss = 6.758204460144043
 eval_/next-item/ndcg_at_10 = 0.1762077808380127
 eval_/next-item/ndcg_at_20 = 0.2018938809633255
 eval_/next-item/recall_at_10 = 0.30744338035583496
 eval_/next-item/recall_at_20 = 0.40922001004219055
 eval_runtime = 2.6961
 eval_samples_per_second = 5697.12
 eval_steps_per_second = 11.127


***** Running training *****
  Num examples = 122752
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2740


********************
Launch training for day 19 are:
********************



Step,Training Loss
200,6.68
400,6.4961
600,6.372
800,6.2686
1000,6.1893
1200,6.1333
1400,6.1129
1600,6.0383
1800,6.023
2000,5.9737


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 20 are:	

********************

 eval_/loss = 6.657688617706299
 eval_/next-item/ndcg_at_10 = 0.1762889325618744
 eval_/next-item/ndcg_at_20 = 0.20377801358699799
 eval_/next-item/recall_at_10 = 0.3105581998825073
 eval_/next-item/recall_at_20 = 0.4195215106010437
 eval_runtime = 2.491
 eval_samples_per_second = 6371.642
 eval_steps_per_second = 12.445


***** Running training *****
  Num examples = 126784
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2830


********************
Launch training for day 20 are:
********************



Step,Training Loss
200,6.536
400,6.3853
600,6.2565
800,6.1664
1000,6.0966
1200,6.0422
1400,6.0133
1600,5.9393
1800,5.9133
2000,5.9043


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 21 are:	

********************

 eval_/loss = 6.585776329040527
 eval_/next-item/ndcg_at_10 = 0.17935973405838013
 eval_/next-item/ndcg_at_20 = 0.20785623788833618
 eval_/next-item/recall_at_10 = 0.31589511036872864
 eval_/next-item/recall_at_20 = 0.42841851711273193
 eval_runtime = 2.3874
 eval_samples_per_second = 6433.678
 eval_steps_per_second = 12.566


***** Running training *****
  Num examples = 121408
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2710


********************
Launch training for day 21 are:
********************



Step,Training Loss
200,6.5344
400,6.3407
600,6.2222
800,6.1395
1000,6.0461
1200,6.0101
1400,5.9499
1600,5.9095
1800,5.8674
2000,5.8507


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 22 are:	

********************

 eval_/loss = 6.713311195373535
 eval_/next-item/ndcg_at_10 = 0.17409691214561462
 eval_/next-item/ndcg_at_20 = 0.20020562410354614
 eval_/next-item/recall_at_10 = 0.3065548837184906
 eval_/next-item/recall_at_20 = 0.4099280536174774
 eval_runtime = 2.4406
 eval_samples_per_second = 6293.439
 eval_steps_per_second = 12.292


***** Running training *****
  Num examples = 122752
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2740


********************
Launch training for day 22 are:
********************



Step,Training Loss
200,6.546
400,6.3521
600,6.2483
800,6.1552
1000,6.0514
1200,6.0307
1400,5.9715
1600,5.9342
1800,5.8861
2000,5.8605


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 23 are:	

********************

 eval_/loss = 6.694944381713867
 eval_/next-item/ndcg_at_10 = 0.17823679745197296
 eval_/next-item/ndcg_at_20 = 0.20460617542266846
 eval_/next-item/recall_at_10 = 0.31222692131996155
 eval_/next-item/recall_at_20 = 0.41668346524238586
 eval_runtime = 2.3627
 eval_samples_per_second = 6501.133
 eval_steps_per_second = 12.698


***** Running training *****
  Num examples = 120512
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2690


********************
Launch training for day 23 are:
********************



Step,Training Loss
200,6.5654
400,6.3621
600,6.2448
800,6.1709
1000,6.0649
1200,6.0303
1400,5.9777
1600,5.9406
1800,5.8917
2000,5.8805


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 24 are:	

********************

 eval_/loss = 6.676004409790039
 eval_/next-item/ndcg_at_10 = 0.1763627827167511
 eval_/next-item/ndcg_at_20 = 0.20387418568134308
 eval_/next-item/recall_at_10 = 0.30993273854255676
 eval_/next-item/recall_at_20 = 0.41899242997169495
 eval_runtime = 2.249
 eval_samples_per_second = 6374.472
 eval_steps_per_second = 12.45


***** Running training *****
  Num examples = 113792
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2540


********************
Launch training for day 24 are:
********************



Step,Training Loss
200,6.559
400,6.3353
600,6.2196
800,6.1345
1000,6.0608
1200,5.9952
1400,5.9326
1600,5.8951
1800,5.8747
2000,5.8467


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 25 are:	

********************

 eval_/loss = 6.707900524139404
 eval_/next-item/ndcg_at_10 = 0.17580153048038483
 eval_/next-item/ndcg_at_20 = 0.20262713730335236
 eval_/next-item/recall_at_10 = 0.3115493953227997
 eval_/next-item/recall_at_20 = 0.41763341426849365
 eval_runtime = 2.5244
 eval_samples_per_second = 6287.344
 eval_steps_per_second = 12.28


***** Running training *****
  Num examples = 125440
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2800


********************
Launch training for day 25 are:
********************



Step,Training Loss
200,6.5312
400,6.352
600,6.224
800,6.135
1000,6.0481
1200,6.0207
1400,5.9676
1600,5.9105
1800,5.8636
2000,5.8642


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 26 are:	

********************

 eval_/loss = 6.588900089263916
 eval_/next-item/ndcg_at_10 = 0.18622387945652008
 eval_/next-item/ndcg_at_20 = 0.2137880176305771
 eval_/next-item/recall_at_10 = 0.32405832409858704
 eval_/next-item/recall_at_20 = 0.4331634044647217
 eval_runtime = 2.2462
 eval_samples_per_second = 6382.301
 eval_steps_per_second = 12.465


***** Running training *****
  Num examples = 114688
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2560


********************
Launch training for day 26 are:
********************



Step,Training Loss
200,6.4774
400,6.2797
600,6.1672
800,6.0926
1000,6.0183
1200,5.957
1400,5.9092
1600,5.8787
1800,5.8526
2000,5.8127


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 27 are:	

********************

 eval_/loss = 6.601047992706299
 eval_/next-item/ndcg_at_10 = 0.18162962794303894
 eval_/next-item/ndcg_at_20 = 0.20858973264694214
 eval_/next-item/recall_at_10 = 0.318111777305603
 eval_/next-item/recall_at_20 = 0.42470934987068176
 eval_runtime = 2.2975
 eval_samples_per_second = 6239.842
 eval_steps_per_second = 12.187


***** Running training *****
  Num examples = 116032
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2590


********************
Launch training for day 27 are:
********************



Step,Training Loss
200,6.4368
400,6.243
600,6.1425
800,6.0508
1000,5.9833
1200,5.9159
1400,5.877
1600,5.8576
1800,5.8061
2000,5.781


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 28 are:	

********************

 eval_/loss = 6.605090141296387
 eval_/next-item/ndcg_at_10 = 0.18436750769615173
 eval_/next-item/ndcg_at_20 = 0.21161752939224243
 eval_/next-item/recall_at_10 = 0.32083120942115784
 eval_/next-item/recall_at_20 = 0.4284135401248932
 eval_runtime = 2.2324
 eval_samples_per_second = 6192.535
 eval_steps_per_second = 12.095


***** Running training *****
  Num examples = 110208
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2460


********************
Launch training for day 28 are:
********************



Step,Training Loss
200,6.4675
400,6.2438
600,6.1506
800,6.0334
1000,5.9875
1200,5.9185
1400,5.8638
1600,5.8269
1800,5.8026
2000,5.774


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 29 are:	

********************

 eval_/loss = 6.6611480712890625
 eval_/next-item/ndcg_at_10 = 0.18025419116020203
 eval_/next-item/ndcg_at_20 = 0.20728495717048645
 eval_/next-item/recall_at_10 = 0.31490421295166016
 eval_/next-item/recall_at_20 = 0.42200934886932373
 eval_runtime = 2.3415
 eval_samples_per_second = 5685.173
 eval_steps_per_second = 11.104


***** Running training *****
  Num examples = 107520
  Num Epochs = 10
  Instantaneous batch size per device = 448
  Total train batch size (w. parallel, distributed & accumulation) = 448
  Gradient Accumulation steps = 1
  Total optimization steps = 2400


********************
Launch training for day 29 are:
********************



Step,Training Loss
200,6.5277
400,6.2923
600,6.1743
800,6.0645
1000,6.0264
1200,5.9538
1400,5.8979
1600,5.8542
1800,5.8287
2000,5.8035


Saving model checkpoint to ./gru/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./gru/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




********************
Eval results for day 30 are:	

********************

 eval_/loss = 6.6362175941467285
 eval_/next-item/ndcg_at_10 = 0.1758965700864792
 eval_/next-item/ndcg_at_20 = 0.20357991755008698
 eval_/next-item/recall_at_10 = 0.3126915991306305
 eval_/next-item/recall_at_20 = 0.42205703258514404
 eval_runtime = 2.0994
 eval_samples_per_second = 6340.719
 eval_steps_per_second = 12.384


In [None]:
# Append the GRU metrics to the shared results file. Mode 'a' keeps whatever
# earlier runs already wrote there.
with open("/content/drive/MyDrive/dataset_rees46/results.txt", 'a') as f:
    f.write('\n')
    f.write('GRU accuracy results:')
    f.write('\n')
    # Write one "name: value" pair per line. Previously no newline was written
    # after each pair, so all metrics ran together on a single line.
    # .item() is called on each value -- presumably a scalar tensor; confirm
    # against model.compute_metrics().
    for key, value in model.compute_metrics().items():
        f.write('%s: %s\n' % (key, value.item()))

In [None]:
# Show the metrics currently held by the model in the cell output.
print("Results:")
final_metrics = model.compute_metrics()
for metric_name in final_metrics:
    # .item() is called on each value to obtain a plain number for printing
    # (presumably a scalar tensor -- confirm against compute_metrics()).
    metric_value = final_metrics[metric_name].item()
    print('\n%s: %s ' % (metric_name, metric_value))

Results:
next-item/ndcg_at_10: 0.1758965700864792 
next-item/ndcg_at_20: 0.20357991755008698 
next-item/recall_at_10: 0.3126915991306305 
next-item/recall_at_20: 0.42205703258514404 
