In [1]:
import sys
import os

# Make the project's source tree importable from this notebook.
# The location can be overridden with the OPEN_LLM_REC_SOURCE_DIR environment
# variable; when it is unset (or empty) the original hard-coded path is used.
PROJECT_SOURCE_DIR = (
    os.environ.get("OPEN_LLM_REC_SOURCE_DIR")
    or "/home/laststar/source/open-llm-rec/source"
)
# Guard the append so re-running this cell does not pile up duplicate entries.
if PROJECT_SOURCE_DIR not in sys.path:
    sys.path.append(PROJECT_SOURCE_DIR)

In [2]:
import easydict

In [3]:
# Jupyter does not support argparse, so the run configuration is held in an
# EasyDict (from the 'easydict' module) instead.
# Later cells pass `args` to dataloader_factory, LRURec and LRUTrainer, and read
# entries as attributes (e.g. args.use_wandb).
# Each trailing comment records the option's default (and its choices, where listed).
args = easydict.EasyDict({
    ################
    # Dataset
    ################
    'dataset_code': 'ml-100k', # ml-100k, beauty, games
    'min_rating': 0,  # default: 0
    'min_uc': 5,  # default: 5
    'min_sc': 5,  # default: 5
    'seed': 42,  # default: 42

    ################
    # Dataloader
    ################
    'train_batch_size': 64,  # default: 64
    'val_batch_size': 64,  # default: 64
    'test_batch_size': 64,  # default: 64
    'num_workers': 0,  # default: 8 (set to 0 here)
    'sliding_window_size': 1.0,  # default: 1.0
    'negative_sample_size': 10,  # default: 10

    ################
    # Trainer
    ################
    # optimization #
    'device': 'cuda',  # default: 'cuda'  # choices: ['cpu', 'cuda']
    'num_epochs': 500,  # default: 500
    'optimizer': 'AdamW',  # default: 'AdamW'  # choices: ['AdamW', 'Adam']
    'weight_decay': 0.01,  # default: None (set to 0.01 here)
    'adam_epsilon': 1e-9,  # default: 1e-9
    'momentum': None,  # default: None
    'lr': 0.001,  # default: 0.001
    'max_grad_norm': 5.0,  # default: 5.0
    'enable_lr_schedule': True,  # default: True
    'decay_step': 10000,  # default: 10000
    'gamma': 1,  # default: 1
    'enable_lr_warmup': True,  # default: True
    'warmup_steps': 100,  # default: 100

    # evaluation #
    'val_strategy': 'iteration',  # default: 'iteration'  # choices: ['epoch', 'iteration']
    'val_iterations': 500,  # default: 500  # only for iteration val_strategy
    'early_stopping': True,  # default: True
    'early_stopping_patience': 20,  # default: 20
    'metric_ks': [1, 5, 10, 20, 50],  # default: [1, 5, 10, 20, 50]
    'rerank_metric_ks': [1, 5, 10],  # default: [1, 5, 10]
    'best_metric': 'Recall@10',  # default: 'Recall@10'
    'rerank_best_metric': 'NDCG@10',  # default: 'NDCG@10'
    'use_wandb': False,  # default: False

    ################
    # Retriever Model
    ################
    'model_code': 'lru',  # default: None (set to 'lru' here; the model built below is LRURec)
    'bert_max_len': 50,  # default: 50
    'bert_hidden_units': 64,  # default: 64
    'bert_num_blocks': 2,  # default: 2
    'bert_num_heads': 2,  # default: 2
    'bert_head_size': 32,  # default: 32
    'bert_dropout': 0.2,  # default: 0.2
    'bert_attn_dropout': 0.2,  # default: 0.2
    'bert_mask_prob': 0.25,  # default: 0.25

    ################
    # LLM Model
    ################
    'llm_base_model': 'meta-llama/Llama-2-7b-hf',  # default: 'meta-llama/Llama-2-7b-hf'
    'llm_base_tokenizer': 'meta-llama/Llama-2-7b-hf',  # default: 'meta-llama/Llama-2-7b-hf'
    'llm_max_title_len': 32,  # default: 32
    'llm_max_text_len': 1536,  # default: 1536
    'llm_max_history': 20,  # default: 20
    'llm_train_on_inputs': False,  # default: False
    'llm_negative_sample_size': 19,  # default: 19  # 19 negative & 1 positive
    'llm_system_template': "Given user history in chronological order, recommend an item from the candidate pool with its index letter.",  # default: "Given user history in chronological order, recommend an item from the candidate pool with its index letter."
    'llm_input_template': 'User history: {}; \n Candidate pool: {}',  # default: 'User history: {}; \n Candidate pool: {}'
    'llm_load_in_4bit': True,  # default: True
    'llm_retrieved_path': None,  # default: None
    'llm_cache_dir': None,  # default: None

    ################
    # Lora
    ################
    'lora_r': 8,  # default: 8
    'lora_alpha': 32,  # default: 32
    'lora_dropout': 0.05,  # default: 0.05
    'lora_target_modules': ['q_proj', 'v_proj'],  # default: ['q_proj', 'v_proj']
    'lora_num_epochs': 1,  # default: 1
    'lora_val_iterations': 100,  # default: 100
    'lora_early_stopping_patience': 20,  # default: 20
    'lora_lr': 1e-4,  # default: 1e-4
    'lora_micro_batch_size': 16,  # default: 16
})


# Dataset Loader

In [4]:
import data.datasets
import data.dataloader
from data.dataloader import *
from data.datasets import *

In [5]:
# Build the three data objects from the configuration; they are handed to the
# trainer later (presumably the train / validation / test dataloaders — confirm).
# dataloader_factory comes from the star imports in the previous cell.
# NOTE(review): args.seed is defined but no seeding call is visible in this
# notebook — confirm whether the project code applies it.
train, val, test = dataloader_factory(args)

Already preprocessed. Skip preprocessing


# Train

In [6]:
import os
import torch

# Set before the wandb / project imports further down in this cell.
# NOTE(review): presumably this turns off Hugging Face tokenizers parallelism
# (and its fork warning) — confirm against the tokenizers documentation.
os.environ.update({'TOKENIZERS_PARALLELISM': 'false'})

import wandb
import argparse

from config import *
from model import *
from data.dataloader import*
from trainer import *

In [7]:
# Directory handed to the trainer and used as the parent of 'retrieved.pkl'.
# It can be overridden with the OPEN_LLM_REC_EXPORT_ROOT environment variable;
# when that is unset (or empty) the original hard-coded location is used.
export_root = (
    os.environ.get("OPEN_LLM_REC_EXPORT_ROOT")
    or "/home/laststar/data/model/open-llm-rec"
)

In [8]:
# Instantiate the retriever model from the configuration (args.model_code is 'lru').
# LRURec is expected to come from one of the star imports above — confirm which module.
model = LRURec(args)



In [41]:
# Wire the configuration, the model, the three objects returned by
# dataloader_factory, and the export directory into the trainer.
# The recorded output shows the configuration dict being printed on construction.
trainer = LRUTrainer(
    args,
    model,
    train,
    val,
    test,
    export_root,
    args.use_wandb,
)

{'dataset_code': 'ml-100k', 'min_rating': 0, 'min_uc': 5, 'min_sc': 5, 'seed': 42, 'train_batch_size': 64, 'val_batch_size': 64, 'test_batch_size': 64, 'num_workers': 0, 'sliding_window_size': 1.0, 'negative_sample_size': 10, 'device': 'cuda', 'num_epochs': 500, 'optimizer': 'AdamW', 'weight_decay': 0.01, 'adam_epsilon': 1e-09, 'momentum': None, 'lr': 0.001, 'max_grad_norm': 5.0, 'enable_lr_schedule': True, 'decay_step': 10000, 'gamma': 1, 'enable_lr_warmup': True, 'warmup_steps': 100, 'val_strategy': 'iteration', 'val_iterations': 500, 'early_stopping': True, 'early_stopping_patience': 20, 'metric_ks': [1, 5, 10, 20, 50], 'rerank_metric_ks': [1, 5, 10], 'best_metric': 'Recall@10', 'rerank_best_metric': 'NDCG@10', 'use_wandb': False, 'model_code': 'lru', 'bert_max_len': 50, 'bert_hidden_units': 64, 'bert_num_blocks': 2, 'bert_num_heads': 2, 'bert_head_size': 32, 'bert_dropout': 0.2, 'bert_attn_dropout': 0.2, 'bert_mask_prob': 0.25, 'llm_base_model': 'meta-llama/Llama-2-7b-hf', 'llm_bas

In [42]:
# Run training. The recorded output below shows an initial evaluation pass,
# one loss line per epoch, "Update Best Recall@10 Model" messages, and
# termination through early stopping.
trainer.train()

Eval: N@1 0.0000, N@5 0.0000, N@10 0.0000, R@1 0.0000, R@5 0.0000, R@10 0.0000: 100%|██████████| 10/10 [00:00<00:00, 35.49it/s]
Epoch 1, loss 8.211 : 100%|██████████| 27/27 [00:01<00:00, 21.16it/s]
Epoch 2, loss 8.177 : 100%|██████████| 27/27 [00:01<00:00, 20.28it/s]
Epoch 3, loss 7.969 : 100%|██████████| 27/27 [00:01<00:00, 20.19it/s]
Epoch 4, loss 7.659 : 100%|██████████| 27/27 [00:01<00:00, 20.51it/s]
Epoch 5, loss 7.491 : 100%|██████████| 27/27 [00:01<00:00, 20.39it/s]
Epoch 6, loss 7.310 : 100%|██████████| 27/27 [00:01<00:00, 20.32it/s]
Epoch 7, loss 7.129 : 100%|██████████| 27/27 [00:01<00:00, 21.58it/s]
Epoch 8, loss 6.981 : 100%|██████████| 27/27 [00:01<00:00, 20.97it/s]
Epoch 9, loss 6.859 : 100%|██████████| 27/27 [00:01<00:00, 22.03it/s]
Epoch 10, loss 6.752 : 100%|██████████| 27/27 [00:01<00:00, 22.79it/s]
Epoch 11, loss 6.659 : 100%|██████████| 27/27 [00:01<00:00, 22.08it/s]
Epoch 12, loss 6.570 : 100%|██████████| 27/27 [00:01<00:00, 20.54it/s]
Epoch 13, loss 6.489 : 100%|█

Update Best Recall@10 Model at 19


Epoch 19, loss 6.112 : 100%|██████████| 27/27 [00:01<00:00, 16.72it/s]
Epoch 20, loss 6.062 : 100%|██████████| 27/27 [00:01<00:00, 21.14it/s]
Epoch 21, loss 6.014 : 100%|██████████| 27/27 [00:01<00:00, 21.00it/s]
Epoch 22, loss 5.973 : 100%|██████████| 27/27 [00:01<00:00, 20.51it/s]
Epoch 23, loss 5.932 : 100%|██████████| 27/27 [00:01<00:00, 21.32it/s]
Epoch 24, loss 5.894 : 100%|██████████| 27/27 [00:01<00:00, 23.98it/s]
Epoch 25, loss 5.857 : 100%|██████████| 27/27 [00:01<00:00, 23.48it/s]
Epoch 26, loss 5.821 : 100%|██████████| 27/27 [00:01<00:00, 20.41it/s]
Epoch 27, loss 5.788 : 100%|██████████| 27/27 [00:01<00:00, 20.43it/s]
Epoch 28, loss 5.755 : 100%|██████████| 27/27 [00:01<00:00, 20.19it/s]
Epoch 29, loss 5.727 : 100%|██████████| 27/27 [00:01<00:00, 22.55it/s]
Epoch 30, loss 5.699 : 100%|██████████| 27/27 [00:01<00:00, 21.02it/s]
Epoch 31, loss 5.666 : 100%|██████████| 27/27 [00:01<00:00, 22.41it/s]
Epoch 32, loss 5.643 : 100%|██████████| 27/27 [00:01<00:00, 23.18it/s]
Epoch 

Early stopping triggered. Exit training


In [43]:
from datetime import datetime

# Log a wall-clock timestamp so the notebook records when this cell was run.
print(f"current time: {datetime.now():%Y-%m-%d %H:%M:%S}")

current time: 2024-10-05 16:34:38


In [44]:
# 10/5 4:09 - training started; 600 MB used

In [46]:
from datetime import datetime

# Log a wall-clock timestamp so the notebook records when this cell was run.
print(f"current time: {datetime.now():%Y-%m-%d %H:%M:%S}")

current time: 2024-10-05 16:48:27


In [49]:
# Run the trainer's test routine. Per the recorded output, it announces
# "Testing Best Model" (a torch.load of the saved best model appears in the
# warning excerpt), prints the metrics, and returns them as a dict. Jupyter
# echoes that dict a second time because the call is the cell's last expression.
trainer.test()

  best_model_dict = torch.load(os.path.join(


******************** Testing Best Model ********************


Eval: N@1 0.0217, N@5 0.0459, N@10 0.0621, R@1 0.0217, R@5 0.0731, R@10 0.1225: 100%|██████████| 10/10 [00:00<00:00, 27.74it/s]

******************** Testing Metrics ********************
{'Recall@50': 0.2825367659330368, 'MRR@50': 0.0507874995470047, 'NDCG@50': 0.09637335240840912, 'Recall@20': 0.17545955926179885, 'MRR@20': 0.04745325967669487, 'NDCG@20': 0.07523392103612422, 'Recall@10': 0.1225183829665184, 'MRR@10': 0.043945787474513054, 'NDCG@10': 0.0620549526065588, 'Recall@5': 0.07306985296308995, 'MRR@5': 0.0372012882027775, 'NDCG@5': 0.04591377899050712, 'Recall@1': 0.02169117648154497, 'MRR@1': 0.02169117648154497, 'NDCG@1': 0.02169117648154497}





{'Recall@50': 0.2825367659330368,
 'MRR@50': 0.0507874995470047,
 'NDCG@50': 0.09637335240840912,
 'Recall@20': 0.17545955926179885,
 'MRR@20': 0.04745325967669487,
 'NDCG@20': 0.07523392103612422,
 'Recall@10': 0.1225183829665184,
 'MRR@10': 0.043945787474513054,
 'NDCG@10': 0.0620549526065588,
 'Recall@5': 0.07306985296308995,
 'MRR@5': 0.0372012882027775,
 'NDCG@5': 0.04591377899050712,
 'Recall@1': 0.02169117648154497,
 'MRR@1': 0.02169117648154497,
 'NDCG@1': 0.02169117648154497}

In [48]:
from datetime import datetime

# Log a wall-clock timestamp so the notebook records when this cell was run.
print(f"current time: {datetime.now():%Y-%m-%d %H:%M:%S}")

current time: 2024-10-05 16:48:28


In [50]:
# Target file for the retrieved candidates, placed under the export directory.
retrieved_path = os.path.join(export_root, 'retrieved.pkl')
# The recorded output shows candidates being generated for the validation set
# and then the test set, each followed by a metrics dict.
# NOTE(review): presumably the candidates are saved to retrieved_path — confirm.
trainer.generate_candidates(retrieved_path)

*************** Generating Candidates for Validation Set ***************


100%|██████████| 10/10 [00:00<00:00, 45.31it/s]


{'Recall@50': 0.3049180209636688, 'MRR@50': 0.06256759166717529, 'NDCG@50': 0.11019700765609741, 'Recall@20': 0.18852458894252777, 'MRR@20': 0.05895635113120079, 'NDCG@20': 0.08729086071252823, 'Recall@10': 0.12950819730758667, 'MRR@10': 0.05494340509176254, 'NDCG@10': 0.07247135043144226, 'Recall@5': 0.08852458745241165, 'MRR@5': 0.04918033257126808, 'NDCG@5': 0.05892314016819, 'Recall@1': 0.027868852019309998, 'MRR@1': 0.027868852019309998, 'NDCG@1': 0.027868852019309998}
****************** Generating Candidates for Test Set ******************


100%|██████████| 10/10 [00:00<00:00, 53.49it/s]


{'Recall@50': 0.2819672226905823, 'MRR@50': 0.05073552951216698, 'NDCG@50': 0.09626712650060654, 'Recall@20': 0.17540983855724335, 'MRR@20': 0.04737967252731323, 'NDCG@20': 0.0751628652215004, 'Recall@10': 0.1213114783167839, 'MRR@10': 0.04380302131175995, 'NDCG@10': 0.06170599162578583, 'Recall@5': 0.07377049326896667, 'MRR@5': 0.037295080721378326, 'NDCG@5': 0.04616577923297882, 'Recall@1': 0.021311474964022636, 'MRR@1': 0.021311474964022636, 'NDCG@1': 0.021311474964022636}
