In [1]:
import sys
import os

sys.path.append("/home/laststar/source/OpenLLM-Rec/source")

In [2]:
import easydict

# jupyter didn't support argparse. so, I use 'easydict' module
args = easydict.EasyDict({
    ################
    # Dataset
    ################
    'dataset_code': 'ml-100k', # ml-100k, beauty, games
    'min_rating': 0,  # default: 0
    'min_uc': 5,  # default: 5
    'min_sc': 5,  # default: 5
    'seed': 42,  # default: 42

    ################
    # Dataloader
    ################
    'train_batch_size': 64,  # default: 64
    'val_batch_size': 64,  # default: 64
    'test_batch_size': 64,  # default: 64
    'num_workers': 0,  # default: 8
    'sliding_window_size': 1.0,  # default: 1.0
    'negative_sample_size': 10,  # default: 10

    ################
    # Trainer
    ################
    # optimization #
    'device': 'cuda',  # default: 'cuda'  # choices: ['cpu', 'cuda']
    'num_epochs': 500,  # default: 500
    'optimizer': 'AdamW',  # default: 'AdamW'  # choices: ['AdamW', 'Adam']
    'weight_decay': 0.01,  # default: None
    'adam_epsilon': 1e-9,  # default: 1e-9
    'momentum': None,  # default: None
    'lr': 0.001,  # default: 0.001
    'max_grad_norm': 5.0,  # default: 5.0
    'enable_lr_schedule': True,  # default: True
    'decay_step': 10000,  # default: 10000
    'gamma': 1,  # default: 1
    'enable_lr_warmup': True,  # default: True
    'warmup_steps': 100,  # default: 100

    # evaluation #
    'val_strategy': 'iteration',  # default: 'iteration'  # choices: ['epoch', 'iteration']
    'val_iterations': 500,  # default: 500  # only for iteration val_strategy
    'early_stopping': True,  # default: True
    'early_stopping_patience': 20,  # default: 20
    'metric_ks': [1, 5, 10, 20, 50],  # default: [1, 5, 10, 20, 50]
    'rerank_metric_ks': [1, 5, 10],  # default: [1, 5, 10]
    'best_metric': 'Recall@10',  # default: 'Recall@10'
    'rerank_best_metric': 'NDCG@10',  # default: 'NDCG@10'
    'use_wandb': False,  # default: False

    ################
    # Retriever Model
    ################
    'model_code': 'bert',  # default: None
    'bert_max_len': 100,  # default: 50
    'bert_hidden_units': 256,  # default: 64
    'bert_num_blocks': 2,  # default: 2
    'bert_num_heads': 4,  # default: 2
    'bert_head_size': 32,  # default: 32
    'bert_dropout': 0.1,  # default: 0.2
    'bert_mask_prob': 0.15,  # default: 0.25
    
    # bertrec
    'train_negative_sampler_code': 'random',
    'train_negative_sample_size': 0,
    'train_negative_sampling_seed': 0,
    'test_negative_sampler_code': 'random',
    'test_negative_sample_size': 100,
    'test_negative_sampling_seed': 98765,
    'model_init_seed': 0,
    'num_gpu': 1,
    'optimizer': 'Adam',
    'log_period_as_iter': 12800,
    
    ################
    # LLM Model
    ################
    'llm_base_model': 'meta-llama/Llama-2-7b-hf',  # default: 'meta-llama/Llama-2-7b-hf'
    'llm_base_tokenizer': 'meta-llama/Llama-2-7b-hf',  # default: 'meta-llama/Llama-2-7b-hf'
    'llm_max_title_len': 32,  # default: 32
    'llm_max_text_len': 1536,  # default: 1536
    'llm_max_history': 20,  # default: 20
    'llm_train_on_inputs': False,  # default: False
    'llm_negative_sample_size': 19,  # default: 19  # 19 negative & 1 positive
    'llm_system_template': "Given user history in chronological order, recommend an item from the candidate pool with its index letter.",  # default: "Given user history in chronological order, recommend an item from the candidate pool with its index letter."
    'llm_input_template': 'User history: {}; \n Candidate pool: {}',  # default: 'User history: {}; \n Candidate pool: {}'
    'llm_load_in_4bit': True,  # default: True
    'llm_retrieved_path': "/home/laststar/data/model/OpenLLM-Rec",  # default: None
    'llm_cache_dir': None,  # default: None

    ################
    # Lora
    ################
    'lora_r': 8,  # default: 8
    'lora_alpha': 32,  # default: 32
    'lora_dropout': 0.05,  # default: 0.05
    'lora_target_modules': ['q_proj', 'v_proj'],  # default: ['q_proj', 'v_proj']
    'lora_num_epochs': 1,  # default: 1
    'lora_val_iterations': 100,  # default: 100
    'lora_early_stopping_patience': 20,  # default: 20
    'lora_lr': 1e-4,  # default: 1e-4
    'lora_micro_batch_size': 16,  # default: 16

    #################
    # Custom
    #################
    'alpaca_file': "../source/data/dataloader/templates"
})

In [3]:
EXPORT_ROOT = "/home/laststar/data/model/OpenLLM-Rec/bert"
MODEL_PARAMETER_PATH = "/home/laststar/data/model/OpenLLM-Rec/bert/models/best_acc_model.pth"

In [4]:
import data.datasets
import data.dataloader
from data.dataloader import *
from data.datasets import *
from model.bert import BERT4Rec
from trainer.utils import AverageMeterSet

import torch

In [5]:
train, val, test = dataloader_factory(args)

Already preprocessed. Skip preprocessing
Negatives samples exist. Loading.
Negatives samples exist. Loading.


In [6]:
model = BERT4Rec(args)

In [7]:
device = args.device
print("device : ", device)
model = model.to(device)

device :  cuda


In [8]:
model.state_dict()
model.load_state_dict(torch.load(MODEL_PARAMETER_PATH)['model_state_dict'])
model.eval()

  model.load_state_dict(torch.load(MODEL_PARAMETER_PATH)['model_state_dict'])


BERT4Rec(
  (bert): BERT(
    (embedding): BERTEmbedding(
      (token): TokenEmbedding(3652, 256, padding_idx=0)
      (position): PositionalEmbedding(
        (pe): Embedding(100, 256)
      )
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer_blocks): ModuleList(
      (0-1): 2 x TransformerBlock(
        (attention): MultiHeadedAttention(
          (linear_layers): ModuleList(
            (0-2): 3 x Linear(in_features=256, out_features=256, bias=True)
          )
          (output_linear): Linear(in_features=256, out_features=256, bias=True)
          (attention): Attention()
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=256, out_features=1024, bias=True)
          (w_2): Linear(in_features=1024, out_features=256, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (activation): GELU(approximate='none')
        )
        (input_sublayer): Sublay

In [9]:
average_meter_set = AverageMeterSet()

# Test BERT Model

In [10]:
batch = None
seqs = None
candidates = None
labels = None

for batch in test:
    batch = [x.to(device) for x in batch]
    seqs, candidates, labels = batch
    break

print(f'seqs shape : {seqs.shape}')
print(f'labels shape : {labels.shape}')
print(f'candidates shape : {candidates.shape}')

seqs shape : torch.Size([64, 100])
labels shape : torch.Size([64, 101])
candidates shape : torch.Size([64, 101])


In [11]:
print(f"candidates : {candidates}")

candidates : tensor([[2368,  610,  610,  ...,  610,  610,  610],
        [1615,    1,    1,  ...,    1,    1,    1],
        [3315,    2,    2,  ...,    2,    2,    2],
        ...,
        [ 470,   61,   61,  ...,   61,   61,   61],
        [ 322,   62,   62,  ...,   62,   62,   62],
        [ 853,   63,   63,  ...,   63,   63,   63]], device='cuda:0')


In [12]:
scores = model(seqs)  # B x T x V
print(f"scores shape : {scores.shape}")

scores shape : torch.Size([64, 100, 3651])


In [13]:
scores = scores[:, -1, :]  # B x V
print(f"scores shape : {scores.shape}")
print(f"scores : {scores}")

scores shape : torch.Size([64, 3651])
scores : tensor([[-1.2499,  2.2739, -0.9554,  ..., -0.6270, -0.8693, -1.3612],
        [-1.2320,  2.3361, -1.0500,  ..., -0.7096, -0.9411, -1.3691],
        [-1.2026,  1.5721,  2.4841,  ...,  2.0266,  1.3509, -0.6006],
        ...,
        [-1.3748,  2.1765, -0.5572,  ..., -0.2826, -0.5927, -1.3798],
        [-0.3150, -0.3248,  2.3508,  ...,  1.8745,  1.5297,  0.2448],
        [-1.7066,  2.3171,  0.4470,  ...,  0.5506,  0.0307, -1.4714]],
       device='cuda:0', grad_fn=<SliceBackward0>)


gather(dim, index)는 dim 차원에서 index에 해당하는 값만 선택합니다

In [14]:
scores = scores.gather(1, candidates)  # B x C
print(f"scores shape : {scores.shape}")
print(f"scores : {scores}")

scores shape : torch.Size([64, 101])
scores : tensor([[-0.6026,  0.1785,  0.1785,  ...,  0.1785,  0.1785,  0.1785],
        [-0.3160,  2.3361,  2.3361,  ...,  2.3361,  2.3361,  2.3361],
        [ 0.8027,  2.4841,  2.4841,  ...,  2.4841,  2.4841,  2.4841],
        ...,
        [ 2.7742,  0.0594,  0.0594,  ...,  0.0594,  0.0594,  0.0594],
        [ 1.5143, -1.1689, -1.1689,  ..., -1.1689, -1.1689, -1.1689],
        [ 2.5579,  0.5730,  0.5730,  ...,  0.5730,  0.5730,  0.5730]],
       device='cuda:0', grad_fn=<GatherBackward0>)


In [15]:
labels

tensor([[1, 0, 0,  ..., 0, 0, 0],
        [1, 0, 0,  ..., 0, 0, 0],
        [1, 0, 0,  ..., 0, 0, 0],
        ...,
        [1, 0, 0,  ..., 0, 0, 0],
        [1, 0, 0,  ..., 0, 0, 0],
        [1, 0, 0,  ..., 0, 0, 0]], device='cuda:0')

In [16]:
def recalls_and_ndcgs_for_ks(scores, labels, ks):
    metrics = {}

    scores = scores
    labels = labels
    answer_count = labels.sum(1)

    labels_float = labels.float()
    print(labels_float)
    rank = (-scores).argsort(dim=1)
    print(rank)
    cut = rank
    for k in sorted(ks, reverse=True):
       cut = cut[:, :k]
       hits = labels_float.gather(1, cut)
       metrics['Recall@%d' % k] = \
           (hits.sum(1) / torch.min(torch.Tensor([k]).to(labels.device), labels.sum(1).float())).mean().cpu().item()

       position = torch.arange(2, 2+k)
       weights = 1 / torch.log2(position.float())
       dcg = (hits * weights.to(hits.device)).sum(1)
       idcg = torch.Tensor([weights[:min(int(n), k)].sum() for n in answer_count]).to(dcg.device)
       ndcg = (dcg / idcg).mean()
       metrics['NDCG@%d' % k] = ndcg.cpu().item()

    return metrics

In [17]:
recalls_and_ndcgs_for_ks(scores, labels, args.metric_ks)

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')
tensor([[  1,   2,   3,  ...,  99, 100,   0],
        [  1,   2,   3,  ...,  99, 100,   0],
        [  1,   2,   3,  ...,  99, 100,   0],
        ...,
        [  0,   1,   2,  ...,  98,  99, 100],
        [  0,   1,   2,  ...,  98,  99, 100],
        [  0,   1,   2,  ...,  98,  99, 100]], device='cuda:0')


{'Recall@50': 0.65625,
 'NDCG@50': 0.65625,
 'Recall@20': 0.65625,
 'NDCG@20': 0.65625,
 'Recall@10': 0.65625,
 'NDCG@10': 0.65625,
 'Recall@5': 0.65625,
 'NDCG@5': 0.65625,
 'Recall@1': 0.65625,
 'NDCG@1': 0.65625}