In [1]:
import sys
import os

# Make the project sources importable. The location can be overridden with the
# OPEN_LLM_REC_SOURCE environment variable; the default is the original path.
PROJECT_SOURCE_DIR = os.environ.get(
    "OPEN_LLM_REC_SOURCE",
    "/home/laststar/source/open-llm-rec/source",
)
# Guard against stacking duplicate entries when this cell is re-run.
if PROJECT_SOURCE_DIR not in sys.path:
    sys.path.append(PROJECT_SOURCE_DIR)

In [2]:
import easydict

In [3]:
# Notebook stand-in for the command-line configuration: argparse is awkward to
# use inside Jupyter, so the options are collected in an EasyDict, which allows
# attribute-style access (e.g. args.device). The trailing "default:" comments
# record the default noted for each option.
args = easydict.EasyDict({
    ################
    # Dataset
    ################
    'dataset_code': 'ml-100k', # ml-100k, beauty, games
    'min_rating': 0,  # default: 0
    'min_uc': 5,  # default: 5
    'min_sc': 5,  # default: 5
    'seed': 42,  # default: 42

    ################
    # Dataloader
    ################
    'train_batch_size': 64,  # default: 64
    'val_batch_size': 64,  # default: 64
    'test_batch_size': 64,  # default: 64
    'num_workers': 0,  # default: 8
    'sliding_window_size': 1.0,  # default: 1.0
    'negative_sample_size': 10,  # default: 10

    ################
    # Trainer
    ################
    # optimization #
    'device': 'cuda',  # default: 'cuda'  # choices: ['cpu', 'cuda']
    'num_epochs': 500,  # default: 500
    'optimizer': 'AdamW',  # default: 'AdamW'  # choices: ['AdamW', 'Adam']
    'weight_decay': 0.01,  # default: None
    'adam_epsilon': 1e-9,  # default: 1e-9
    'momentum': None,  # default: None
    'lr': 0.001,  # default: 0.001
    'max_grad_norm': 5.0,  # default: 5.0
    'enable_lr_schedule': True,  # default: True
    'decay_step': 10000,  # default: 10000
    'gamma': 1,  # default: 1
    'enable_lr_warmup': True,  # default: True
    'warmup_steps': 100,  # default: 100

    # evaluation #
    'val_strategy': 'iteration',  # default: 'iteration'  # choices: ['epoch', 'iteration']
    'val_iterations': 500,  # default: 500  # only for iteration val_strategy
    'early_stopping': True,  # default: True
    'early_stopping_patience': 20,  # default: 20
    'metric_ks': [1, 5, 10, 20, 50],  # default: [1, 5, 10, 20, 50]
    'rerank_metric_ks': [1, 5, 10],  # default: [1, 5, 10]
    'best_metric': 'Recall@10',  # default: 'Recall@10'
    'rerank_best_metric': 'NDCG@10',  # default: 'NDCG@10'
    'use_wandb': False,  # default: False

    ################
    # Retriever Model
    ################
    'model_code': 'lru',  # default: None
    'bert_max_len': 50,  # default: 50
    'bert_hidden_units': 64,  # default: 64
    'bert_num_blocks': 2,  # default: 2
    'bert_num_heads': 2,  # default: 2
    'bert_head_size': 32,  # default: 32
    'bert_dropout': 0.2,  # default: 0.2
    'bert_attn_dropout': 0.2,  # default: 0.2
    'bert_mask_prob': 0.25,  # default: 0.25

    ################
    # LLM Model
    ################
    'llm_base_model': 'meta-llama/Llama-2-7b-hf',  # default: 'meta-llama/Llama-2-7b-hf'
    'llm_base_tokenizer': 'meta-llama/Llama-2-7b-hf',  # default: 'meta-llama/Llama-2-7b-hf'
    'llm_max_title_len': 32,  # default: 32
    'llm_max_text_len': 1536,  # default: 1536
    'llm_max_history': 20,  # default: 20
    'llm_train_on_inputs': False,  # default: False
    'llm_negative_sample_size': 19,  # default: 19  # 19 negative & 1 positive
    'llm_system_template': "Given user history in chronological order, recommend an item from the candidate pool with its index letter.",  # default: "Given user history in chronological order, recommend an item from the candidate pool with its index letter."
    'llm_input_template': 'User history: {}; \n Candidate pool: {}',  # default: 'User history: {}; \n Candidate pool: {}'
    'llm_load_in_4bit': True,  # default: True
    'llm_retrieved_path': None,  # default: None
    'llm_cache_dir': None,  # default: None

    ################
    # Lora
    ################
    'lora_r': 8,  # default: 8
    'lora_alpha': 32,  # default: 32
    'lora_dropout': 0.05,  # default: 0.05
    'lora_target_modules': ['q_proj', 'v_proj'],  # default: ['q_proj', 'v_proj']
    'lora_num_epochs': 1,  # default: 1
    'lora_val_iterations': 100,  # default: 100
    'lora_early_stopping_patience': 20,  # default: 20
    'lora_lr': 1e-4,  # default: 1e-4
    'lora_micro_batch_size': 16,  # default: 16
})

In [4]:
import data.datasets
import data.dataloader
from data.dataloader import *
from data.datasets import *

In [5]:
# Build the train / validation / test loaders from the config via the project's
# dataloader_factory (brought into scope by the wildcard imports above).
train, val, test = dataloader_factory(args)

Already preprocessed. Skip preprocessing


In [6]:
# Checkpoint of the trained retriever. Set OPEN_LLM_REC_MODEL_PATH to run the
# notebook on another machine; the default is the original location.
MODEL_PARAMETER_PATH = os.environ.get(
    "OPEN_LLM_REC_MODEL_PATH",
    "/home/laststar/data/model/open-llm-rec/models/best_acc_model.pth",
)

In [7]:
# Device used for the model and the batches below, taken from the config.
device = args.device
device

'cuda'

In [8]:
import torch

In [9]:
import os
import torch

os.environ['TOKENIZERS_PARALLELISM'] = 'false'

import wandb
import argparse

from config import *
from model import *
from data.dataloader import*
from trainer import *

In [10]:
# Instantiate the LRURec retriever from the config; the trained weights are
# loaded from the checkpoint in a later cell.
model = LRURec(args)



In [11]:
# Move the model to the configured device.
model = model.to(device)

In [12]:
# Inspect the parameters of the freshly constructed model (the checkpoint has
# not been loaded yet at this point).
model.state_dict()

OrderedDict([('embedding.token.weight',
              tensor([[-0.0021,  0.0086,  0.0172,  ...,  0.0058,  0.0066,  0.0242],
                      [-0.0093,  0.0341, -0.0020,  ..., -0.0095,  0.0187,  0.0108],
                      [ 0.0125, -0.0266,  0.0351,  ..., -0.0082,  0.0339,  0.0008],
                      ...,
                      [-0.0073,  0.0329, -0.0304,  ..., -0.0243,  0.0179,  0.0008],
                      [ 0.0162,  0.0032,  0.0313,  ..., -0.0098,  0.0061, -0.0152],
                      [ 0.0270,  0.0087, -0.0034,  ...,  0.0110,  0.0135,  0.0345]],
                     device='cuda:0')),
             ('embedding.layer_norm.weight',
              tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]

논문 소스에서 저장할때 state_dict 뿐만 아니라 여러 평가 결과를 포함한 dictionary 형태로 저장하기 때문에 아래와 같이 추가적인 접근이 필요

In [13]:
# The checkpoint is a dictionary (weights plus evaluation results), so the
# weights have to be taken from its 'model_state_dict' entry.
# map_location remaps the stored tensors onto the configured device, so a
# checkpoint saved on a different device (e.g. another GPU index) still loads.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust,
# and consider weights_only=True if the file holds nothing but tensors and
# plain Python containers.
checkpoint = torch.load(MODEL_PARAMETER_PATH, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
# Switch to evaluation mode (disables dropout); as the last expression this
# also displays the module structure.
model.eval()

  model.load_state_dict(torch.load(MODEL_PARAMETER_PATH)['model_state_dict'])


LRURec(
  (embedding): LRUEmbedding(
    (token): Embedding(3651, 64)
    (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (embed_dropout): Dropout(p=0.2, inplace=False)
  )
  (model): LRUModel(
    (lru_blocks): ModuleList(
      (0-1): 2 x LRUBlock(
        (lru_layer): LRULayer(
          (in_proj): Linear(in_features=64, out_features=128, bias=True)
          (out_proj): Linear(in_features=128, out_features=64, bias=True)
          (out_vector): Identity()
          (dropout): Dropout(p=0.2, inplace=False)
          (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=64, out_features=256, bias=True)
          (w_2): Linear(in_features=256, out_features=64, bias=True)
          (activation): GELU(approximate='none')
          (dropout): Dropout(p=0.2, inplace=False)
          (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        )
 

In [14]:
# Print the module structure of the model after the checkpoint has been loaded.
print(model)

LRURec(
  (embedding): LRUEmbedding(
    (token): Embedding(3651, 64)
    (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (embed_dropout): Dropout(p=0.2, inplace=False)
  )
  (model): LRUModel(
    (lru_blocks): ModuleList(
      (0-1): 2 x LRUBlock(
        (lru_layer): LRULayer(
          (in_proj): Linear(in_features=64, out_features=128, bias=True)
          (out_proj): Linear(in_features=128, out_features=64, bias=True)
          (out_vector): Identity()
          (dropout): Dropout(p=0.2, inplace=False)
          (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=64, out_features=256, bias=True)
          (w_2): Linear(in_features=256, out_features=64, bias=True)
          (activation): GELU(approximate='none')
          (dropout): Dropout(p=0.2, inplace=False)
          (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        )
 

**신경망 해석**

최종 출력: (batch_size, sequence_length, vocab_size(3651)) <br/>
위 구조는 매 시퀀스 위치에서 아이템 3651개 중 어떤 아이템을 추천할지에 대한 점수, 즉 모든 후보 아이템에 대한 점수를 출력합니다.


# Generating Candidate for Validation Set

In [15]:
# Take only the first validation batch for the step-by-step walkthrough below.
first_batch = next(iter(val), None)
if first_batch is None:
    # Fail with a clear message instead of an AttributeError on None later on.
    raise RuntimeError("validation dataloader yielded no batches")

# Move every tensor of the batch to the target device, then unpack it.
batch = [x.to(device) for x in first_batch]
seqs, labels = batch

print(f'seqs shape : {seqs.shape}')
print(f'labels shape : {labels.shape}')

seqs shape : torch.Size([64, 50])
labels shape : torch.Size([64, 1])


scores[:, -1, :] 하는 이유 -> 추천 모델은 사용자가 최근에 본 아이템(시퀀스의 마지막 위치) 이후에 어떤 아이템을 추천할지 예측하는 데 중점을 둡니다.

In [16]:
# Inference only: no_grad() avoids building an autograd graph, which saves
# memory and keeps the in-place masking applied to `scores` in the following
# cells from touching tensors tracked for backward.
with torch.no_grad():
    all_scores = model(seqs)
print(f"before score shape : {all_scores.shape}")

# Keep only the scores at the last sequence position, i.e. the prediction for
# the item that follows the user's most recent interaction.
scores = all_scores[:, -1, :]
print(f"score shape : {scores.shape}")

before score shape : torch.Size([64, 50, 3651])
score shape : torch.Size([64, 3651])


In [17]:
# Unpack the two dimensions of the input batch: B rows, L positions per row.
B, L = seqs.size()
print(B, L)

64 50


**아래 코드 해석**

scores의 각 행에 있는 3651개의 값은 현재 시퀀스에 대한 아이템 3651개 각각의 점수를 의미 <br/>
입력 seq에 포함된 아이템은 사용자가 이미 구매한 아이템이라 추천 후보로 필요하지 않으므로, 해당 아이템의 점수에 -1e9 값을 넣어 제외함 <br/>


In [18]:
# Exclude every item id that occurs in the input sequence from the candidates:
# for each row b and position i, scatter_ writes -1e9 into scores[b, seqs[b, i]].
# A single call covers all positions at once, replacing the per-position Python
# loop and its dependence on `L` defined in another cell.
scores.scatter_(1, seqs, -1e9)

In [19]:
# Also rule out item index 0 for every row
# (presumably the padding id; confirm against the dataset code).
scores[:, 0].fill_(-1e9)

In [20]:
# Accumulators for the per-user score rows and the matching ground-truth labels.
val_probs, val_labels = [], []

In [21]:
# Append this batch's score rows and its flattened ground-truth labels.
val_probs.extend(scores.tolist())
val_labels.extend(labels.view(-1).tolist())

# Report the sizes straight from the lists. No visible cell imports numpy
# explicitly (`np` would only be reachable through the wildcard imports), and
# building a full array just to read its shape is unnecessary.
print(f'scores shape : {(len(val_probs), len(val_probs[0]))}')
print(f'labels shape : {(len(val_labels),)}')

scores shape : (64, 3651)
labels shape : (64,)


In [22]:
# Flatten the labels to one dimension to display the ground-truth item id of each row.
labels.view(-1)

tensor([1304,    2, 2826, 2752,  247,  778,  109,  116, 1820, 3476, 1318, 1161,
        2394,  371, 3030, 1152,   99, 2982, 1227, 1358, 1915, 2539,    6, 3047,
        2982,  297, 1634,  145, 3236,   98,  478,  873, 1346, 2155,  467,  667,
         509,  312, 1275,  175, 3316, 1733,  780,  478, 2388,  257, 3104,  160,
        2927,  897, 2519, 1111,  771,  463, 2423,  307, 1164,  538, 1735,  864,
        3490, 3120, 3618, 1796], device='cuda:0')

val_probs -> 64개 유저별 모델이 예측한 점수 값 <br/>
val_labels -> 64개 유저별 실제 정답값

# Generating Candidate for Test Set

validation set과 작업 동일