# LlamaBiPoolingModel

## 0. imports

In [1]:
%load_ext jupyter_black

In [2]:
import sys

# Make the parent directory importable (relative to the kernel's working
# directory). Guarded so that re-running this cell does not keep appending
# duplicate entries to sys.path.
PARENT_DIR = ".."
if PARENT_DIR not in sys.path:
    sys.path.append(PARENT_DIR)

In [6]:
import torch
from transformers import AutoTokenizer, LlamaConfig
from llm2vec.models.bidirectional_pooling_llama import LlamaBiPoolingModel

# Seed torch's RNG before loading: the pooler weights are not in the checkpoint
# and get freshly initialized on load (see the "newly initialized" warning),
# so without a fixed seed the pooled outputs change on every kernel restart.
SEED = 42
torch.manual_seed(SEED)

# Initialize the tokenizer and the model config
model_name = "princeton-nlp/Sheared-LLaMA-1.3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
config = LlamaConfig.from_pretrained(model_name)
config.return_dict = True

# If the tokenizer defines no pad token, reuse EOS so batched padding works
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Keep the config in sync with the tokenizer's pad token in every case,
# not only when the EOS fallback above was applied
config.pad_token_id = tokenizer.pad_token_id

# Report the hidden size from the config (read-only; nothing is modified)
print("Original hidden size:", config.hidden_size)

# Load the model with the adjusted config
model = LlamaBiPoolingModel.from_pretrained(
    model_name,
    config=config,
)
model.eval()  # switch to evaluation mode

# Sample texts
texts = ["이것은 첫 번째 문장입니다.", "이것은 두 번째 문장이고 조금 더 깁니다."]

# Tokenize into a padded batch of PyTorch tensors
encoded = tokenizer(
    texts, padding=True, truncation=True, max_length=512, return_tensors="pt"
)

Original hidden size: 2048


Some weights of LlamaBiPoolingModel were not initialized from the model checkpoint at princeton-nlp/Sheared-LLaMA-1.3B and are newly initialized: ['model.pooler.cross_attend_blocks.0.fn.to_kv.weight', 'model.pooler.cross_attend_blocks.0.fn.to_out.weight', 'model.pooler.cross_attend_blocks.0.fn.to_q.weight', 'model.pooler.cross_attend_blocks.0.norm.bias', 'model.pooler.cross_attend_blocks.0.norm.weight', 'model.pooler.cross_attend_blocks.0.norm_context.bias', 'model.pooler.cross_attend_blocks.0.norm_context.weight', 'model.pooler.cross_attend_blocks.1.fn.net.0.bias', 'model.pooler.cross_attend_blocks.1.fn.net.0.weight', 'model.pooler.cross_attend_blocks.1.fn.net.2.bias', 'model.pooler.cross_attend_blocks.1.fn.net.2.weight', 'model.pooler.cross_attend_blocks.1.norm.bias', 'model.pooler.cross_attend_blocks.1.norm.weight', 'model.pooler.latents']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Run the model on the tokenized batch; gradients are not needed here
input_ids = encoded["input_ids"]
attention_mask = encoded["attention_mask"]
with torch.no_grad():
    outputs = model(input_ids=input_ids, attention_mask=attention_mask)

  self.gen = func(*args, **kwds)


In [8]:
# Display the raw model output object.
# NOTE(review): the repr prints the full last_hidden_state and past_key_values
# tensors, which produces a very long cell output; consider inspecting shapes
# (e.g. outputs.last_hidden_state.shape) instead.
outputs

BaseModelOutputWithPast(last_hidden_state=tensor([[[-0.6794,  1.7302, -1.4477,  ...,  1.5808,  1.1388, -0.1745],
         [-0.6006,  1.7860, -1.5103,  ...,  1.5341,  1.1521, -0.1605],
         [-0.5618,  1.7893, -1.4580,  ...,  1.4146,  1.1732, -0.1746],
         ...,
         [-0.7058, -1.2204, -3.1394,  ...,  1.6059,  0.2257,  0.5286],
         [ 1.4494,  0.4674, -5.2517,  ...,  1.6118,  0.1614,  0.6358],
         [ 0.9880,  3.9466,  0.3288,  ...,  1.3256,  0.5920,  1.6477]],

        [[ 0.0326,  0.3458, -0.3208,  ..., -0.3215,  0.3400, -0.0141],
         [ 1.4273,  1.4933, -2.9011,  ..., -0.6158, -1.7805,  1.0105],
         [-0.9404,  0.3754, -3.8295,  ...,  1.0886,  0.1889,  0.9315],
         ...,
         [-0.3124, -0.5220, -2.6905,  ...,  1.3425,  0.1696, -0.3863],
         [-0.1434,  0.2810, -5.5816,  ...,  1.9946,  0.9346,  0.6066],
         [ 0.7252,  3.1273, -0.0612,  ...,  1.8544,  1.6593,  1.9946]]]), past_key_values=((tensor([[[[ 1.7746e+00,  1.8321e+00, -5.3450e-01,  ...,

In [9]:
# Inspect the pooled output
pooled_output = outputs.pooler_output

# One pooled vector per input text, as wide as the model's hidden size
# (2 x 2048 for this run, not 768 as the old comment claimed)
expected_shape = (len(texts), config.hidden_size)
assert (
    pooled_output.shape == expected_shape
), f"unexpected pooled output shape: {tuple(pooled_output.shape)}"

print("Pooled output shape:", pooled_output.shape)  # expected shape: [2, 2048]
print("Sample values:", pooled_output[0, :5])  # first 5 values of the first sentence

Pooled output shape: torch.Size([2, 2048])
Sample values: tensor([-0.0088, -0.0214,  0.0127,  0.0127, -0.0315])
