In [1]:
import os
import unicodedata

import torch
import pandas as pd
from tqdm import tqdm
import fitz  # PyMuPDF
import pickle

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
    BitsAndBytesConfig
)
from accelerate import Accelerator
from torchinfo import summary

# Langchain 관련
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from peft import LoraConfig, get_peft_model
from transformers import LlamaForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 4비트 양자화 설정
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# 모델 ID 
model_id = "beomi/llama-2-ko-7b"
# 토크나이저 로드 및 설정
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.use_default_system_prompt = False

# 모델 로드 및 양자화 설정 적용
#model = AutoModelForCausalLM.from_pretrained(
#    model_id,
#    quantization_config=bnb_config,
#    device_map="auto",
#    trust_remote_code=True )

model = LlamaForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

#print(model)
#for name, param in model.named_parameters():
#    print(name, param.requires_grad)

The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
Loading checkpoint shards: 100%|██████████| 15/15 [00:08<00:00,  1.76it/s]


In [3]:
# 전체 파라미터 수 및 학습 가능한 파라미터 수 계산
total_parameters = sum(p.numel() for p in model.parameters())
trainable_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total number of parameters: {total_parameters}")
print(f"Trainable parameters: {trainable_parameters}")

Total number of parameters: 3617853440
Trainable parameters: 379850752


In [None]:
# View model summary
summary(
    model=model, 
    input_size=(1, 800)
)

In [4]:
num_layers = 32
target_module_list = []

for i in range(num_layers - 12, num_layers):
    target_module_list.extend([
        f"model.layers.{i}.self_attn.q_proj",
        f"model.layers.{i}.self_attn.k_proj",
        f"model.layers.{i}.self_attn.v_proj",
        f"model.layers.{i}.self_attn.o_proj",
        f"model.layers.{i}.mlp.gate_proj",
        f"model.model.layers.{i}.mlp.up_proj",
        f"model.layers.{i}.mlp.down_proj",
    ])
#for i in range(5):
#    target_module_list.extend([
#        f"model.layers.{i}.self_attn.q_proj",
#        f"model.layers.{i}.self_attn.k_proj",
#        f"model.layers.{i}.self_attn.v_proj",
#        f"model.layers.{i}.self_attn.o_proj",
#        f"model.layers.{i}.mlp.gate_proj",
#        f"model.model.layers.{i}.mlp.up_proj",
#        f"model.layers.{i}.mlp.down_proj",
#    ])

lora_config = LoraConfig(
    r=4,  
    lora_alpha=32, 
    lora_dropout=0.3,  
    target_modules=target_module_list,  
    task_type = "CAUSAL_LM",
    bias="none",
)

model = get_peft_model(model, lora_config)
for name, param in model.named_parameters():
    print(name, param.requires_grad)

base_model.model.model.embed_tokens.weight False
base_model.model.model.layers.0.self_attn.q_proj.weight False
base_model.model.model.layers.0.self_attn.k_proj.weight False
base_model.model.model.layers.0.self_attn.v_proj.weight False
base_model.model.model.layers.0.self_attn.o_proj.weight False
base_model.model.model.layers.0.mlp.gate_proj.weight False
base_model.model.model.layers.0.mlp.up_proj.weight False
base_model.model.model.layers.0.mlp.down_proj.weight False
base_model.model.model.layers.0.input_layernorm.weight False
base_model.model.model.layers.0.post_attention_layernorm.weight False
base_model.model.model.layers.1.self_attn.q_proj.weight False
base_model.model.model.layers.1.self_attn.k_proj.weight False
base_model.model.model.layers.1.self_attn.v_proj.weight False
base_model.model.model.layers.1.self_attn.o_proj.weight False
base_model.model.model.layers.1.mlp.gate_proj.weight False
base_model.model.model.layers.1.mlp.up_proj.weight False
base_model.model.model.layers.1.m

In [5]:
total_parameters = sum(p.numel() for p in model.parameters())
trainable_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total number of parameters: {total_parameters}")
print(f"Trainable parameters: {trainable_parameters}")

Total number of parameters: 3620876288
Trainable parameters: 3022848


In [9]:
def setup_llm_pipeline():
    # HuggingFacePipeline 객체 생성
    text_generation_pipeline = pipeline(
        model=model,
        tokenizer=tokenizer,
        task="text-generation",
        #task="LlamaForCausalLM",
        temperature=0.2,
        return_full_text=False,
        max_new_tokens=128, 
    )

    hf = HuggingFacePipeline(pipeline=text_generation_pipeline)

    for name, param in model.named_parameters():
        print(name, param.requires_grad)
        
    return hf

In [10]:
# LLM 파이프라인
llm = setup_llm_pipeline()

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM'

base_model.model.model.embed_tokens.weight False
base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight False
base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight True
base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight True
base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight False
base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight True
base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight True
base_model.model.model.layers.0.self_attn.v_proj.base_layer.weight False
base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight True
base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight True
base_model.model.model.layers.0.self_attn.o_proj.base_layer.weight False
base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight True
base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight True
base_model.model.model.layers.0.mlp.gate_proj.base_

  warn_deprecated(


In [11]:
# 전체 파라미터 수 및 학습 가능한 파라미터 수 계산
total_parameters = sum(p.numel() for p in llm.parameters())
trainable_parameters = sum(p.numel() for p in llm.parameters() if p.requires_grad)

print(f"Total number of parameters: {total_parameters}")
print(f"Trainable parameters: {trainable_parameters}")

AttributeError: 'HuggingFacePipeline' object has no attribute 'parameters'