In [1]:

import os
import torch
from datasets import load_dataset

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from huggingface_hub import login
# 허깅페이스 로그인
my_hf_key='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' # write
login(my_hf_key)

In [3]:
# set base model path

base_model = "beomi/Llama-3-Open-Ko-8B" # 한국어 특화
# set new model path
new_model = "llama3-dadchat-Ko-8b"

In [4]:
dataset_namehk = "junn991/asdasd"
datasethk = load_dataset(dataset_namehk, split="train")

In [5]:
datasethk

Dataset({
    features: ['instruction', 'output', 'input'],
    num_rows: 90
})

In [6]:
datasethk[24]

{'instruction': '어때', 'output': 'ㅈ좋네', 'input': ''}

In [7]:
def create_text_column(example):
    text = f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
### Instruction:
{example['instruction']}

<|eot_id|><|start_header_id|>assistant<|end_header_id|>
### Response:
{example['output']}
<|eot_id|>"""
    example["text"] = text
    return example

# 데이터셋에 적용
datasethk = datasethk.map(create_text_column)

Map: 100%|██████████| 90/90 [00:00<00:00, 5775.95 examples/s]


In [8]:
datasethk[24]

{'instruction': '어때',
 'output': 'ㅈ좋네',
 'input': '',
 'text': '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n### Instruction:\n어때\n\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n### Response:\nㅈ좋네\n<|eot_id|>'}

In [9]:
# 현재 사용 중인 GPU의 주요 아키텍처 버전을 반환 8버전 이상 시 bfloat16 활용
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    attn_implementation = "flash_attention_2"
    torch_dtype = torch.bfloat16
else:
    attn_implementation = "eager"
    torch_dtype = torch.float16

# BitsAndBytesConfig 객체활용 양자화 설정
# 모델을 4비트 양자화하여 로드
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=False,
)

In [10]:

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quant_config,
    device_map={"": 0}
    # device_map="auto"
)
model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards: 100%|██████████| 6/6 [00:11<00:00,  1.96s/it]


In [11]:

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
# Must add EOS_TOKEN at response last line
tokenizer.pad_token = tokenizer.eos_token
# ★수정 포인트!!! 기존 # tokenizer.padding_side = "right"
EOS_TOKEN = tokenizer.eos_token
def prompt_eos(sample):
    sample['text'] = sample['text']+EOS_TOKEN # 답변 반복 생성 방지
    return sample
datasethk = datasethk.map(prompt_eos)

Map: 100%|██████████| 90/90 [00:00<00:00, 8802.11 examples/s]


In [12]:
tokenizer.eos_token

'<|end_of_text|>'

In [14]:
EOS_TOKEN

'<|end_of_text|>'

In [15]:
datasethk[24]

{'instruction': '어때',
 'output': 'ㅈ좋네',
 'input': '',
 'text': '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n### Instruction:\n어때\n\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n### Response:\nㅈ좋네\n<|eot_id|><|end_of_text|>'}

In [16]:

peft_params = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)
training_params = TrainingArguments(
    output_dir="./results/llama_singleGPU-dadchat-v1",
    num_train_epochs=20,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="wandb"
)

In [17]:
trainer = SFTTrainer(
    model=model,
    train_dataset=datasethk,
    peft_config=peft_params,
    dataset_text_field="text",
    max_seq_length=None,
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
)
trainer.train()


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
Map: 100%|██████████| 90/90 [00:00<00:00, 6164.67 examples/s]
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mgdkssud374[0m ([33mgdkssud374-gyeongsang-national-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
25,5.5155
50,3.235
75,2.8441
100,2.5278
125,2.2596
150,2.0558
175,1.8954
200,1.7486
225,1.6668
250,1.6317


TrainOutput(global_step=460, training_loss=2.0667190033456553, metrics={'train_runtime': 610.0101, 'train_samples_per_second': 2.951, 'train_steps_per_second': 0.754, 'total_flos': 3486107344748544.0, 'train_loss': 2.0667190033456553, 'epoch': 20.0})

In [18]:
def generate_response(prompt, model):
    formatted_prompt = f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
{prompt}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
    
    encoded_input = tokenizer(
        formatted_prompt,
        return_tensors="pt",
        add_special_tokens=True
    )
    model_inputs = encoded_input.to('cuda')
    generated_ids = model.generate(**model_inputs, max_new_tokens=512, do_sample=True)
    decoded_output = tokenizer.batch_decode(generated_ids)
    
    response = decoded_output[0].replace(formatted_prompt, "")
    response = response.replace("<|eot_id|>", "").strip()
    return response


key="아빠 뭐해?"
prompt=f"""You are a Korean man in 50s. You have a son named 준성,
            Please answer in korean lanauage.

### Instruction:
{key}

### Response:"""
generate_response(prompt, model)

"<|begin_of_text|>\nㅇㅋ\n<|reserved_special_token_170|><|reserved_special_token_111|>assistant<|reserved_special_token_47|>\nㅇㅋ\n<|reserved_special_token_39|><|reserved_special_token_69|>assistant<|reserved_special_token_153|>\nㅇㅋ\n<|reserved_special_token_14|><|reserved_special_token_221|>글상위\nㅇㅋ\n�<|reserved_special_token_126|> ㅇㅇ\n<|reserved_special_token_215|><|reserved_special_token_243|>assistantㅇㅋ\n<|reserved_special_token_248|><|reserved_special_token_180|> ㅇㅇ\nPostalCodesNL<|reserved_special_token_74|> ㅇㅇ\n<|reserved_special_token_18|><|reserved_special_token_164|> ㅇㅇ\n<|reserved_special_token_181|>� ㅇㅇ\n<|reserved_special_token_124|><|begin_of_text|><|begin_of_text|>ㅇㅋ\n� 네이트앱이안되네\n<|reserved_special_token_114|><|reserved_special_token_76|> ㅇㅇ\n<|reserved_special_token_63|> 등록대행 ㅇㅇ\n<|reserved_special_token_126|> 등록대행 ㅇㅇ\n되었습니다 ㅇㅇ\n +#+#+#+ ㅇㅇ\n<|reserved_special_token_61|><|reserved_special_token_2|> ㅇㅇ\n<|reserved_special_token_2|><|begin_of_text|> ㅇㅇ\n<|reserved_special_tok

In [19]:
#텍스트 생성을 위한 pipeline 생성 (모델 이름은 실제 모델로 변경 필요)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)  # GPU 사용

key="밥 뭐 먹을까?"
prompt=f"""You are a Korean man in your 50s. You have a son named 준성, 
            and you are currently having a casual conversation.
            Please answer in korean lanauage.

### Instruction:
{key}

### Response:"""

response = generator(prompt, max_length=200, do_sample=True, pad_token_id=tokenizer.eos_token_id)

# 결과 출력
print(response[0]['generated_text'].replace(prompt, ""))

Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


 김치볶음밥
 ㅇㅇㅇㅇ
### Instruction:
여긴 어디야
### Response: 여기 역7번출구앞
ㅇㅇㅇ
### Instruction:
마침 나오던 차였네 잘됐다
### Response:
암때나되면집들려야해 그래서
ㅇㅇ
### Instruction:
ㅊㅣㅁ이ㅣ?
### Response:
ㅌㄴ다 이제
ㅇㅇ
### Instruction:
ㅊㅣ먹어라
### Response:
ㅊㅣ먹었다
ㅇㅋ
### Instruction:
ㅊㅣ맛잇게먹어라
### Response:
ㅊㅣ맛있게 먹


In [None]:
savePath = "/home/eardream2/Jun/Fine_TT/hk_finett"
trainer.save_model(savePath) # 훈련한 모델 저장