In [None]:
# !pip install transformers
# !pip install peft
# !pip install pandas
# !pip install bitsandbytes

In [None]:
import os
import transformers
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM , PreTrainedTokenizerFast, AdamW, BitsAndBytesConfig, GPT2LMHeadModel, LlamaTokenizer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import pandas as pd
import tqdm
import urllib.request
import bitsandbytes
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Ask PyTorch's CUDA caching allocator to release memory it is holding but not using,
# so the model load in the next cell starts with as much free GPU memory as possible.
torch.cuda.empty_cache()

In [None]:
# Base checkpoint shared by the tokenizer and the model.
model_id = "LDCC/LDCC-SOLAR-10.7B"

# Initialise the tokenizer.
# AutoTokenizer resolves the tokenizer class declared by the checkpoint. Forcing
# PreTrainedTokenizerFast here produced the "tokenizer class ... is not the same type"
# warning, which explicitly says it "may result in unexpected tokenization".
tokenizer = AutoTokenizer.from_pretrained(model_id, eos_token='</s>')

# collate_fn pads with tokenizer.pad_token_id, which must be a number; fall back to
# EOS when the checkpoint does not define a pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Initialise the model with 4-bit NF4 quantization (double quantization, fp16 compute).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)
# output_attentions=True is intentionally not set: nothing in this notebook reads the
# attention maps, and returning them for every layer on every forward pass only costs
# GPU memory during training and generation.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.float32,
)

# LoRA configuration; adapters are attached to the attention projection layers.
lora_config = LoraConfig(
    r=8,  # rank of the LoRA update matrices
    lora_alpha=32,  # LoRA scaling factor
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # module names that receive adapters
    lora_dropout=0.1,  # dropout applied inside the LoRA layers
    bias="none",  # do not train bias terms
    task_type="CAUSAL_LM"  # task type
)


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LlamaTokenizer'. 
The class this function is called from is 'PreTrainedTokenizerFast'.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
class ChatDataset(Dataset):
    """Torch dataset that turns question/answer rows into token-id tensors.

    ``train_data`` is read positionally through its ``Question`` and ``Answer``
    columns; ``tokenizer`` must provide ``bos_token_id``, ``eos_token_id`` and
    ``encode``.
    """

    def __init__(self, train_data, tokenizer):
        self.tokenizer = tokenizer
        self.train_data = train_data

    def __len__(self):
        """Number of rows, i.e. one training example per row."""
        return len(self.train_data)

    def __getitem__(self, idx):
        """Return ``[bos] + encode('<usr>' + question + '<sys>' + answer) + [eos]`` as a long tensor."""
        frame = self.train_data
        prompt = '<usr>' + frame["Question"].iloc[idx] + '<sys>' + frame["Answer"].iloc[idx]

        # Special tokens are added by hand below, so the tokenizer must not add its own.
        body_ids = self.tokenizer.encode(prompt, add_special_tokens=False)
        token_ids = [self.tokenizer.bos_token_id, *body_ids, self.tokenizer.eos_token_id]
        return torch.tensor(token_ids, dtype=torch.long)

def collate_fn(batch):
    """Right-pad a list of 1-D token tensors into one ``(batch, longest)`` tensor.

    Shorter sequences are filled with the module-level ``tokenizer``'s pad token id.
    """
    pad_id = tokenizer.pad_token_id
    padded = torch.nn.utils.rnn.pad_sequence(batch, batch_first=True, padding_value=pad_id)
    return padded

df = pd.read_csv('last_df.csv')

# Prepare the quantized model for LoRA fine-tuning and attach the adapters.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The model is deliberately NOT wrapped in torch.nn.DataParallel: the wrapper does not
# expose save_pretrained / push_to_hub / generate, which later cells call directly on
# `model`, so on a multi-GPU machine those cells would fail with AttributeError.
if torch.cuda.device_count() > 1:
    print(f"{torch.cuda.device_count()} GPUs visible; training on {device} only.")

model.to(device)  # move the model to the selected device

batch_size = 10
EPOCHS = 10
steps = -(-len(df) // batch_size)  # batches per epoch (ceil division, no extra step when divisible)
PATIENCE = 2  # stop after 2 consecutive epochs without improvement
best_loss = float('inf')
patience_counter = 0

# Dataset and data loader. Shuffle so batches are not drawn in file order every epoch.
chat_dataset = ChatDataset(df, tokenizer)
data_loader = DataLoader(chat_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

# Optimizer, created once and only over the parameters that are actually trained
# (the LoRA adapter weights; the quantized base weights are frozen).
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=3e-5, eps=1e-08)

In [None]:
def _padding_positions(batch, pad_id, eos_id):
    """Return a bool mask that is True on the right-padding positions of ``batch``.

    ``batch`` is a ``(batch, seq)`` tensor produced by ``collate_fn``: every row ends
    with EOS followed by zero or more pad tokens. Only the trailing run of ``pad_id``
    is treated as padding. When the pad token is the EOS token itself, the first
    token of that run is the real EOS and is kept.
    """
    # Reverse each row, keep the leading run of pad tokens via cumprod, reverse back.
    trailing = batch.eq(pad_id).long().flip(1).cumprod(dim=1).flip(1).bool()
    if pad_id == eos_id:
        # A position is padding only if the position before it is also in the run.
        preceded = torch.zeros_like(trailing)
        preceded[:, 1:] = trailing[:, :-1]
        trailing = trailing & preceded
    return trailing


for epoch in range(EPOCHS):
    model.train()  # put the model in training mode
    epoch_loss = 0

    for batch in tqdm.tqdm(data_loader):
        # Move the batch to the training device.
        batch = batch.to(device)

        # Inputs and labels are the same token ids, except that padding is excluded:
        # label -100 is ignored by the loss, and attention_mask marks the real tokens.
        # Without this the model is trained to predict pad tokens and the reported
        # loss is diluted by them.
        padding = _padding_positions(batch, tokenizer.pad_token_id, tokenizer.eos_token_id)
        labels = batch.masked_fill(padding, -100)
        attention_mask = (~padding).long()

        optimizer.zero_grad()

        result = model(input_ids=batch, attention_mask=attention_mask, labels=labels)
        loss = result.loss
        batch_loss = loss.mean()

        batch_loss.backward()
        optimizer.step()

        epoch_loss += batch_loss.item()

    epoch_loss /= len(data_loader)

    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, epoch_loss))

    # Early-stopping check.
    # NOTE(review): this monitors the training loss, not a held-out validation loss.
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        patience_counter = 0  # improvement: reset the counter
    else:
        patience_counter += 1  # no improvement: count the epoch

    # Save the model and tokenizer after each epoch
    model_save_path = f'model_directory/model_epoch_{epoch + 1}'
    model.save_pretrained(model_save_path)
    tokenizer.save_pretrained(model_save_path)

    if patience_counter >= PATIENCE:
        print("Early stopping triggered. Training stopped.")
        break

  0%|          | 0/743 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
100%|██████████| 743/743 [1:26:59<00:00,  7.02s/it]


[Epoch:    1] cost = 0.935295391


100%|██████████| 743/743 [1:26:27<00:00,  6.98s/it]


[Epoch:    2] cost = 0.774953568


100%|██████████| 743/743 [1:26:21<00:00,  6.97s/it]


[Epoch:    3] cost = 0.688129196


100%|██████████| 743/743 [1:26:27<00:00,  6.98s/it]


[Epoch:    4] cost = 0.609878846


100%|██████████| 743/743 [1:26:28<00:00,  6.98s/it]


[Epoch:    5] cost = 0.538167688


100%|██████████| 743/743 [1:26:15<00:00,  6.97s/it]


[Epoch:    6] cost = 0.475637002


 27%|██▋       | 204/743 [22:34<51:33,  5.74s/it]  IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

100%|██████████| 743/743 [1:26:19<00:00,  6.97s/it]


[Epoch:   10] cost = 0.28240467


In [None]:
# SECURITY: never paste an access token into the notebook (comments and saved outputs
# are shared and committed with it). Any token that has appeared here must be revoked at
# https://huggingface.co/settings/tokens and replaced with a new one entered interactively.
!huggingface-cli login

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): Traceback (most recent call last):
  File "/usr/local/bin/huggingface-cli", line 8, in <module>
    sys.exit(main())
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/commands/huggingface_cli.py", line 49, in main
    service.run()
  File "/usr/local/lib/python3

In [None]:
import getpass

# Hugging Face 토큰을 입력하세요.
huggingface_token = getpass.getpass("Enter your Hugging Face token: ")

# 환경 변수에 토큰 설정
%env HF_HOME=/root
%env HF_TOKEN=$huggingface_token

# 로그인
!huggingface-cli whoami


Enter your Hugging Face token:  ········


env: HF_HOME=/root
env: HF_TOKEN=<redacted>


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


coconut00


In [None]:
# Upload the fine-tuned model to the Hugging Face Hub; the argument is the repository name.
model.push_to_hub("LDCC_HSTC")

adapter_model.safetensors:   0%|          | 0.00/40.9M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/coconut00/LDCC_HSTC/commit/e37e60f32fd238c7d842f7c73f743c67ac0bd3d8', commit_message='Upload model', commit_description='', oid='e37e60f32fd238c7d842f7c73f743c67ac0bd3d8', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# Upload the tokenizer to the same Hub repository name as the model.
tokenizer.push_to_hub("LDCC_HSTC")

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/coconut00/LDCC_HSTC/commit/1b69f122c417e50268cd6650be63de36dbdf2061', commit_message='Upload tokenizer', commit_description='', oid='1b69f122c417e50268cd6650be63de36dbdf2061', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
def return_answer_by_chatbot(user_text, max_length=150, top_k=2):
    """Generate the chatbot's answer to ``user_text``.

    The prompt uses the training format, ``<bos><usr>{user_text}<sys>``, and the text
    generated after ``<sys>`` (up to ``</s>``) is returned.

    Args:
        user_text: The user's question.
        max_length: Maximum total length in tokens (prompt + answer) passed to ``generate``.
        top_k: Number of highest-probability tokens sampled from at each step.

    Returns:
        The stripped answer text. If generation stops before ``</s>`` is produced, the
        partial answer is returned. If ``<sys>`` cannot be found in the decoded
        output, an error message string is returned.
    """
    sys_start = '<sys>'
    sys_end = '</s>'

    prompt = '<usr>' + user_text + sys_start
    prompt_ids = [tokenizer.bos_token_id] + tokenizer.encode(prompt, add_special_tokens=False)
    input_ids = torch.tensor([prompt_ids], dtype=torch.long).to(device)

    # The training loop leaves the model in train mode, where LoRA dropout is active.
    # Generate in eval mode and restore the previous mode afterwards.
    was_training = model.training
    model.eval()
    try:
        # Keyword arguments: some PEFT versions do not accept positional inputs here.
        output = model.generate(
            input_ids=input_ids,
            attention_mask=torch.ones_like(input_ids),  # single unpadded prompt
            max_length=max_length,
            do_sample=True,
            top_k=top_k,
        )
    finally:
        model.train(was_training)

    sentence = tokenizer.decode(output[0].tolist())

    # Locate the system response in the decoded text.
    start_idx = sentence.find(sys_start)
    if start_idx == -1:
        return "에러: 모델 응답에서 '<sys>'와 '</s>'를 찾을 수 없습니다."

    answer_start = start_idx + len(sys_start)
    end_idx = sentence.find(sys_end, answer_start)
    if end_idx == -1:
        # max_length was reached before EOS: return what was generated instead of an error.
        return sentence[answer_start:].strip()
    return sentence[answer_start:end_idx].strip()


In [None]:
# Sample query: "how to hang wallpaper".
return_answer_by_chatbot('도배하는법')

'벽지를  붙 일 때는  벽 의 상태 를 잘 점검하여  균 일한 벽면을  만들고 , 필요한  경우 벽지를  부착하기  전에  벽 을 평평하게  만들어 야합니다 . 또한 , 벽지의  패턴을 정확히 맞 추고 공기 방울 이 생기지  않도록  주의하여  벽지를  부착해야  합니다 . 이렇게  하면  벽지가  깔끔하고  아름답게  부착 될 수 있습니다 .'

In [None]:
# Sample query: "what should I do if there is a water leak".
return_answer_by_chatbot('누수가 생기면 어떻게 해')

'누수가  발생했을  때는  빠르게  조치해야  합니다 . 먼 저 물이  누출되는  원인 을 찾아내고 , 그 부분 을 수리하는  작업 을 시작해야  합니다 . 또한 , 물이  누출되는  동안  가구 나 다른  물건을  보호하기  위해  물기가  누출되는  방향 에 대한  방수 조치 를 취해야  합니다 .'