## 第十章作业

#### 1. 基于 data 目录下的数据训练 ChatGLM3 模型，使用 inference Notebook 对比微调前后的效果。

In [3]:
import torch

# Print the PyTorch build configuration (compiler, math libraries, CUDA/cuDNN versions)
# so the environment used for this run is recorded in the notebook.
print(torch.__config__.show())

PyTorch built with:
  - C++ Version: 201703
  - MSVC 192930154
  - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.3.6 (Git Hash 86e6af5974177e513fd3fee58425e1063e7f1361)
  - OpenMP 2019
  - LAPACK is enabled (usually provided by MKL)
  - CPU capability usage: AVX2
  - CUDA Runtime 12.1
  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90
  - CuDNN 8.9.7  (built against CUDA 12.2)
  - Magma 2.5.4
  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=12.1, CUDNN_VERSION=8.9.7, CXX_COMPILER=C:/actions-runner/_work/pytorch/pytorch/builder/windows/tmp_bin/sccache-cl.exe, CXX_FLAGS=/DWIN32 /D_WINDOWS /GR /EHsc /Zc:__cplusplus /

In [7]:
# Show the properties of CUDA device 0 (name, compute capability, total memory, SM count).
print(torch.cuda.get_device_properties(0))

_CudaDeviceProperties(name='NVIDIA GeForce RTX 3070 Ti Laptop GPU', major=8, minor=6, total_memory=8191MB, multi_processor_count=46)


#### QLoRA 微调训练

In [8]:
# Paths and hyper-parameters shared by the training cells below.
model_name = 'THUDM/chatglm3-6b'  # used when building the output directory name
model_local_dir = '/root/huggingface/hub/chatglm3-6b'  # local checkpoint directory passed to from_pretrained
train_data_path = '../chatglm/data/zhouyi_dataset_20240118_163659.csv'  # CSV loaded as the training data
eval_data_path = None  # not referenced by any cell visible in this notebook
seed = 8  # seed passed to the dataset shuffle
max_input_length = 512  # question token budget; tokenize_func truncates question ids to this minus 2
max_output_length = 1536  # answer token budget; tokenize_func truncates answer ids to this minus 1
lora_rank = 16  # LoraConfig `r`
lora_alpha = 32  # LoraConfig `lora_alpha`
lora_dropout = 0.05  # LoraConfig `lora_dropout`
prompt_text = ''  # prefix that tokenize_func prepends to every question

In [None]:
from datasets import load_dataset

# Load the training CSV through the `datasets` CSV builder; later cells index the
# result as dataset['train'].
dataset = load_dataset('csv', data_files=train_data_path)
print(dataset)

In [None]:
import random
import pandas as pd
from datasets import ClassLabel, Sequence
from IPython.display import display, HTML


def show_random_elements(dataset, num_examples=10):
    """Display a random sample of distinct rows from ``dataset`` as an HTML table.

    Args:
        dataset: Object supporting ``len()``, indexing by a list of row indices,
            and a ``features`` mapping of column name to feature type.
        num_examples: Number of distinct rows to show; must not exceed ``len(dataset)``.

    Class-label columns (and sequences of class labels) are shown with their
    label names instead of integer ids.
    """
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."

    chosen = []
    # Rejection sampling: keep drawing until enough distinct row indices are collected.
    while len(chosen) < num_examples:
        candidate = random.randint(0, len(dataset)-1)
        if candidate not in chosen:
            chosen.append(candidate)

    frame = pd.DataFrame(dataset[chosen])
    for column, feature in dataset.features.items():
        if isinstance(feature, ClassLabel):
            frame[column] = frame[column].transform(lambda idx: feature.names[idx])
        elif isinstance(feature, Sequence) and isinstance(feature.feature, ClassLabel):
            frame[column] = frame[column].transform(
                lambda idxs: [feature.feature.names[idx] for idx in idxs]
            )
    display(HTML(frame.to_html()))

In [None]:
# Preview five random rows of the training split.
show_random_elements(dataset['train'], num_examples=5)

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer from the local checkpoint directory. trust_remote_code=True
# lets the checkpoint's own tokenizer implementation run.
tokenizer = AutoTokenizer.from_pretrained(
    model_local_dir, 
    trust_remote_code=True, 
    revision='b098244')

In [None]:
def tokenize_func(example, tokenizer, ignore_label_id=-100):
    """Turn one CSV row into ``input_ids`` / ``labels`` for causal-LM fine-tuning.

    The question is ``prompt_text + example['content']``, plus a newline and
    ``example['input']`` when that field is non-blank. The answer is
    ``example['summary']``. Both are truncated to the notebook-level
    ``max_input_length`` / ``max_output_length`` budgets before the special
    tokens are added.

    Args:
        example: Mapping with ``content`` and ``summary`` keys and an optional ``input`` key.
        tokenizer: Tokenizer providing ``encode`` and ``build_inputs_with_special_tokens``.
        ignore_label_id: Label value written over the question part of the sequence.

    Returns:
        Dict with ``input_ids`` (full sequence) and ``labels`` (same length, with the
        question positions replaced by ``ignore_label_id``).
    """
    question = prompt_text + example['content']
    extra = example.get('input', None)
    if extra and extra.strip():
        question = question + f'\n{extra}'
    answer = example['summary']

    # Slicing is a no-op when the sequence already fits within its budget.
    question_ids = tokenizer.encode(text=question, add_special_tokens=False)[:max_input_length - 2]
    answer_ids = tokenizer.encode(text=answer, add_special_tokens=False)[:max_output_length - 1]

    input_ids = tokenizer.build_inputs_with_special_tokens(question_ids, answer_ids)

    # The "+ 2" mirrors the "- 2" in the question budget above.
    # presumably two prefix special tokens are prepended by the tokenizer — confirm.
    prefix_len = len(question_ids) + 2
    masked_prefix = [ignore_label_id] * prefix_len
    return dict(input_ids=input_ids, labels=masked_prefix + input_ids[prefix_len:])

In [None]:
# Tokenize the training split one row at a time and drop the original CSV columns,
# so only the fields returned by tokenize_func remain.
col_names = dataset['train'].column_names
tokenized_dataset = dataset['train'].map(
    lambda example: tokenize_func(example, tokenizer),
    batched=False,
    remove_columns=col_names
)

In [None]:
# Shuffle with the fixed seed, then materialize the new row order.
# The original assigned the shuffle result to a misspelled name ("tokenzied_dataset"),
# so the shuffled dataset was discarded and training ran on unshuffled data.
tokenized_dataset = tokenized_dataset.shuffle(seed=seed)
tokenized_dataset = tokenized_dataset.flatten_indices()

In [None]:
import torch
from typing import List, Dict, Optional


class DataCollatorForChatGLM:
    """Collate tokenized examples into rectangular ``input_ids`` / ``labels`` tensors.

    Every example is right-padded to the longest example in the batch
    (``pad_token_id`` for inputs, ``ignore_label_id`` for labels). If that
    longest length exceeds ``max_length``, every row is cut to ``max_length``.
    Rows are emitted longest-first; ties keep their incoming order.
    """

    def __init__(self, pad_token_id: int, max_length: int=2048, ignore_label_id: int=-100):
        self.max_length = max_length
        self.ignore_label_id = ignore_label_id
        self.pad_token_id = pad_token_id

    def __call__(self, batch_data: List[Dict[str, List]]) -> Dict[str, torch.Tensor]:
        """Pad, optionally truncate, and stack ``batch_data`` into LongTensors."""
        longest = max(len(sample['input_ids']) for sample in batch_data)
        needs_truncation = longest > self.max_length
        # A stable sort with reverse=True keeps equal-length samples in their original order.
        ordered = sorted(batch_data, key=lambda sample: len(sample['input_ids']), reverse=True)

        id_rows, label_rows = [], []
        for sample in ordered:
            missing = longest - len(sample['input_ids'])
            padded_ids = sample['input_ids'] + [self.pad_token_id] * missing
            padded_labels = sample['labels'] + [self.ignore_label_id] * missing
            if needs_truncation:
                padded_ids = padded_ids[:self.max_length]
                padded_labels = padded_labels[:self.max_length]
            id_rows.append(torch.LongTensor(padded_ids))
            label_rows.append(torch.LongTensor(padded_labels))

        return {'input_ids': torch.stack(id_rows), 'labels': torch.stack(label_rows)}

In [None]:
# Build the collator with the tokenizer's pad token; max_length and ignore_label_id keep their defaults.
data_collator = DataCollatorForChatGLM(pad_token_id=tokenizer.pad_token_id)

In [None]:
from transformers import AutoModel, BitsAndBytesConfig 

# Short names for the torch dtypes that can be used as the quantized compute dtype.
_compute_dtype_map = {
    'fp32': torch.float32,
    'fp16': torch.float16,
    'bf16': torch.bfloat16,
}

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
q_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=_compute_dtype_map['bf16'],
)

# Load the base model from the local checkpoint with the quantization config applied.
model = AutoModel.from_pretrained(
    model_local_dir,
    quantization_config=q_config,
    device_map='auto',
    trust_remote_code=True,
    revision='b098244'
)

# Turn on gradient checkpointing and make the inputs require grads before training.
model.supports_gradient_checkpointing = True
model.gradient_checkpointing_enable()
model.enable_input_require_grads()

# Disable the generation cache for training.
model.config.use_cache = False


In [None]:
from peft import TaskType, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from peft.utils import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING

# Run PEFT's k-bit training preparation on the quantized model.
kbit_model = prepare_model_for_kbit_training(model)
# Use the LoRA target module names PEFT registers under the 'chatglm' key.
target_modules = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING['chatglm']

In [None]:
# LoRA settings for causal-LM training, built from the hyper-parameters in the config cell.
lora_config = LoraConfig(
    target_modules=target_modules,
    r=lora_rank,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias='none',
    inference_mode=False,
    task_type=TaskType.CAUSAL_LM
)

In [None]:
# Wrap the prepared model with LoRA adapters and report how many parameters are trainable.
qlora_model = get_peft_model(kbit_model, lora_config)
qlora_model.print_trainable_parameters()

In [11]:
import datetime

# Timestamp the run so each training session writes to its own directory.
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
train_epochs = 3
# model_name contains a '/', so the resulting path is nested: models/THUDM/chatglm3-6b-epoch<N>-<timestamp>.
output_dir = f'models/{model_name}-epoch{train_epochs}-{timestamp}'

In [12]:
from transformers import TrainingArguments, Trainer

# Training configuration for the QLoRA run.
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    learning_rate=1e-3,
    num_train_epochs=train_epochs,
    # The scheduler name belongs in lr_scheduler_type. The original passed it as
    # lr_scheduler_kwargs, which expects a dict of extra scheduler arguments.
    lr_scheduler_type='linear',
    warmup_ratio=0.1,
    logging_steps=1,
    save_strategy='steps',
    save_steps=10,
    save_total_limit=2,
    optim='adamw_torch',
    # NOTE(review): mixed precision here is fp16 while the quantization config
    # computes in bf16 — confirm this combination is intended.
    fp16=True,
)

'models/THUDM/chatglm3-6b-epoch3-20240630_171246'

In [None]:
# Assemble the Trainer from the LoRA-wrapped model, the tokenized data and the padding collator.
trainer = Trainer(
    model=qlora_model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator
)

In [None]:
# Run the fine-tuning loop.
trainer.train()

In [None]:
# Persist the trained model into output_dir so the inference section can load it.
trainer.model.save_pretrained(output_dir)

#### 推理和对比

In [None]:
import torch
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig

# Repeated from the training section so the inference cells can run on their own.
model_name = 'THUDM/chatglm3-6b'
model_local_dir = '/root/huggingface/hub/chatglm3-6b'

In [None]:
# Short names for the torch dtypes that can be used as the quantized compute dtype.
_compute_dtype_map = {
    'fp32': torch.float32,
    'fp16': torch.float16,
    'bf16': torch.bfloat16,
}

# Same 4-bit NF4 / double-quantization / bf16-compute setup as used for training.
q_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=_compute_dtype_map['bf16'],
)

# Load the base model for inference. The keyword must be spelled
# `quantization_config`; the original typo ("quantizationc_config") meant the
# 4-bit config was never applied.
base_model = AutoModel.from_pretrained(
    model_local_dir,
    quantization_config=q_config,
    device_map='auto',
    trust_remote_code=True,
    revision='b098244'
)

In [None]:
# Freeze all parameters and switch to eval mode for inference.
base_model.requires_grad_(False)
base_model.eval()

In [None]:
# Load the tokenizer from the same local checkpoint directory as the base model.
tokenizer = AutoTokenizer.from_pretrained(
    model_local_dir, 
    trust_remote_code=True, 
    revision='b098244')

In [None]:
from peft import PeftModel, PeftConfig

epochs = 3
# TODO: set this to the timestamp suffix of the training run's output directory
# (format %Y%m%d_%H%M%S); with an empty value the path below ends in a bare "-".
timestamp = ''

# Must match the output_dir pattern used by the training section.
peft_model_path = f"models/{model_name}-epoch{epochs}-{timestamp}"

config = PeftConfig.from_pretrained(peft_model_path)
qlora_model = PeftModel.from_pretrained(base_model, peft_model_path)
# Label printed next to the fine-tuned output; it uses `epochs` so it cannot drift
# from the checkpoint path above.
training_tag = f'ChatGLM3-6B(Epoch={epochs}, automade-dataset(fixed))-{timestamp}'

In [None]:
def compare_chatglm_results(query):
    """Print and return the base and fine-tuned model answers for ``query``.

    Relies on the notebook globals ``base_model``, ``qlora_model``,
    ``tokenizer`` and ``training_tag``.

    Args:
        query: Question text sent to both models.

    Returns:
        Tuple ``(base_response, ft_response)``.
    """
    # NOTE(review): PeftModel.from_pretrained typically injects the adapter layers into
    # base_model in place — confirm this call really runs without the LoRA weights
    # (otherwise wrap it in `with qlora_model.disable_adapter():`).
    base_response, _ = base_model.chat(tokenizer, query)

    inputs = tokenizer(query, return_tensors='pt').to(0)
    ft_out = qlora_model.generate(**inputs, max_new_tokens=512)
    ft_response = tokenizer.decode(ft_out[0], skip_special_tokens=True)

    # Every literal needs its own f prefix: adjacent string literals are concatenated,
    # but only the f-prefixed ones are interpolated. The tag shown is `training_tag`
    # (the original referenced `training_args`).
    print(f'问题：{query}\n\n'
          f'原始输出：\n{base_response}\n\n'
          f'微调后（{training_tag}）：\n{ft_response}')
    return base_response, ft_response

In [None]:
# Sample query 1: run the base-vs-fine-tuned comparison.
compare_chatglm_results('解释下乾卦是什么？')

In [None]:
# Sample query 2: run the base-vs-fine-tuned comparison.
compare_chatglm_results('周易中的讼卦是什么')

In [None]:
# Sample query 3: run the base-vs-fine-tuned comparison.
compare_chatglm_results('师卦是什么？')