In [1]:
import pandas as pd
from datasets import load_dataset

In [2]:
# One must patch the DPO Trainer first!
# The patch is applied here, before the model is loaded and before trl's
# DPOTrainer is imported and constructed further down in the notebook.
from unsloth import PatchDPOTrainer
PatchDPOTrainer()

In [3]:
from unsloth import FastLanguageModel
import torch
# Loader settings; `max_seq_length` is also the context size handed to the model.
max_seq_length = 2048
dtype = None          # forwarded unchanged to the loader
load_in_4bit = True   # ask the loader for 4-bit weights

# Collect the loader arguments once, then fetch the checkpoint and its tokenizer.
_load_kwargs = dict(
    model_name="Crysiss/llama-3-8B-university-sft-v0.3",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**_load_kwargs)

==((====))==  Unsloth: Fast Llama patching release 2024.4
   \\   /|    GPU: NVIDIA GeForce RTX 4090. Max memory: 23.628 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.2. CUDA = 8.9. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
# NOTE(review): this cell carries no execution count in the saved notebook, and
# the trainer banner further down reports 167,772,160 trainable parameters,
# which looks like rank 64 on these seven projections rather than the r = 16
# requested here. Confirm whether this cell is meant to run on a checkpoint
# that may already contain LoRA adapters.

# Attention and MLP projections that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

model = FastLanguageModel.get_peft_model(
    model,
    r=16,                                  # any rank > 0; 8/16/32/64/128 suggested
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,                        # any value works, 0 is the optimized path
    bias="none",                           # any value works, "none" is the optimized path
    use_gradient_checkpointing="unsloth",  # True or "unsloth" (for very long context)
    random_state=3407,
    use_rslora=False,                      # rank-stabilized LoRA is available but off
    loftq_config=None,                     # LoftQ is available but off
)

In [4]:
# Preference pairs stored as JSON Lines; requested as the "train" split.
dpo_data_path = "../dpo/data/university_orpo_data.jsonl"
dposet = load_dataset(
    "json",
    data_files=dpo_data_path,
    split="train",
)

Generating train split: 0 examples [00:00, ? examples/s]

In [7]:
# Text-to-SQL prompt template with four positional `{}` slots. Callers in this
# notebook fill them in this order: question, schema string, the question
# again, and finally the text after the `[SQL]` marker (passed as "" so the
# prompt ends right at the marker).
prompt_1 = """### Task
Generate a SQL query to answer the following question:
`{}`
 
### Database Schema
This query will run on a database whose schema is represented in this string:
"{}"
 
### SQL
Given the database schema, here is the SQL query that answers `{}`:
[SQL]{}"""

# End-of-sequence token string of the loaded tokenizer; format_prompt appends
# it to both the chosen and the rejected completion.
EOS_TOKEN = tokenizer.eos_token

def format_prompt(sample):
    """Add the DPO columns ``prompt``, ``chosen`` and ``rejected`` to one row.

    Reads ``input``, ``accept``, ``reject`` and ``context`` from ``sample``,
    renders the module-level ``prompt_1`` template with an empty SQL slot,
    and appends ``EOS_TOKEN`` to both completions.

    The mapping is updated in place and the same object is returned, which
    is the shape ``Dataset.map`` is called with in this notebook.
    """
    # Look the source fields up in a fixed order (question, preferred SQL,
    # dispreferred SQL, schema).
    question, preferred_sql, dispreferred_sql, schema = (
        sample[key] for key in ("input", "accept", "reject", "context")
    )

    sample["prompt"] = prompt_1.format(question, schema, question, "")
    sample["chosen"] = f"{preferred_sql}{EOS_TOKEN}" if False else preferred_sql + EOS_TOKEN
    sample["rejected"] = dispreferred_sql + EOS_TOKEN
    return sample

# Render every row into prompt/chosen/rejected, then drop the raw source
# columns so only the three DPO fields remain.
_raw_columns = ['input', 'accept', 'reject', 'context']
dataset = dposet.map(format_prompt).remove_columns(_raw_columns)

Map:   0%|          | 0/398 [00:00<?, ? examples/s]

In [8]:
# Spot-check one rendered prompt (row index 1).
sample_row = dataset[1]
print(sample_row['prompt'])

### Task
Generate a SQL query to answer the following question:
`지역명과 지역별 Pass 등급을 받은 학생들의 평균 학점`
 
### Database Schema
This query will run on a database whose schema is represented in this string:
"CREATE TABLE studentinfo (code_module varchar(45) NOT NULL -- 'an identification code for a module on which the student is registered.', code_presentation varchar(45) NOT NULL -- 'an identification code for a module on which the student is registered.an identification code for a module on which the student is registered.an identification code for a module on which the student is registered.an identification code for a module on which the student is registered.the identification code of the presentation during which the student is registered on the module.', id_student int NOT NULL -- 'a unique identification number for the student.', gender varchar(3) -- 'the student’s gender.', region varchar(45) -- 'identifies the geographic region, where the student lived while taking the module-presenta

In [9]:
from datetime import datetime
import wandb, os
# Log in to Weights & Biases before the trainer starts reporting.
wandb.login()

# Labels used for experiment tracking.
model_name = "meta-llama/Meta-Llama-3-8B"
wandb_project = "llama-3-university-dpo"
project = "Qlora-4bit"

# Export the W&B project only when a non-empty name is configured.
if wandb_project:
    os.environ["WANDB_PROJECT"] = wandb_project

# Base run name; the trainer cell appends a timestamp to it.
run_name = f"{model_name}-{project}"

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcrysis[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [10]:
from transformers import TrainingArguments
from trl import DPOTrainer

# Token budget for DPO tokenisation.
# When prompt + completion exceeds `max_length`, the trainer first trims the
# prompt to `max_prompt_length` and then trims the completion to
# `max_length - max_prompt_length`. With both limits equal (the previous
# 2048 / 2048 setting) that remainder is zero, so long-schema examples would
# be trained with empty chosen/rejected completions. Reserve part of the
# window for the SQL answer instead.
dpo_max_length = max_seq_length                 # same window the model was loaded with
dpo_completion_budget = 512                     # tokens kept free for the SQL completion
dpo_max_prompt_length = dpo_max_length - dpo_completion_budget

dpo_trainer = DPOTrainer(
    model = model,
    # No separate reference model is passed.
    ref_model = None,
    args = TrainingArguments(
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 2,
        warmup_ratio = 0.1,
        # max_steps takes precedence over num_train_epochs (see trainer log).
        max_steps = 1000,
        learning_rate = 2e-5,
        max_grad_norm= 0.3,
        # Use bf16 when the GPU supports it, otherwise fall back to fp16.
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "paged_adamw_8bit",
        weight_decay = 0.0,
        lr_scheduler_type = "cosine",
        seed = 42,
        output_dir = "outputs",
        report_to="wandb",
        logging_strategy = 'steps',
        # Timestamp suffix keeps repeated runs distinguishable in W&B.
        run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}"
    ),
    beta = 0.1,
    train_dataset = dataset,
    tokenizer = tokenizer,
    max_length = dpo_max_length,
    max_prompt_length = dpo_max_prompt_length,
)



Map:   0%|          | 0/398 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [11]:
# Run DPO fine-tuning with the trainer built in the previous cell. Because
# logging_steps = 1, loss and reward statistics are reported for every step.
dpo_trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 398 | Num Epochs = 6
O^O/ \_/ \    Batch size per device = 1 | Gradient Accumulation steps = 2
\        /    Total batch size = 2 | Total steps = 1,000
 "-____-"     Number of trainable parameters = 167,772,160


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,rewards / chosen,rewards / rejected,rewards / accuracies,rewards / margins,logps / rejected,logps / chosen,logits / rejected,logits / chosen
1,0.2582,3.518131,2.2932,1.0,1.224931,-27.097851,-32.941525,-0.792981,-0.787659
2,0.3795,3.435399,2.66222,1.0,0.773179,-31.211334,-40.962135,-0.755429,-0.764873
3,0.5082,2.949077,2.46733,0.5,0.481747,-25.017277,-36.796627,-0.797908,-0.800835
4,0.8224,2.325364,2.486775,0.5,-0.161411,-24.97699,-31.275009,-0.669119,-0.681884
5,0.9887,2.246981,2.676776,0.5,-0.429795,-28.490131,-72.738228,-0.666729,-0.72957
6,0.6773,2.253478,2.221229,0.5,0.032249,-24.532341,-27.217766,-0.819441,-0.818806
7,1.0165,1.073188,1.612925,0.0,-0.539737,-26.002781,-36.66151,-0.634507,-0.636294
8,0.6885,2.183093,1.776892,0.5,0.4062,-26.529436,-29.663986,-0.802099,-0.812316
9,0.9932,1.712181,2.093979,0.5,-0.381798,-27.516277,-27.72596,-0.658138,-0.65589
10,0.6931,1.401599,1.401599,0.0,0.0,-23.027386,-23.027386,-0.791484,-0.791484


In [13]:
# model.save_pretrained("llama3-8B-welfare-rollback") # Local saving
# Upload the trained model to the Hugging Face Hub repository named below.
# NOTE(review): the upload log lists adapter_model.safetensors, so this appears
# to publish adapter weights only, and the tokenizer is not uploaded by this
# call. Confirm whether a separate tokenizer.push_to_hub(...) is wanted.
model.push_to_hub("Crysiss/llama-3-8B-university-dpo-v0.1") # Online saving

README.md:   0%|          | 0.00/574 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/671M [00:00<?, ?B/s]

Saved model to https://huggingface.co/Crysiss/llama-3-8B-university-dpo-v0.1


In [21]:
# Evaluation examples stored as JSON Lines; requested as the "train" split.
eval_data_path = "./data/test_kor_data.jsonl"
evalset = load_dataset(
    "json",
    data_files=eval_data_path,
    split="train",
)

Generating train split: 0 examples [00:00, ? examples/s]

In [28]:
# Apply the same prompt formatting to the evaluation rows.
testset = evalset.map(format_prompt,)
# The raw columns are intentionally kept here (unlike the training set): the
# inference cell reads 'accept' from testset as the reference label.

In [29]:
# Display the dataset summary (column names and row count).
testset

Dataset({
    features: ['input', 'accept', 'reject', 'context', 'prompt', 'chosen', 'rejected'],
    num_rows: 100
})

In [33]:
# Index of the evaluation example to inspect.
j = 5
example = testset[j]

# Put the Unsloth model into its inference mode before generating.
FastLanguageModel.for_inference(model)

# Reuse the prompt rendered by format_prompt so inference sees exactly the same
# text layout as training, and so prompt and label come from the same row.
inputs = tokenizer([example["prompt"]], return_tensors = "pt").to("cuda")

outputs = model.generate(
    **inputs,
    max_new_tokens = 300,
    use_cache = True,
)

# Decode only the newly generated tokens. Slicing off the prompt avoids
# searching the echoed prompt for "[SQL]", and skip_special_tokens drops the
# end-of-text marker that previously leaked into the printed query.
prompt_token_count = inputs["input_ids"].shape[1]
completion = tokenizer.batch_decode(
    outputs[:, prompt_token_count:],
    skip_special_tokens = True,
)[0]
# Still honour an explicit closing tag if the model emits one.
result = completion.split("[/SQL]")[0].strip()

torch.cuda.empty_cache()
print(f"Inference: {result}\nLabel: {example['accept']}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Inference: SELECT si.imd_band AS IMD_Band, ASSESSMENT_TYPE AS Assessment_Type, COUNT(DISTINCT ASSESSMENTS.id_assessment) AS ASSESSMENT_COUNT FROM studentInfo si JOIN assessments AS ASSESSMENTS ON si.id_student = ASSESSMENTS.id_student GROUP BY si.imd_band, ASSESSMENT_TYPE ORDER BY ASSESSMENT_COUNT DESC;<|end_of_text|>
Label: SELECT si.imd_band, a.assessment_type, COUNT(*) AS num_assessments FROM studentinfo si INNER JOIN assessments a ON si.code_module = a.code_module AND si.code_presentation = a.code_presentation GROUP BY si.imd_band, a.assessment_type ORDER BY si.imd_band, num_assessments DESC;


In [18]:
# Show the natural-language question for the example at index j.
# NOTE(review): this cell's execution count (18) is lower than the inference
# cell's (33), so the stored output may come from a different j; re-run after
# the inference cell to confirm.
evalset[j]['input']

'그래그래 도서관의 기본 주소를 알려줘'