# Action Items:
### 1) Incorportate flash attn
### 2) Diferent experiment for quantization and LORA
### 3) An Eval script to eval on [Benchmark](https://github.com/meta-llama/PurpleLlama/tree/main/CybersecurityBenchmarks)

In [1]:
# !pip install datasets -Uqq
# !pip install trl -Uqq
# !pip install peft -Uqq

In [2]:
from datasets import load_dataset
from trl import DPOTrainer, DPOConfig
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments
)

In [3]:
dataset = load_dataset("CyberNative/Code_Vulnerability_Security_DPO")

In [4]:
dataset['train']

Dataset({
    features: ['lang', 'vulnerability', 'system', 'question', 'chosen', 'rejected'],
    num_rows: 4656
})

In [5]:
ds = dataset.select_columns(['question', 'chosen', 'rejected'])

In [6]:
model_name = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'

In [7]:
# !pip install flash-attn==2.6.3 --no-build-isolation

In [8]:
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,      
    llm_int8_threshold=6.0
)

In [9]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto", 
    quantization_config=bnb_config,
    # use_flash_attn= True
)

In [10]:
lora_config = LoraConfig(
task_type="CAUSAL_LM",
inference_mode=False,
r=8,
lora_alpha=32,
lora_dropout=0.1,
target_modules=["q_proj", "v_proj"]
)

In [None]:
import peft
model = peft.get_peft_model(model, lora_config)

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

In [27]:
def get_max_lengths(dataset, tokenizer):
    max_prompt_len = 0
    max_total_len = 0
    for example in dataset:
        # Tokenize the prompt and the chosen response
        prompt_tokens = tokenizer(example["question"], add_special_tokens=False)["input_ids"]
        chosen_tokens = tokenizer(example["chosen"], add_special_tokens=False)["input_ids"]
        
        # Calculate lengths
        prompt_len = len(prompt_tokens)
        total_len = prompt_len + len(chosen_tokens)

        # Update maximum lengths
        if prompt_len > max_prompt_len:
            max_prompt_len = prompt_len
        if total_len > max_total_len:
            max_total_len = total_len
    return max_prompt_len, max_total_len

# Example usage assuming your dataset is stored under dataset["train"]
max_prompt_length, max_length = get_max_lengths(dataset["train"], tokenizer)
print("Max prompt length:", max_prompt_length)
print("Max total length (prompt + chosen):", max_length)

Max prompt length: 261
Max total length (prompt + chosen): 2267


In [28]:
dpo_config = DPOConfig(
    beta=0.1,
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    max_length=1024,
    max_prompt_length=280,
    # gradient_accumulation_steps=2,
    logging_steps=10,
    output_dir="dpo-trained-model",
    optim="paged_adamw_32bit"
)


In [29]:
dpo_trainer = DPOTrainer(
    model=model,
    ref_model=None,
    args=dpo_config,
    train_dataset=ds["train"],
    tokenizer=tokenizer
)

  dpo_trainer = DPOTrainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [30]:
# !pip install -U bitsandbytes

In [31]:
# Not enough RAM
dpo_trainer.train()

Step,Training Loss
10,0.6114
20,0.6351
30,0.662
40,0.6417
50,0.5972
60,0.5989
70,0.5526
80,0.5446
90,0.5574
100,0.533


TrainOutput(global_step=3492, training_loss=0.11263048025818898, metrics={'train_runtime': 1844.8631, 'train_samples_per_second': 7.571, 'train_steps_per_second': 1.893, 'total_flos': 0.0, 'train_loss': 0.11263048025818898, 'epoch': 3.0})