In [None]:
import torch

major_version, minor_version = torch.cuda.get_device_capability() # Check CUDA compatibility
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

if major_version >= 8:
  !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
  !pip install --no-deps xformers trl peft accelerate bitsandbytes

Collecting unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-jf1w_yqn/unsloth_27112194ea924a4aa9ca214b197ccb0e
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-jf1w_yqn/unsloth_27112194ea924a4aa9ca214b197ccb0e
  Resolved https://github.com/unslothai/unsloth.git to commit 933d9fe2cb2459f949ee2250e90a5b610d277eab
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tyro (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading tyro-0.8.5-py3-none-any.whl (103 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.4/103.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting datasets>=2.16.0 (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Dow

# Load the DPO-formatted dataset file

In [None]:
!pip install datasets



In [None]:
import json
from datasets import Dataset

# Preference data for DPO: one JSON object mapping column names to lists,
# which is the layout Dataset.from_dict consumes.
dpo_path = "/content/answer_chatgpt3_5.json"

try:
    # NOTE(review): errors='ignore' silently drops bytes that are not valid
    # UTF-8, which can alter the text without any warning -- confirm intended.
    with open(dpo_path, encoding='utf-8', errors='ignore') as f:
        dpo_dataset_dict = json.load(f)
except json.JSONDecodeError as e:
    print(f"Error decoding JSON: {e}")
    # Re-raise: the training cell needs train_dataset, so continuing would only
    # fail later with a NameError (or silently reuse a stale dataset).
    raise

# The file stores the dispreferred answers under 'reject'; the dataset built
# below exposes them as 'rejected'. pop() renames in one step, and the guard
# tolerates a file that already uses the final column name.
if 'reject' in dpo_dataset_dict:
    dpo_dataset_dict['rejected'] = dpo_dataset_dict.pop('reject')

# Fail early with a readable message if a required column is absent.
missing_columns = {'prompt', 'chosen', 'rejected'} - set(dpo_dataset_dict)
if missing_columns:
    raise KeyError(f"{dpo_path} is missing DPO columns: {sorted(missing_columns)}")

train_dataset = Dataset.from_dict(dpo_dataset_dict)
print(train_dataset)  # Check if the dataset loads successfully

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 1557
})


In [None]:
import torch
from trl import DPOConfig, DPOTrainer
from unsloth import FastLanguageModel

# Sequence-length limit handed to the model loader. RoPE scaling is applied
# automatically, so any value may be chosen.
max_seq_length = 2048

# Fetch the base checkpoint together with its tokenizer.
# Options from the template that are left at their defaults here:
#   dtype        - None auto-detects; float16 for Tesla T4 / V100, bfloat16 for Ampere+
#   load_in_4bit - 4-bit quantization to reduce memory usage
#   token        - Hugging Face access token, only needed for gated models
model, tokenizer = FastLanguageModel.from_pretrained(
    max_seq_length=max_seq_length,
    model_name="scb10x/llama-3-typhoon-v1.5x-8b-instruct",
)

# Patch the model and attach LoRA adapters to the attention (q/k/v/o) and
# MLP (gate/up/down) projection layers.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    r=16,
    lora_alpha=16,
    lora_dropout=0,  # 0 is the currently optimized setting
    bias="none",     # "none" is the currently optimized setting
    use_gradient_checkpointing=True,
    random_state=3407,
)


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth: Fast Llama patching release 2024.6
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.564 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0+cu121. CUDA = 8.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = True.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
scb10x/llama-3-typhoon-v1.5x-8b-instruct does not have a padding token! Will use pad_token = <|reserved_special_token_250|>.
Unsloth 2024.6 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
# Hyperparameters for the DPO run. Values tagged "Zephyr" follow that recipe.
# No evaluation settings are passed, so only the training set is used.
training_args = DPOConfig(
    # Bookkeeping: output location, reproducibility, checkpoint/log cadence.
    output_dir="./typhoon_DPO/",
    seed=3407,
    save_strategy="epoch",
    logging_steps=100,
    # Batching: 1 sample per device x 16 accumulation steps.
    per_device_train_batch_size=1,   # Zephyr
    gradient_accumulation_steps=16,  # Zephyr
    # Optimization schedule.
    num_train_epochs=2,              # Zephyr
    learning_rate=5.0e-07,           # Zephyr
    lr_scheduler_type="cosine",      # Zephyr
    warmup_ratio=0.1,                # Zephyr
    optim="paged_adamw_8bit",
    # Mixed precision: bf16 when the GPU supports it, otherwise fp16.
    bf16=torch.cuda.is_bf16_supported(),
    fp16=not torch.cuda.is_bf16_supported(),
)

# Build the trainer on the LoRA-patched model. No separate reference model is
# supplied (ref_model=None).
# NOTE(review): presumably TRL derives the reference policy from the PEFT base
# weights in this case -- confirm against the installed TRL version.
dpo_trainer = DPOTrainer(
    model,
    args=training_args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    ref_model=None,
)

# Last expression of the cell, so the returned TrainOutput is displayed.
dpo_trainer.train()



Map:   0%|          | 0/1557 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,557 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 1 | Gradient Accumulation steps = 16
\        /    Total batch size = 16 | Total steps = 194
 "-____-"     Number of trainable parameters = 41,943,040
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
100,0.6898


TrainOutput(global_step=194, training_loss=0.6854819661563205, metrics={'train_runtime': 1759.0328, 'train_samples_per_second': 1.77, 'train_steps_per_second': 0.11, 'total_flos': 0.0, 'train_loss': 0.6854819661563205, 'epoch': 1.9935773924213231})

In [None]:
# Attach Google Drive to the Colab filesystem at /content/drive.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive
