In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

secret_value_0 = os.getenv('HF_TOKEN')

from huggingface_hub import login
login(token = secret_value_0)

import torch
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM

from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer
from peft import LoraConfig, get_peft_model
import pandas as pd
from swarm_descriptions.mission_elements import get_generators, MissionParams
from swarm_descriptions.configfiles import config_to_string
from swarm_descriptions.utils import truncate_floats
import random
import pyarrow as pa
import pyarrow.dataset as ds
import pickle
import numpy as np
import random
import re
import pathlib
import yaml
from sklearn.model_selection import train_test_split
from datasets import Dataset
from trl import SFTTrainer, SFTConfig
from transformers import TrainingArguments
from peft import PeftModel

random.seed(42)
np.random.seed(42)

  from .autonotebook import tqdm as notebook_tqdm
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:

def load_foundation_model_and_tokenizer(adapter_save_path):
    lora_config = LoraConfig(
        r=3,  # smaller lora dimension? original 16
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )

    model_name = "mistralai/Mathstral-7B-v0.1"
    #model_name = "Qwen/Qwen2.5-0.5B-Instruct"

    compute_dtype = getattr(torch, "float16")
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype,
    )

    # Load the base model
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
    )
    model.config.use_cache = False
    
    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token = tokenizer.unk_token

    # Load the adapter into the model
    model = get_peft_model(model, lora_config)

    # Save the adapter to the specified directory
    model.save_pretrained(adapter_save_path)

    return model_name, tokenizer, adapter_save_path, bnb_config

# Specify the path where you want to save the adapter
adapter_save_path = "./peft_adapter"

# Load the model and save the adapter
model_name, tokenizer, adapter_save_path, bnb_config = load_foundation_model_and_tokenizer(adapter_save_path)

# Load the base model again
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

# Load the adapter twice with different names
model = PeftModel.from_pretrained(model, adapter_save_path, adapter_name="train_adapt")
model.load_adapter(adapter_save_path, adapter_name="reference_adapt")

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 6/6 [00:42<00:00,  7.07s/it]
`low_cpu_mem_usage` was None, now default to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 6/6 [00:16<00:00,  2.75s/it]


_IncompatibleKeys(missing_keys=['base_model.model.model.embed_tokens.weight', 'base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.train_adapt.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_B.train_adapt.weight', 'base_model.model.model.layers.0.self_attn.k_proj.weight', 'base_model.model.model.layers.0.self_attn.v_proj.base_layer.weight', 'base_model.model.model.layers.0.self_attn.v_proj.lora_A.train_adapt.weight', 'base_model.model.model.layers.0.self_attn.v_proj.lora_B.train_adapt.weight', 'base_model.model.model.layers.0.self_attn.o_proj.weight', 'base_model.model.model.layers.0.mlp.gate_proj.weight', 'base_model.model.model.layers.0.mlp.up_proj.weight', 'base_model.model.model.layers.0.mlp.down_proj.weight', 'base_model.model.model.layers.0.input_layernorm.weight', 'base_model.model.model.layers.0.post_attention_layernorm.weight', 'base_model.model.model.layers.1.self_attn.q_proj.base_layer.

In [4]:
# train_dpo.py
from datasets import load_dataset
from trl import DPOConfig, DPOTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer.pad_token = tokenizer.eos_token
train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
train_dataset = train_dataset.select(range(100)) # for fast debugging and testing

# max prompt and completion length are indispensible for not crashing. gradient_checkpointing does reduce memory significantly but not really improving loss consistently (also conflicts with potential requirement cache=False in model loading)
training_args = DPOConfig(output_dir="DPO", report_to="none",    model_adapter_name="train_adapt",
    ref_adapter_name="reference_adapt",per_device_train_batch_size=1, per_device_eval_batch_size=1, logging_dir="logs",logging_steps=10,gradient_accumulation_steps=1,eval_accumulation_steps=1, max_prompt_length=200,max_completion_length=750) 
trainer = DPOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
trainer.train()

Tokenizing train dataset: 100%|██████████| 100/100 [00:00<00:00, 480.84 examples/s]
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,0.6953
20,0.6926
30,0.6935
40,0.6944
50,0.6953
60,0.6896
70,0.6883
80,0.692
90,0.6908
100,0.6881


TrainOutput(global_step=300, training_loss=0.6701448567708334, metrics={'train_runtime': 287.4167, 'train_samples_per_second': 1.044, 'train_steps_per_second': 1.044, 'total_flos': 0.0, 'train_loss': 0.6701448567708334, 'epoch': 3.0})