In [None]:
# ---- Run configuration: everything a reader might want to tune ----

# Base checkpoint to fine-tune and the sequence length used for loading/training.
model_name = "teknium/OpenHermes-2.5-Mistral-7B"
max_seq_length = 2048

# Batching: batch_size is handed to the trainer as the per-device batch size and
# gradients are accumulated over gradient_accumulation_steps micro-batches.
batch_size = 8
gradient_accumulation_steps = 4

# Optimisation schedule.
optimizer = "adamw_8bit"
learning_rate = 2e-4
weight_decay = 0.01
lr_scheduler_type = "linear"
warmup_steps = 10
# NOTE(review): no cell visible in this notebook reads max_steps -- confirm
# whether it is still needed.
max_steps = 60

# Memory / reproducibility switches.
use_gradient_checkpointing = True
random_state = 3407

In [None]:
import torch
from unsloth import FastLanguageModel

# Load the base model and tokenizer through Unsloth.
#
# model_name and max_seq_length come from the configuration cell; they are
# deliberately NOT re-declared here so that editing the configuration actually
# changes what gets loaded.

# Probe the GPU first and derive the load dtype from it. The trainer cell sets
# fp16/bf16 from HAS_BFLOAT16, so the model dtype must follow the same switch;
# a hard-coded bfloat16 would disagree with fp16 training on GPUs without
# bf16 support.
HAS_BFLOAT16 = torch.cuda.is_bf16_supported()
dtype = torch.bfloat16 if HAS_BFLOAT16 else torch.float16
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    device_map = "auto",
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

In [None]:
# Wrap the loaded base model with LoRA adapters on the attention and MLP
# projection modules listed in target_modules.
#
# use_gradient_checkpointing, random_state and max_seq_length are taken from
# the configuration cell rather than repeated as literals, so there is a single
# place to change them.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Currently only supports dropout = 0
    bias = "none",    # Currently only supports bias = "none"
    use_gradient_checkpointing = use_gradient_checkpointing,
    random_state = random_state,
    max_seq_length = max_seq_length,
)

In [None]:
from datasets import load_dataset

# Read the training records from the local JSON file; the records are taken
# from the file's top-level "data" field and exposed as the "train" split.
dataset = load_dataset(
    "json",
    data_files="data/cleaned_data.json",
    field="data",
    split="train",
)



In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from transformers.utils import logging
# Show INFO-level messages from transformers while setting up and training.
logging.set_verbosity_info()

# Hyper-parameters are pulled from the configuration cell. Mixed precision
# follows HAS_BFLOAT16 from the model-loading cell: bf16 when supported,
# fp16 otherwise.
# NOTE(review): training length is fixed by num_train_epochs here; the
# max_steps value from the configuration cell is not passed -- confirm intent.
training_args = TrainingArguments(
    output_dir = "outputs",
    seed = random_state,
    num_train_epochs = 2,
    per_device_train_batch_size = batch_size,
    gradient_accumulation_steps = gradient_accumulation_steps,
    optim = optimizer,
    learning_rate = learning_rate,
    weight_decay = weight_decay,
    lr_scheduler_type = lr_scheduler_type,
    warmup_steps = warmup_steps,
    bf16 = HAS_BFLOAT16,
    fp16 = not HAS_BFLOAT16,
    logging_steps = 1,
)

# dataset_text_field is intentionally left unset (it was previously tried
# with "content" and disabled).
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    max_seq_length = max_seq_length,
    args = training_args,
)

In [None]:
# Report the GPU in use and how much memory is already reserved before
# training starts. Sizes are converted from bytes to GB (1024**3 bytes) and
# rounded to three decimals.
BYTES_PER_GB = 1024 ** 3

gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / BYTES_PER_GB, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / BYTES_PER_GB, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

In [None]:
# Run the fine-tuning loop; whatever train() returns is kept in trainer_stats
# so it can be inspected afterwards.
trainer_stats = trainer.train()

In [None]:
# Persist everything produced by the training run under one directory.
output_dir = "outputs/test"

# Model weights/config go to output_dir; the torch.save calls below write
# into the same directory.
trainer.save_model(output_dir)

# save_state() writes trainer_state.json into the trainer's own
# args.output_dir ("outputs"), not into output_dir. Keep that call for
# backward compatibility and additionally store a copy next to the model,
# optimizer and scheduler files so the checkpoint folder is self-contained.
trainer.save_state()
trainer.state.save_to_json(f"{output_dir}/trainer_state.json")

# CUDA RNG state of all devices, plus optimizer and LR-scheduler state.
torch.save(torch.cuda.random.get_rng_state_all(), f"{output_dir}/rng_state.pth")
torch.save(trainer.optimizer.state_dict(), f"{output_dir}/optimizer.pt")
torch.save(trainer.lr_scheduler.state_dict(), f"{output_dir}/scheduler.pt")



In [None]:

from torchsummary import summary

# Print the module tree of the (adapter-wrapped) model.
print(model)

# The previous `summary(model, (max_seq_length))` call could not run:
# `(max_seq_length)` is a plain int, not a tuple, and torchsummary iterates
# over its input_size argument. Even with a proper tuple, torchsummary probes
# the network with random float tensors, which a language model that expects
# integer token ids cannot consume. Report parameter counts directly instead.
# NOTE(review): with 4-bit quantised weights, numel() presumably reflects the
# packed storage size of those tensors -- treat the total as approximate.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
trainable_pct = 100 * trainable_params / total_params if total_params else 0.0

print(f"Total parameters:     {total_params:,}")
print(f"Trainable parameters: {trainable_params:,} ({trainable_pct:.4f}%)")



In [None]:
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

import torch

# Merge a trained LoRA adapter into the fp16 base model and export the result
# (weights + tokenizer) to save_dir.

# Hub id or local path of the base model, directory of the adapter, and the
# export destination.
base_model_name = "teknium/OpenHermes-2.5-Mistral-7B"
adapter_model_name = "./peft/bmr"
save_dir = 'OpenBhi-3.0-Mistral-7B'

# Placement of the base model: head, embeddings and final norm on GPU 0,
# layers 0..16 on GPU 0, layers 17..31 on the CPU.
# NOTE(review): 32 is presumably the model's decoder-layer count -- verify if
# the base model changes.
device_map = {'lm_head': 0, 'model.embed_tokens': 0, 'model.norm.weight': 0}
for layer_idx in range(32):
    device_map[f"model.layers.{layer_idx}"] = 0 if layer_idx <= 16 else 'cpu'

model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map = device_map,
    torch_dtype = torch.float16,
    low_cpu_mem_usage = True,
)

# Placement passed when attaching the adapter: every entry is mapped to the CPU.
peft_map = {
    'base_model.model.lm_head': 'cpu',
    'base_model.model.model.embed_tokens': 'cpu',
    'base_model.model.model.norm.weight': 'cpu',
}
peft_map.update(
    {f"base_model.model.model.layers.{layer_idx}": 'cpu' for layer_idx in range(32)}
)

# Attach the adapter, merge it into the base model, then write the merged
# model to disk.
model = PeftModel.from_pretrained(model, adapter_model_name, device_map = peft_map)
model = model.merge_and_unload()
model.save_pretrained(save_dir)

# Load the base model's tokenizer and store it next to the merged weights so
# save_dir can be loaded on its own.
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.save_pretrained(save_dir)


In [None]:
# Load the base model's tokenizer again (base_model_name is defined in the
# merge cell) and write its files into the current working directory.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    trust_remote_code=True,
)
tokenizer.save_pretrained("./")