In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets
!pip install evaluate
!pip install -qqq trl==0.7.1

In [None]:
!pip install flash_attn

In [None]:
import torch
import time
import evaluate
import pandas as pd
import numpy as np
from datasets import Dataset, load_dataset
import random

In [None]:
# Hub repository that holds the fine-tuning corpus.
huggingface_dataset_name = "unknown/Fine-tuning-Dataset-2730_2730"

# Load it; later cells index the result by split name (e.g. dataset['train']).
dataset = load_dataset(path=huggingface_dataset_name)

# Leave the object as the final expression so the notebook displays it.
dataset

In [None]:
# Keep only the rows whose index is a multiple of 100 (0, 100, 200, ...).
sample_dataset = dataset.filter(
    lambda _row, position: position % 100 == 0,
    with_indices=True,
)

In [None]:
def process_dataset(data: Dataset, seed: int = 42) -> Dataset:
    """Shuffle ``data`` with a reproducible seed.

    Args:
        data: Dataset to shuffle; it must expose ``shuffle(seed=...)``.
        seed: Seed forwarded to ``shuffle``. Defaults to 42, the value that
            used to be hard-coded, so existing calls behave exactly as before.

    Returns:
        The result of ``data.shuffle(seed=seed)``.
    """
    return data.shuffle(seed=seed)

In [None]:
# NOTE(review): this repeats the earlier sampling cell verbatim; it rebuilds
# sample_dataset from `dataset`, keeping every row whose index is a multiple of 100.
sample_dataset = dataset.filter(
    lambda _row, position: position % 100 == 0,
    with_indices=True,
)

In [None]:
# Replace the "train" split of the sampled dataset with the output of
# process_dataset (a seeded shuffle).
# NOTE(review): the split is overwritten in place, so running this cell twice
# shuffles an already-shuffled split.
sample_dataset["train"] = process_dataset(sample_dataset["train"])

In [None]:
# Shuffle the training split with a fixed seed and keep the first 2730 rows.
# The range is handed to `select` directly; building a list of indices first is unnecessary.
train_data = dataset['train'].shuffle(seed=42).select(range(2730))

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Base checkpoint to fine-tune.
model_id = "NousResearch/Yarn-Mistral-7b-128k"

# 4-bit bitsandbytes settings: NF4 quantization with double quantization,
# computing in bfloat16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)

# The tokenizer pads on the right and reuses the EOS token as its pad token.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

**Training**

In [None]:
def print_trainable_parameters(model):
    """Print how many of the model's parameters are trainable.

    Walks ``model.named_parameters()``, sums ``numel()`` over every parameter
    and over those with ``requires_grad`` set, then prints both totals and the
    trainable share as a percentage.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs.

    Returns:
        None. The summary line is written to stdout.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # A model with no parameters would otherwise raise ZeroDivisionError.
    percentage = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percentage}"
    )

In [None]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing, then pass the model through PEFT's k-bit
# preparation helper before any adapter is attached.
# NOTE(review): `model` is rebound to the helper's return value, so re-running
# this cell feeds the already-prepared model back in.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [None]:
# Print the model's repr for inspection (presumably to read off the module
# names that the LoRA config targets — confirm).
print(model)

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings: r=16, alpha=64, dropout=0.1, applied to the
# q/k/v/o projection modules, with bias="none".
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=16,
    lora_alpha=64,
    lora_dropout=0.1,
    bias="none",
)

# Wrap the model with the adapter, then report the trainable share.
model = get_peft_model(model, lora_config)
print_trainable_parameters(model)

trainable params: 13631488 || all params: 3765702656 || trainable%: 0.36199055648434075


In [None]:
# Directory passed to TrainingArguments as `output_dir`.
# NOTE(review): the name says "docsum" while the adapter is later saved under
# "peft-logparsing" — confirm the directory name is intended.
OUTPUT_DIR = "mistral-docsum-adapter"

In [None]:
from transformers import TrainingArguments

training_arguments = TrainingArguments(
    # --- output / bookkeeping ---
    output_dir=OUTPUT_DIR,
    report_to="tensorboard",
    logging_steps=1,
    save_strategy="epoch",
    save_safetensors=True,
    seed=42,
    # --- batching: 4 per device x 4 accumulation steps ---
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    group_by_length=True,
    # --- optimisation schedule ---
    num_train_epochs=3,
    optim="paged_adamw_32bit",
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    max_grad_norm=0.3,
    # NOTE(review): fp16 mixed precision here, while the 4-bit config used to
    # load the base model computes in bfloat16 — confirm the mix is intended.
    fp16=True,
    # --- evaluation: a fractional eval_steps is a share of total steps
    # (the recorded run evaluated every 102 of 510 steps) ---
    evaluation_strategy="steps",
    eval_steps=0.2,
)

# The generation cache is switched off for training.
model.config.use_cache = False

In [None]:
from trl import SFTTrainer
# Hold out 10% of the examples for evaluation. Passing the training set as
# `eval_dataset` makes the reported validation loss a measure of fit on data
# the model has already seen, not of generalisation.
train_eval_split = train_data.train_test_split(test_size=0.1, seed=42)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_eval_split["train"],
    eval_dataset=train_eval_split["test"],
    peft_config=lora_config,
    dataset_text_field="text",  # column holding the full training text
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_arguments,
)

# Kept as the final expression so the notebook displays the TrainOutput.
trainer.train()

Map:   0%|          | 0/2730 [00:00<?, ? examples/s]

Map:   0%|          | 0/2730 [00:00<?, ? examples/s]



Step,Training Loss,Validation Loss
102,0.3445,0.333291
204,0.0525,0.316377
306,0.1389,0.146427
408,0.0763,0.103021
510,0.132,0.093973




TrainOutput(global_step=510, training_loss=0.24642368359308617, metrics={'train_runtime': 3148.5293, 'train_samples_per_second': 2.601, 'train_steps_per_second': 0.162, 'total_flos': 6.165253647561523e+16, 'train_loss': 0.24642368359308617, 'epoch': 2.99})

In [None]:
# Directory that receives the trained adapter and the tokenizer files.
peft_model_path="./peft-logparsing"

# Save the trainer's model and the tokenizer side by side; the tokenizer call
# is the last expression, so its returned file paths are displayed.
trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)

('./peft-logparsing/tokenizer_config.json',
 './peft-logparsing/special_tokens_map.json',
 './peft-logparsing/tokenizer.model',
 './peft-logparsing/added_tokens.json',
 './peft-logparsing/tokenizer.json')

In [None]:
from transformers import TextStreamer
# Re-enable the generation cache (it was set to False before training) and put
# the model into evaluation mode.
model.config.use_cache = True
model.eval()

In [None]:
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

from transformers import BitsAndBytesConfig

peft_model_dir = "peft-logparsing"

# Reload the saved adapter together with its base model, quantized to 4-bit.
# The bare `load_in_4bit=True` keyword is deprecated; the same request is made
# through a BitsAndBytesConfig passed as `quantization_config`.
# NOTE(review): only `load_in_4bit` is set, so the remaining 4-bit options use
# library defaults rather than the NF4 / double-quant settings used when the
# base model was loaded for training — confirm this is intended.
trained_model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_dir,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)
tokenizer = AutoTokenizer.from_pretrained(peft_model_dir)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

**Merge Trained LoRA Adapter with Base Model and Push Model to Hub**

In [None]:
# Load the adapter again, this time in float16 with no quantization argument.
trained_model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_dir,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

# Fold the LoRA adapter into the base model.
merged_model = trained_model.merge_and_unload()

# Write the merged model (safetensors) and the tokenizer to "merged_model".
merged_model.save_pretrained("merged_model", safe_serialization=True)
tokenizer.save_pretrained("merged_model")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

('merged_model/tokenizer_config.json',
 'merged_model/special_tokens_map.json',
 'merged_model/tokenizer.model',
 'merged_model/added_tokens.json',
 'merged_model/tokenizer.json')

**Push Model to Hub**

In [None]:
import getpass
import os

# Prompt for the Hub token without echoing it and expose it through the
# HUGGING_FACE_HUB_TOKEN environment variable; nothing is hard-coded.
os.environ["HUGGING_FACE_HUB_TOKEN"] = getpass.getpass("Token: ")
# Fail fast if the prompt was left empty.
assert os.environ["HUGGING_FACE_HUB_TOKEN"]

Token: ··········


In [None]:
# Upload the merged model and its tokenizer to the same Hub repository.
merged_model.push_to_hub("unknown/Yarn-Mistral-7b-128k_Fine-Tuned4LogParsing")
tokenizer.push_to_hub("unknown/Yarn-Mistral-7b-128k_Fine-Tuned4LogParsing")