In [None]:
# Compress the Mistral-7B base checkpoint with llmin's Minimizer and request
# rank-64 LoRA adapters for the compressed model.
from llmin.core.minimizer import Minimizer

minimizer = Minimizer(
    target_model_id = "mistralai/Mistral-7B-v0.1",
    compression_factor=0.3
)

base_model = minimizer.compress()
adapters = minimizer.get_lora_adapters(base_model = base_model, rank = 64)

# Drop the references to the compressed model and the minimizer; of the names
# created in this cell only `adapters` (and the `Minimizer` import) stay bound.
# NOTE(review): later cells use a variable named `merged`, which is not assigned
# here or in any visible earlier cell -- presumably a cell that combined
# `adapters` with a model was removed. Confirm and restore it so the notebook
# survives Restart & Run All.
del base_model
del minimizer

### Testing Serialization

In [None]:
# Write the merged model to ./models/merged so the next cell can reload it.
# NOTE(review): `merged` is not assigned in any visible cell above this one, so
# on a fresh kernel this line would fail with NameError -- confirm where
# `merged` is supposed to come from.
merged.save_pretrained("./models/merged")

In [None]:
# Reload the model from the directory written by `save_pretrained` above, to
# check that it round-trips through serialization.
from transformers import AutoModelForCausalLM

# No quantization config or dtype is passed here, so the library defaults apply.
merged = AutoModelForCausalLM.from_pretrained("./models/merged")

In [None]:
# Spot-check a single tensor of the reloaded model: the input layernorm weight
# of layer 16.
merged.state_dict()["model.layers.16.input_layernorm.weight"]

In [None]:
# Smoke-test text generation with the reloaded model.
from transformers import pipeline 

# `model` is the already-instantiated object from the previous cells; the
# tokenizer is referenced by the hub id of the original Mistral checkpoint.
# NOTE(review): `device_map` is passed together with an already-loaded model --
# confirm it has the intended effect for a model instance.
pipe = pipeline(
    "text-generation",
    model = merged,
    tokenizer = "mistralai/Mistral-7B-v0.1",
    device_map = "auto"
)

# The last expression is displayed as the cell output.
pipe("What is ML?")

### Fine Tuning

In [None]:
!pip install datasets -q

In [None]:
from datasets import load_dataset

# Load mosaicml/instruct-v3 and keep only the rows whose `source` column is
# "dolly_hhrlhf".
instruct_tune_dataset = load_dataset("mosaicml/instruct-v3").filter(
    lambda row: row["source"] == "dolly_hhrlhf"
)

# Keep a small leading slice of each split: the first 1000 training rows and
# the first 200 test rows.
for split_name, row_count in (("train", 1000), ("test", 200)):
    instruct_tune_dataset[split_name] = instruct_tune_dataset[split_name].select(range(row_count))

In [None]:
def create_prompt(sample):
    """Format one dataset row as a single training string for the reversed task.

    The roles of the two dataset fields are swapped: the row's ``response``
    becomes the ``### Input`` section, and the row's ``prompt`` (with the
    default preamble and section markers stripped) becomes the
    ``### Response`` section the model should learn to produce.

    Args:
        sample: Mapping with string values under the keys ``"prompt"`` and
            ``"response"``.

    Returns:
        The full prompt string, wrapped in ``<s>`` ... ``</s>``.
    """
    default_preamble = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
    task_description = "Use the provided input to create an instruction that could have been used to generate the response with an LLM."

    # Strip the preamble and the section markers from the original prompt so
    # that only the bare instruction text is left.
    target_text = sample["prompt"]
    for fragment in (default_preamble, "\n\n### Instruction\n", "\n### Response\n"):
        target_text = target_text.replace(fragment, "")
    target_text = target_text.strip()

    # The original response is what the model gets to see as its input.
    source_text = sample["response"]

    pieces = [
        "<s>",
        "### Instruction:",
        "\n" + task_description,
        "\n\n### Input:",
        "\n" + source_text,
        "\n\n### Response:",
        "\n" + target_text,
        "</s>",
    ]
    return "".join(pieces)

In [None]:
!pip install bitsandbytes -q

In [None]:
from transformers import BitsAndBytesConfig
import torch

# 4-bit NF4 quantization settings with double quantization enabled and
# bfloat16 as the compute dtype.
# NOTE(review): `nf4_config` is not referenced by any other visible cell (the
# model is loaded without a quantization config) -- confirm whether it was
# meant to be passed to `from_pretrained`.
nf4_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16
)

In [None]:
from transformers import AutoTokenizer

# Tokenizer of the original (uncompressed) Mistral-7B checkpoint.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

# Reuse the EOS token as the padding token and pad on the right-hand side.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
from peft.tuners.lora.config import LoraConfig

# LoRA hyperparameters for causal-LM fine-tuning: rank 64, alpha 16,
# dropout 0.1, no bias parameters trained.
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM"
)

In [None]:
# Display the device the reloaded model reports before wrapping it for training.
merged.device

In [None]:
from peft import prepare_model_for_kbit_training, get_peft_model

# Run PEFT's k-bit training preparation on the model, then wrap it with LoRA
# adapters built from `peft_config`.
# NOTE(review): the model was loaded without a quantization config -- confirm
# that the k-bit preparation step is still wanted.
# NOTE(review): both lines rebind `merged`, so re-running this cell operates on
# an already-wrapped model; restart the kernel before re-executing it.
merged = prepare_model_for_kbit_training(merged)
merged = get_peft_model(merged, peft_config)

In [None]:
from transformers import TrainingArguments

# Hyperparameters for the LoRA fine-tuning run.
args = TrainingArguments(
  output_dir = "mistral_instruct_generation",
  # NOTE: per the TrainingArguments documentation a positive `max_steps`
  # overrides `num_train_epochs`, so the run length is governed by `max_steps`.
  num_train_epochs=5,
  # Effective batch size per optimizer step is 1 x 128 sequences per device.
  gradient_accumulation_steps = 128,
  max_steps = 100,
  per_device_train_batch_size = 1,
  per_device_eval_batch_size = 1,
  # 0.03 is a fraction of the total steps, so it belongs in `warmup_ratio`;
  # `warmup_steps` is an absolute step count.
  # NOTE(review): with lr_scheduler_type='constant' the warmup setting is
  # presumably not used at all -- switch to 'constant_with_warmup' if a warmup
  # phase is actually wanted.
  warmup_ratio = 0.03,
  logging_steps=10,
  save_strategy="epoch",
  # Evaluate every `eval_steps` optimizer steps.
  evaluation_strategy="steps",
  eval_steps=20,
  learning_rate=2e-4,
  bf16=True,
  lr_scheduler_type='constant',
)

In [None]:
!pip install trl -q

In [None]:
from trl import SFTTrainer

# Maximum length (in tokens) of each packed training sequence.
max_seq_length = 2048

# `merged` was already wrapped with `get_peft_model(merged, peft_config)` in an
# earlier cell, so `peft_config` is deliberately NOT passed again here: handing
# the trainer both a PEFT-wrapped model and a PEFT config is redundant.
# NOTE(review): newer TRL releases are believed to reject that combination
# outright -- confirm against the installed TRL version.
# With `packing=True`, the strings produced by `create_prompt` are packed into
# sequences of `max_seq_length` tokens.
trainer = SFTTrainer(
  model=merged,
  max_seq_length=max_seq_length,
  tokenizer=tokenizer,
  packing=True,
  formatting_func=create_prompt,
  args=args,
  train_dataset=instruct_tune_dataset["train"],
  eval_dataset=instruct_tune_dataset["test"]
)

In [None]:
# Start fine-tuning with the trainer configured above; the call's return value
# is displayed as the cell output.
trainer.train()