<a href="https://colab.research.google.com/github/nnilayy/MedGPT/blob/main/PEFT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Load the base model quantized to 8-bit with bitsandbytes.
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

checkpoint = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# Pad on the right and reuse the EOS token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(checkpoint, padding_side="right")
tokenizer.pad_token = tokenizer.eos_token

# NOTE: BitsAndBytesConfig has no `bnb_8bit_compute_dtype` option; the compute
# dtype setting only exists for 4-bit (`bnb_4bit_compute_dtype`). The unknown
# keyword was ignored, so it has been removed rather than left to mislead.
#
# To load in 4-bit instead, replace `load_in_8bit=True` with:
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_compute_dtype=torch.bfloat16,   # requires `import torch`
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
)

# device_map="auto" lets accelerate decide where to place the model weights.
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map="auto",
    quantization_config=bnb_config,
)

In [None]:
# TRAINING
# Wrap the quantized base model with LoRA adapters so only the adapter
# weights are trained.
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training

peft_config = LoraConfig(
    # `task_type` (not `peft_type`) is the field that takes a TaskType value.
    # `peft_type` names the adapter method and is set to LORA by LoraConfig
    # itself, so passing the task there left `task_type` unset.
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,                # rank of the LoRA update matrices
    lora_alpha=32,      # LoRA scaling factor
    lora_dropout=0.1,   # dropout applied inside the LoRA layers
    bias="none",        # do not train any bias terms
)

# Prepare the k-bit quantized model for training before attaching adapters.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
model.gradient_checkpointing_enable()

# print_trainable_parameters() prints its own report and returns None, so it
# must not be wrapped in print() (that emitted a stray "None" line).
model.print_trainable_parameters()

In [None]:
# TYPICAL TRAINING CODE
# Configure the run, build a Trainer around the PEFT model, and start training.
training_args = TrainingArguments(
    output_dir="your-name/bigscience/mt0-large-lora",
    # Optimisation settings.
    num_train_epochs=2,
    learning_rate=1e-3,
    weight_decay=0.01,
    # Same batch size for training and evaluation.
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    # Evaluate and checkpoint once per epoch; reload the best checkpoint
    # when training finishes.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# The "test" split of the tokenized dataset doubles as the evaluation set.
train_split = tokenized_datasets["train"]
eval_split = tokenized_datasets["test"]

trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

trainer.train()

In [None]:
# INFERENCE
# Load a published LoRA adapter together with its base model and generate a
# continuation for a single prompt.
from peft import AutoPeftModel , AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch

# Use the GPU when one is present, otherwise fall back to CPU so the cell
# does not crash on CPU-only runtimes.
device = "cuda" if torch.cuda.is_available() else "cpu"

# model = AutoPeftModel.from_pretrained("smangrul/openai-whisper-large-v2-LORA-colab")
model = AutoPeftModelForCausalLM.from_pretrained("ybelkada/opt-350m-lora")

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
model = model.to(device)

model.eval()
# Move the whole tokenizer output to the model's device and pass all of it
# (input_ids and attention_mask) to generate(), instead of input_ids alone.
inputs = tokenizer(
    "Preheat the oven to 350 degrees and place the cookie dough",
    return_tensors="pt",
).to(device)
outputs = model.generate(**inputs, max_new_tokens=50)

# batch_decode accepts the generated tensor directly; no NumPy round trip needed.
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])

In [None]:
# MERGE LORA WEIGHTS WITH BASE MODEL
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Placeholders: substitute the pre-trained model's name or path and the
# location of the trained adapter.
base_model_id = "base_model_name_or_path"
adapter_path = "path_to_trained_adapter"

# Load the base model, attach the trained adapter on top of it, then merge
# the adapter into the base weights and drop the PEFT wrapper.
base_model = AutoModelForCausalLM.from_pretrained(base_model_id)
peft_model = PeftModel.from_pretrained(base_model, adapter_path)
merged_model = peft_model.merge_and_unload()