<a href="https://colab.research.google.com/github/nnilayy/MedGPT/blob/main/PEFT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Base checkpoint to fine-tune; the tokenizer and the model weights are both
# loaded from it.
checkpoint = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# Pad on the right and reuse the EOS token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(checkpoint, padding_side="right")
tokenizer.pad_token = tokenizer.eos_token

# Load the weights in 8-bit. BitsAndBytesConfig only exposes a compute-dtype
# option for 4-bit loading (`bnb_4bit_compute_dtype`); there is no 8-bit
# equivalent, so no compute dtype is passed here.
# For 4-bit (QLoRA-style) loading instead, use `load_in_4bit=True` together
# with `bnb_4bit_quant_type="nf4"`, `bnb_4bit_use_double_quant=True` and
# `bnb_4bit_compute_dtype=torch.bfloat16`.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

# device_map="auto" lets the library decide where to place the model's layers.
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map="auto",
    quantization_config=bnb_config,
)

In [None]:
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training
# LoRA adapter settings: rank-8 update matrices, alpha=32, 10% dropout on the
# LoRA layers, and no bias parameters trained.
# The task is declared through `task_type`; `peft_type` is reserved for the
# adapter family and is set by LoraConfig itself, so passing a TaskType there
# would leave the task unset.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

# Prepare the quantized base model for training, then attach the LoRA adapter.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
model.gradient_checkpointing_enable()

# print_trainable_parameters() prints its own summary and returns None, so it
# is called directly rather than wrapped in print().
model.print_trainable_parameters()

In [None]:
from transformers import TrainingArguments, Trainer
import bitsandbytes

# --- Hyperparameters and output location -----------------------------------
EPOCHS = 3
LEARNING_RATE = 1e-4
MODEL_SAVE_FOLDER_NAME = "dolly-3b-lora"

# Train with fp16 at batch size 1; log, evaluate and checkpoint once per epoch.
training_args = TrainingArguments(
    output_dir=MODEL_SAVE_FOLDER_NAME,
    overwrite_output_dir=True,
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    fp16=True,
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    save_strategy="epoch",
)

# `split_dataset` and `data_collator` are expected to be defined by earlier
# notebook cells.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=split_dataset["train"],
    eval_dataset=split_dataset["test"],
    tokenizer=tokenizer,
)

# Turn off the generation cache before training starts.
model.config.use_cache = False
trainer.train()

# --- Persist results --------------------------------------------------------
# Everything is written into the same output folder: the trained model, the
# Trainer's own save, and the model config.
trainer.model.save_pretrained(MODEL_SAVE_FOLDER_NAME)
trainer.save_model(MODEL_SAVE_FOLDER_NAME)
trainer.model.config.save_pretrained(MODEL_SAVE_FOLDER_NAME)

# The pretrained LLM is combined with the small set of weights trained with PEFT techniques, and
# the resulting model can be used for numerous tasks.

# The above figure shows the results of the LLaMA 2 7B model trained with different floating-point data types
# on various tasks. The models trained with NF4 and 4-bit float give better results than LoRA and the LLaMA 2 7B
# base model, while 4-bit NormalFloat (NF4) performs slightly better than the float4 data type. QLoRA
# substantially decreases the memory requirements by using the NF4 type. However, the tradeoff is a slower
# training time, which is to be expected due to the quantization and dequantization steps.