### PEFT (Parameter-Efficient Fine Tuning) and LoRA (Low-rank Adaptation)

The following is from the second notebook of Large-Language-Model-Notebooks-Course:

https://github.com/peremartra/Large-Language-Model-Notebooks-Course/blob/main/5-Fine%20Tuning/5_2_LoRA_Tuning.ipynb

In [None]:
# Selects the fine-tuning data: True uses the child-stories dataset,
# False uses the original prompt-generation dataset.
stories = True

In [None]:
# Install pinned versions of the peft and datasets packages (-q keeps pip quiet).
!pip install -q peft==0.10.0
!pip install -q datasets==2.19.0

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Default base model and the module names LoRA is attached to for it.
model_name = "bigscience/bloomz-560m"
target_modules = ["query_key_value"]

# Alternatives (uncomment to switch):
# model_name = "bigscience/bloom-1b1"
# model_name = "ajibawa-2023/Young-Children-Storyteller-Mistral-7B"  # This model is HUGE!
# model_name = "mistralai/Mistral-7B-v0.1"
# target_modules = ["q_proj", "v_proj"]  # listed alongside the Mistral options

# Device used to load the model and to run inference: "cuda", "mps" or "cpu".
device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
foundation_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device,
)

### Inference with the pretrained model

In [None]:
def get_outputs(model, inputs, max_new_tokens=100):
    """Generate a continuation of ``inputs`` with ``model``.

    Args:
        model: Object exposing a ``generate`` method.
        inputs: Mapping that provides ``"input_ids"`` and ``"attention_mask"``.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        Whatever ``model.generate`` returns for these arguments.

    Note:
        The end-of-sequence id is read from the module-level ``tokenizer``.
    """
    # early_stopping=True is intentionally left out (it was disabled originally).
    return model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.5,  # Penalize repeated text.
        eos_token_id=tokenizer.eos_token_id,
    )

In [None]:
# Baseline: generate with the foundation model before any fine-tuning.
input_sentences = tokenizer(
    "I want you to act as a motivational coach. ",
    return_tensors="pt",
)
foundational_outputs_sentence = get_outputs(
    foundation_model,
    input_sentences.to(device),
    max_new_tokens=50,
)

print(
    tokenizer.batch_decode(
        foundational_outputs_sentence,
        skip_special_tokens=True,
    )
)

The completion does not look like a prompt. We need to train our model if we want it to act like a prompt engineer.

In [None]:
from datasets import load_dataset

In [None]:
# Pick the dataset and the column holding its raw text based on `stories`.
if stories:
    # https://huggingface.co/datasets/lilithyu/kaggle-child-stories
    dataset = "lilithyu/kaggle-child-stories"
    text_column = "text"
else:
    # Original example for prompt generation:
    # https://huggingface.co/datasets/fka/awesome-chatgpt-prompts
    dataset = "fka/awesome-chatgpt-prompts"
    text_column = "prompt"

# Tokenize the chosen column in batches, then keep the first 50 training rows.
data = load_dataset(dataset)
data = data.map(lambda samples: tokenizer(samples[text_column]), batched=True)
train_sample = data["train"].select(range(50))

if not stories:
    # Only the prompts dataset has its 'act' column dropped.
    train_sample = train_sample.remove_columns('act')

display(train_sample)

In [None]:
# Name of the raw-text column for the dataset chosen by `stories`.
select = 'text' if stories else 'prompt'

# Show the raw text of the first training sample.
train_sample[0][select]

In [None]:
# Show the 'input_ids' column of the first training sample.
train_sample[0]['input_ids']

### Now the fine-tuning with LoRA

In [None]:
# create LoRA config
import peft
from peft import LoraConfig, get_peft_model, PeftModel

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    # Modules that receive LoRA adapters (defined next to model_name).
    target_modules=target_modules,
    # The bigger r is, the more parameters there are to train.
    r=4,
    # LoRA scaling factor; see the PEFT LoraConfig docs for how it combines with r.
    lora_alpha=1,
    # Dropout on the LoRA layers; helps to avoid overfitting.
    lora_dropout=0.05,
    # Bias-training mode. NOTE(review): confirm the exact semantics of
    # "lora_only" in the PEFT LoraConfig docs.
    bias="lora_only",
)

In [None]:
# Create the PEFT (Parameter-Efficient Fine-Tuning) model by applying
# lora_config to the foundation model.
peft_model = get_peft_model(foundation_model, lora_config)

# print_trainable_parameters() prints its own summary and returns None, so it
# must not be wrapped in print() -- that would emit an extra "None" line.
peft_model.print_trainable_parameters()

In [None]:
# Base directory under which the output directory path is built
# (nothing is created on disk in this cell).
import os
working_dir = './'

In [None]:
# The output folder name depends on which dataset is being used.
output_subdir = "peft_story_outputs" if stories else "peft_prompt_outputs"
output_directory = os.path.join(working_dir, output_subdir)

In [None]:
#Creating the TrainingArgs
import transformers
from transformers import TrainingArguments, Trainer

In [None]:
training_args = TrainingArguments(
    output_dir=output_directory,
    # Let the Trainer search for a workable batch size automatically
    # (see the TrainingArguments docs for the exact behavior).
    auto_find_batch_size=True,
    # The learning rate is usually higher than in full fine-tuning.
    learning_rate=3e-2,
    # optim="sgd",  # Use only to test a different optimizer.
    num_train_epochs=2,
    # Follow the notebook-wide `device` setting instead of hard-coding CPU, so
    # switching `device` to "cuda" or "mps" also moves training off the CPU.
    use_cpu=(device == "cpu"),
)

Now we can train the model. To train the model we need:

- The PEFT Model.
- The training_args
- The Dataset
- The result of DataCollator, the Dataset ready to be processed in blocks.

In [None]:
# This cell may take up to 15 minutes to execute.

# Collator that batches the tokenized samples; masked-LM mode is disabled.
data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_sample,
    data_collator=data_collator,
)
trainer.train()

In [None]:
# Save the trained model under <output_directory>/lora_model.
peft_model_path = os.path.join(output_directory, "lora_model")
trainer.model.save_pretrained(peft_model_path)

In [None]:
# Reload the saved adapter on top of the foundation model, not trainable.
# NOTE(review): foundation_model was already passed to get_peft_model in an
# earlier cell; confirm whether a freshly loaded base model should be used here.
loaded_model = PeftModel.from_pretrained(
    foundation_model,
    peft_model_path,
    is_trainable=False,
)
loaded_model.to(device)

### Inference with the fine-tuned model

In [None]:
# Same prompt as the baseline run, now answered by the reloaded fine-tuned model.
input_sentences = tokenizer(
    "I want you to act as a motivational coach. ",
    return_tensors="pt",
)
finetuned_outputs_sentence = get_outputs(
    loaded_model,
    input_sentences.to(device),
    max_new_tokens=50,
)

print(
    tokenizer.batch_decode(
        finetuned_outputs_sentence,
        skip_special_tokens=True,
    )
)