In [2]:
!pip install accelerate transformers peft bitsandbytes



In [3]:
!pip install datasets



In [4]:
from datasets import load_dataset, Dataset
from transformers import (AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig, DataCollatorForLanguageModeling)
from peft import LoraConfig, get_peft_model, PeftModel
import pandas as pd
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
!pip install -U bitsandbytes



In [6]:
!pip install torch torchvision torchaudio



In [7]:
# Check for MPS (Metal Performance Shaders) GPU support
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using Apple GPU via MPS")
else:
    device = torch.device("cpu")
    print("Using CPU")

Using Apple GPU via MPS


In [11]:
!touch .env

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [12]:
!pip install python-dotenv

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting python-dotenv
  Using cached python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Using cached python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.1.0


In [13]:
from huggingface_hub import HfApi, login
from dotenv import load_dotenv
import os 

load_dotenv()
hf_token = os.getenv("HF_TOKEN")
login(token=hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [None]:
class LoRAFineTuner:
    def __init__(self, model_name,dataset_name,output_dir):
        """Initialization of the class parameter. 
        """
        print("Initialization of the class. ")
        self.model_name = model_name
        self.dataset_name = dataset_name
        self.output_dir = output_dir
        self.tokenizer = None
        self.model = None
        self.tokenized_data = None

    def load_tokenizer(self):
        """This function to define the tokenizer of the model."""
        print("Loaded Tokenizer.")
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, trust_remote_code=True)
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def load_model(self):
        """This function to define the model."""
        print("Loaded Model.")

        # Bits and Bytes Configs for the Quantized Models. 
        bnb_config = BitsAndBytesConfig(
            load_in_4bit = True, # Load the model weights in 4-bit precision (instead of 16/32-bit)
            bnb_4bit_use_double_quant = True, # Enabling the double quantization which helps in preserving the accuracy. 
            bnb_4bit_quant_type = "nf4", # normal float 4, a quantized type optimized for transformers.  
            bnb_4bit_compute_dtype = torch.float16 # Reducing the precisions of the model. 
        )
        
        # Quantization model.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name, 
            device_map="auto",
            trust_remote_code = True # allowing custom model to use. 
            # quantization_config = bnb_config # Applies the 4-bit quantization setup from earlier
        )

        self.model.config.use_cache = False # During training, using cache = True, may causes issues with backpropagation in casual langauge model. Disabling it ensures the model doesn’t store outputs for reuse (which is good for inference, but bad for training).

    def apply_lora(self):
        """this function to define the lora model of the model. """
        print("Implementing Lora. ")
        # LoRA - Low rank adapter to save compute and memory while training.
        config = LoraConfig(
            r=16,  # size of the low-rank matrices, smaller is better to save the memory. 
            lora_alpha=32, # Scaling factor that balances LoRA updates vs. base model
            target_modules=["q_proj", "v_proj"],  # which model layers (e.g., q_proj, v_proj) to apply LoRA to.
            lora_dropout=0.05, # Dropout rate for adapter layers.
            bias="none", # Whether to fine-tune bias terms.
            task_type="CAUSAL_LM"
        )

        # In above,  LoRA applies to query and value projections of the attention layers — the most impactful and memory-heavy parts.

        # Apply LoRA on quantization model. 
        self.model = get_peft_model(
            self.model, 
            config
        )

        self.model.print_trainable_parameters()

    def load_and_tokenize_dataset(self):
        """This function will load the data and it will performs the tokenization."""
        print("Load data and tokenization. ")
        data = load_dataset(self.dataset_name, 'main', split="train")
        data_df = data.to_pandas()
        
        text_column = data_df.columns[0]
        print(text_column)

        if "question" in data_df.columns and "answer" in data_df.columns:
            data_df["text"] = data_df.apply(lambda x: f"question: {x['question']} answer: {x['answer']}", axis=1)
            print(data_df['text'])
        else:
            data_df['text'] = data_df[text_column]
        
        # Convert back to hugging face dataset. 
        data = Dataset.from_pandas(data_df)

        # Tokenizer dataset. 
        def tokenize(sample):
            return self.tokenizer(sample['text'], padding=True, truncation=True, max_length=512)

        self.tokenized_data = data.map(
            tokenize, 
            batched = True, 
            desc = "Tokenizing data", 
            remove_columns = data.column_names
        )

    def train(self, epochs: int = 1, batch_size: int = 4, learning_rate: float = 2e-4, max_steps: int = 100):
        """This function will perform the training."""
        print("Running train. ")

        training_args = TrainingArguments(
            output_dir = self.output_dir, 
            per_device_train_batch_size = batch_size, 
            gradient_accumulation_steps = 1, 
            learning_rate = learning_rate, 
            lr_scheduler_type = "cosine",
            save_strategy = "epoch",
            logging_steps = 100, 
            max_steps = max_steps, 
            num_train_epochs = epochs, 
            push_to_hub = True, 
            report_to = "none"
        )

        trainer = Trainer(
            model = self.model, 
            train_dataset = self.tokenized_data, 
            args = training_args, 
            data_collator = DataCollatorForLanguageModeling(self.tokenizer, mlm=False)
        )

        trainer.train()

    def merge_and_save_model(self, model_repo: str):
        """This function is to merge and save the model."""

        base_model = AutoModelForCausalLM.from_pretrained(
            self.model_name, 
            trust_remote_code=True, 
            torch_dtype=torch.float32
        )
        peft_model = PeftModel.from_pretrained(
            base_model, 
            self.output_dir, 
            from_transfomers = True
        )
        merged_model = peft_model.merge_and_unload()

        merged_model.push_to_hub(model_repo)
        print("Merged and saving the model...!")

    def run(self):
        """This function exectue all the process. """
        print("starting fine-tunning process. ")

        self.load_tokenizer()
        print("Tokenizer Loaded.")
        
        self.load_model()
        print("Model Loaded")

        self.apply_lora()
        print("Implementing LoRA.")

        self.load_and_tokenize_dataset()
        print("Dataset loaded and tokenized.")

        self.train()
        print("Train the model.")

        self.merge_and_save_model()
        print("Merge and save the model.")

# 
model_name = "microsoft/phi-1_5"
dataset_name = "gsm8k"
output_dir = "phi-1_5-finetuned"

fine_tunner = LoRAFineTuner(model_name,dataset_name,output_dir)

fine_tunner.run()

Initialization of the class. 
starting fine-tunning process. 
Loaded Tokenizer.
Tokenizer Loaded.
Loaded Model.
Model Loaded
Implementing Lora. 
trainable params: 3,145,728 || all params: 1,421,416,448 || trainable%: 0.2213
Implementing LoRA.
Load data and tokenization. 
question
0       question: Natalia sold clips to 48 of her frie...
1       question: Weng earns $12 an hour for babysitti...
2       question: Betty is saving money for a new wall...
3       question: Julie is reading a 120-page book. Ye...
4       question: James writes a 3-page letter to 2 di...
                              ...                        
7468    question: Very early this morning, Elise left ...
7469    question: Josh is saving up for a box of cooki...
7470    question: Colin can skip at six times the spee...
7471    question: Janet, a third grade teacher, is pic...
7472    question: At 30, Anika is 4/3 the age of Maddi...
Name: text, Length: 7473, dtype: object


Tokenizing data: 100%|██████████| 7473/7473 [00:00<00:00, 7759.96 examples/s]
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Dataset loaded and tokenized.
Running train. 




Step,Training Loss
100,1.1396


Train the model.


TypeError: LoRAFineTuner.merge_and_save_model() missing 1 required positional argument: 'model_repo'