<a href="https://colab.research.google.com/github/mahmoudshaddad/AI/blob/main/trainable_parameters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/244.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m122.9/244.2 kB[0m [31m3.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━

In [None]:
import os
import torch
from datasets import load_dataset
import transformers
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
# from peft import LoraConfig, PeftModel
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from trl import SFTTrainer

In [None]:
# Standard library import for interacting with the operating system.
import os

# Import PyTorch for deep learning models and tensor computations.
import torch

# Load and manipulate datasets conveniently with Hugging Face's dataset library.
from datasets import load_dataset

# AutoModelForCausalLM for automatic causal language model loading based on a given model name.
# AutoTokenizer for automatic tokenizer loading matching the model's expected input format.
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    # BitsAndBytesConfig to configure the bitsandbytes library for efficient training.
    BitsAndBytesConfig,
    # HfArgumentParser for parsing command-line arguments to Hugging Face model training scripts.
    HfArgumentParser,
    # TrainingArguments to specify and store hyperparameters for model training.
    TrainingArguments,
    # Pipeline for easy inference with models.
    pipeline,
    # Logging for configuring the logging level of the transformers library.
    logging,
)

# LoraConfig for configuring LoRA (Low-Rank Adaptation) parameters within PEFT (Parameter-Efficient Fine-Tuning).
from peft import LoraConfig, PeftModel

# SFTTrainer for Supervised Fine-Tuning (SFT) of language models on text datasets.
from trl import SFTTrainer


In [None]:
# Hugging Face Hub dataset used to fine-tune Llama 2.
dataset_name = "MahmoudShaddad/llama2QA"

# Base model for the fine-tuning job: the Llama 2 7B chat checkpoint.
model_name = "NousResearch/Llama-2-7b-chat-hf"

# Name of the model that the PEFT fine-tuning job will produce.
new_model = "DIN-Llama2-7bfinetune"


In [None]:

def print_trainable_parameters(model):
    """
    Print the number of trainable parameters in the model.

    Iterates over ``model.named_parameters()``, summing the element count of
    every parameter and, separately, of the parameters whose ``requires_grad``
    is set, then prints both totals and the trainable share as a percentage.

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, e.g. a ``torch.nn.Module``.

    Returns:
        None. The summary is written to stdout.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        num_params = param.numel()
        # bitsandbytes 4-bit weights are stored packed, so numel() reports the
        # storage size rather than the logical weight count. Doubling mirrors
        # the correction PEFT applies in its own counter.
        # NOTE(review): assumes two 4-bit values per stored element (uint8
        # storage) -- confirm if a different quant storage dtype is used.
        if param.__class__.__name__ == "Params4bit":
            num_params *= 2
        all_param += num_params
        if param.requires_grad:
            trainable_params += num_params
    # A model without parameters reports 0% instead of dividing by zero.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [None]:
# --- LoRA hyperparameters (consumed by LoraConfig) ---

# Dropout rate applied in the LoRA layers; helps prevent overfitting.
lora_dropout = 0.1

# Scaling factor (alpha) that controls how strongly the adaptation matrices are applied.
lora_alpha = 16

# Rank of the LoRA adaptation matrices; higher rank means more adaptation capacity.
lora_r = 64

In [None]:
# --- Run output and length ---

# Directory where model checkpoints and predictions are stored.
output_dir = "./results"

# Total number of training epochs.
num_train_epochs = 1

# --- Mixed precision training ---
# Enable at most one of these depending on hardware (bf16 is the one to set on an A100).
fp16 = False
bf16 = False

# --- bitsandbytes 4-bit quantization ---

# Load the base model in 4-bit precision to reduce memory usage.
use_4bit = True

# Quantization type for 4-bit precision: "fp4" or "nf4".
bnb_4bit_quant_type = "nf4"

# Name of the torch dtype used for computations with the 4-bit model.
bnb_4bit_compute_dtype = "float16"

# Nested (double) quantization for additional compression; disabled here.
use_nested_quant = False


In [None]:
# --- Batching ---

# Per-GPU batch sizes for training and for evaluation.
per_device_train_batch_size = 4
per_device_eval_batch_size = 4

# Gradient accumulation steps: larger effective batch size without extra memory.
gradient_accumulation_steps = 1

# Batch together sequences of the same length; saves memory and speeds up
# training considerably.
group_by_length = True

# --- Memory ---

# Trade compute for memory by using gradient checkpointing.
gradient_checkpointing = True

# --- Optimizer ---

# Optimizer choice, with support for large models and efficient memory usage.
optim = "paged_adamw_32bit"

# Initial learning rate for the optimizer (AdamW).
learning_rate = 2e-4

# Weight decay for regularization, excluding bias and LayerNorm weights.
weight_decay = 0.001

# Gradient-norm clipping threshold, to prevent exploding gradients.
max_grad_norm = 0.3

# --- Learning-rate schedule and run length ---

# Scheduler that adjusts the learning rate over the course of training.
lr_scheduler_type = "cosine"

# Fraction of training used to ramp the learning rate up smoothly at the start.
warmup_ratio = 0.03

# When set to a positive value, overrides num_train_epochs with a fixed step count.
max_steps = -1

# --- Checkpointing and logging ---

# Save a checkpoint every X update steps.
save_steps = 0

# Log every X update steps.
logging_steps = 25

In [None]:

# --- SFT (Supervised Fine-Tuning) parameters ---

# Pack several short examples into each input sequence for efficiency; disabled here.
packing = False

# Maximum sequence length; None falls back to the default.
max_seq_length = None

# Device mapping used when loading the model; the "" key with value 0 assigns
# the whole model to GPU 0.
device_map = {"": 0}

In [None]:
# Interactive Hugging Face Hub login: the CLI prompts for an access token, so
# the token never has to be written into the notebook itself.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) Y
Token is valid (permission: read).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'store

In [None]:
from datasets import load_dataset, DatasetDict



# Load the 'train' split of the dataset from the Hugging Face Hub.
dataset = load_dataset(dataset_name, split='train')

# Hold out 10% of the examples as a test set. A test size of 0.2 is more usual;
# 0.1 is used here for the sake of faster results. The fixed seed makes the
# shuffle, and therefore the train/test membership, identical on every
# Restart & Run All.
split_data = dataset.train_test_split(test_size=0.1, seed=42)

# train_test_split already returns a DatasetDict keyed by 'train' and 'test',
# so no re-wrapping is needed; the name is kept for any later cell that uses it.
split_dataset = split_data

# Access the train and test datasets.
train_ds = split_dataset['train']
test_ds = split_dataset['test']

# Sanity check: the split must partition the dataset without losing rows.
assert len(train_ds) + len(test_ds) == len(dataset)

# Now, train_ds and test_ds can be used for training and evaluation.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading data:   0%|          | 0.00/544k [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# Train on the training portion of the QA dataset prepared by the split cell.
dataset = train_ds

# Resolve the configured dtype name (e.g. "float16") to the torch dtype object.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Quantization settings used to load the base model for QLoRA.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Advisory check: tell the user when the GPU could run bfloat16 instead.
# Guarded with is_available() so this hint cannot itself raise on a machine
# without CUDA.
if compute_dtype == torch.float16 and use_4bit and torch.cuda.is_available():
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load the quantized base model. The placement comes from the `device_map`
# config variable, so changing it in the config cell takes effect here.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load the LLaMA tokenizer and reuse the EOS token for padding.
# NOTE(review): trust_remote_code=True allows code shipped with the Hub repo to
# run locally -- confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

# LoRA adapter configuration.
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

# Training hyperparameters, taken from the configuration cells.
# NOTE(review): the `gradient_checkpointing` config variable is not forwarded
# here; checkpointing is enabled on the model directly in a later cell.
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

# Supervised fine-tuning trainer; examples are read from the "text" field.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)


config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/179 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]



Map:   0%|          | 0/1129 [00:00<?, ? examples/s]

In [None]:

# Enable gradient checkpointing on the model, then run PEFT's k-bit training
# preparation on it and rebind `model` to the result.
# NOTE(review): `trainer` was already constructed from `model` in the previous
# cell, so these calls run after the trainer received the model -- confirm the
# intended order (presumably this belongs before SFTTrainer is created).
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [None]:
# Wrap the model with the LoRA adapters described by `peft_config`, then report
# how many parameters are trainable.
# NOTE(review): the same `peft_config` was also handed to SFTTrainer earlier;
# verify the adapters are not applied twice and that `trainer` actually trains
# the model bound here.
model = get_peft_model(model, peft_config)
print_trainable_parameters(model)

trainable params: 33554432 || all params: 3533967360 || trainable%: 0.9494833591219133


In [None]:

# Run the fine-tuning loop with the SFTTrainer instance built earlier.
trainer.train()