# Install Necessary Dependencies

In [1]:
!pip install git+https://github.com/huggingface/transformers.git
!pip install -q transformers accelerate peft datasets
!pip install -U bitsandbytes trl

Collecting git+https://github.com/huggingface/transformers.git
  Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-wp8sal63
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-req-build-wp8sal63

  Resolved https://github.com/huggingface/transformers.git to commit ba29a439adbe6f371710d0514659127264ae24b3
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone


In [2]:
!pip install ipywidgets



## Import 

In [None]:
import os
import torch
import shutil
import warnings
from trl import SFTTrainer
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
from huggingface_hub import HfApi, HfFolder, Repository, notebook_login
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

## Login to Hugging Face using Access Token

In [4]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Load the Model from Hugging Face(models)

### Perform Quantization for efficient resource utilisation

In [5]:
# Load tokenizer & model that supports text generation (causal language model)
model_name = "meta-llama/Llama-3.1-8B-Instruct"
# Define BitsAndBytesConfig for 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # Use float16 for efficient computation
    bnb_4bit_use_double_quant=True,  # Enable double quantization
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Load model with quantization config
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,  # Replace `load_in_4bit`
    trust_remote_code=True
)

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:  71%|#######1  | 3.53G/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

In [None]:
shutil.rmtree("/root/.cache/huggingface/datasets", ignore_errors=True)  # Deletes Hugging Face dataset cache

# Download the Dataset from Hugging Face

I am using "Finance-Alpaca" dataset directly from Hugging Face. This dataset is trained upon finance knowledge

In [7]:
dataset = load_dataset(r"gbharti/finance-alpaca", split="train")
print(dataset)

README.md:   0%|          | 0.00/709 [00:00<?, ?B/s]

Cleaned_date.json:   0%|          | 0.00/42.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/68912 [00:00<?, ? examples/s]

Dataset({
    features: ['text', 'instruction', 'input', 'output'],
    num_rows: 68912
})


### Perform Sampling on the Dataset

In [8]:
def format_prompt(sample):
    return f"### Instruction:\n{sample['instruction']}\n\n### Response:\n{sample['output']}"

dataset = dataset.map(lambda x: {"text": format_prompt(x)}, remove_columns=dataset.column_names)

Map:   0%|          | 0/68912 [00:00<?, ? examples/s]

# LoRA Configuration 

In [9]:
# LoRA Configuration
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,  # ✅ Higher rank improves adaptability
    lora_alpha=32,  # Adjust scaling factor
    lora_dropout=0.1,
    bias="none",
)

## Apply LoRA to the Model

In [None]:
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # Displays trainable params

trainable params: 6,815,744 || all params: 8,037,076,992 || trainable%: 0.0848


In [11]:
# Set padding and EOS token configurations for tokenizer
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Setup the Trainer for Fine Tuning 

### We would be using SFTTrainer for fine tuning which performs Supervised Learning 

In [None]:
import transformers
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,  # Use entire dataset as train
    eval_dataset=None,  # No test split
    peft_config=lora_config,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,  # Batch size = 1 for T4x2 GPU
        gradient_accumulation_steps=2,  # Increase accumulation steps for effective batch size
        warmup_ratio=0.03,  # 3% warmup for stable learning
        max_steps=300,  # Number of steps for fine-tuning
        learning_rate=1e-4,  # Stable learning rate
        logging_steps=10,  # Log every 10 steps
        output_dir="outputs",  # Save outputs here
        optim="adamw_bnb_8bit",  # Optimizer for 8-bit precision
        save_strategy="epoch",  # Save the model at each epoch
        fp16=True,  # Mixed precision for faster training
        report_to="none",  # Set to 'wandb' if you want logging
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),  # For causal LM
)

Map:   0%|          | 0/68912 [00:00<?, ? examples/s]

In [None]:
print(torch.cuda.is_available())  # Should return True
print(torch.cuda.device_count())  # Should return 2 (if you have 2 cores), otherwise 1

True
1


In [14]:
os.environ["WANDB_DISABLED"] = "true"
warnings.filterwarnings("ignore", category=UserWarning)

In [None]:
rm -rf outputs/checkpoint-10 #to remove previous fine tuning output, if you wish to train again for better results  

In [None]:
torch.cuda.empty_cache() #To remove unncessary cache

# Start the Fine-Tuning😇⭐

In [None]:
model.config.use_cache = False  # Disable cache for training
trainer.train()  

Step,Training Loss
10,2.435
20,2.1686
30,2.1549
40,2.2622
50,1.983
60,2.1193
70,1.8312
80,2.005
90,1.9846
100,1.8678


TrainOutput(global_step=300, training_loss=1.9850379562377929, metrics={'train_runtime': 203.1768, 'train_samples_per_second': 2.953, 'train_steps_per_second': 1.477, 'total_flos': 3230919740768256.0, 'train_loss': 1.9850379562377929, 'epoch': 0.008706756442999768})

In [17]:
torch.cuda.empty_cache()

# Let's Push to Adapters to 🤗Hub

In [18]:
new_model = "Meta-Llama-3.1-8B-Finance-FineTune-Sagar" #Name of the model you will be pushing to huggingface model hub

In [None]:
trainer.model.save_pretrained(new_model) #Save the new model

In [20]:
model_id = "meta-llama/Llama-3.1-8B-Instruct"
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


### Merging Base Model with Adapter Model 

In [21]:
from peft import PeftModel
peft_model = PeftModel.from_pretrained(base_model, new_model)
# Merge the PEFT adapter with the base model
merged_model = peft_model.merge_and_unload()

Some parameters are on the meta device because they were offloaded to the cpu.


### Save the Merged Model

In [None]:
merged_model.save_pretrained("merged_model",safe_serialization=True)
tokenizer.save_pretrained("merged_model")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Saving checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

# Push the New Model to 🤗Hub

In [23]:
# Push the model and tokenizer to the Hugging Face Model Hub
merged_model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)

Saving checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/sagarvk24/Meta-Llama-3.1-8B-Finance-FineTune-Sagar/commit/df34b0159321f55f44dccc165d56606f4dafcdf5', commit_message='Upload tokenizer', commit_description='', oid='df34b0159321f55f44dccc165d56606f4dafcdf5', pr_url=None, repo_url=RepoUrl('https://huggingface.co/sagarvk24/Meta-Llama-3.1-8B-Finance-FineTune-Sagar', endpoint='https://huggingface.co', repo_type='model', repo_id='sagarvk24/Meta-Llama-3.1-8B-Finance-FineTune-Sagar'), pr_revision=None, pr_num=None)