In [1]:
# pip installs

!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124
!pip install -q --upgrade requests==2.32.3 bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 datasets==3.2.0 peft==0.14.0 trl==0.14.0 matplotlib wandb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m908.3/908.3 MB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.3/7.3 MB[0m [31m73.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m65.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m31.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m46.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━

In [1]:
import os
import re
import math
from tqdm import tqdm
from google.colab import userdata
from huggingface_hub import login
import torch
import transformers
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig
)
from datasets import load_dataset, Dataset, DatasetDict
import wandb
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from datetime import datetime
import matplotlib.pyplot as plt

In [2]:
BASE_MODEL = "meta-llama/Llama-3.2-3B-Instruct"
PROJECT_NAME = "QA-EnergyAI"
HF_USER = "mahmuuud"
MAX_LENGTH = 1024

DATASET_NAME = f"{HF_USER}/electric-vehicle-charging-stations"

RUN_NAME =  f"{datetime.now():%Y-%m-%d_%H.%M.%S}"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
HUB_MODEL_NAME = f"{HF_USER}/{PROJECT_RUN_NAME}"



LORA_R = 16
LORA_ALPHA = 32
TARGET_MODULES = ["q_proj", "v_proj", "k_proj", "o_proj"]
LORA_DROPOUT = 0.1
IS_QUANT_4_BIT = True


EPOCHS = 1
BATCH_SIZE = 8
GRADIENT_ACCUMULATION_STEPS = 1
LEARNING_RATE = 1e-4
LR_SCHEDULER_TYPE = 'cosine'
WARMUP_RATIO = 0.03
OPTIMIZER = "paged_adamw_32bit"

CUTOFF_LEN = 512

STEPS = 20
SAVE_STEPS = 100
LOG_TO_WANDB = True

In [3]:
hf_token = userdata.get('HF_TOKEN')
login(hf_token, add_to_git_credential=True)

In [4]:
wandb_api_key = userdata.get('WANDB_API_KEY')
os.environ["WANDB_API_KEY"] = wandb_api_key
wandb.login()


os.environ["WANDB_PROJECT"] = PROJECT_NAME
os.environ["WANDB_LOG_MODEL"] = "checkpoint" if LOG_TO_WANDB else "end"
os.environ["WANDB_WATCH"] = "gradients"

[34m[1mwandb[0m: Currently logged in as: [33mmahmuuudtolba[0m ([33mmahmuuudtolba-mansoura-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
if LOG_TO_WANDB:
  wandb.init(project=PROJECT_NAME, name=RUN_NAME)

In [6]:
if IS_QUANT_4_BIT:
  quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
  )
else:
  quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_8bit_compute_dtype=torch.bfloat16
  )

In [7]:
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=quant_config,
    device_map="auto"
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL , trust_remote_code=True)
tokenizer.pad_token_id = tokenizer.eos_token_id

In [9]:
raw_dataset = load_dataset(DATASET_NAME)

In [10]:
raw_dataset

DatasetDict({
    train: Dataset({
        features: ['alpaca_format', 'text_format'],
        num_rows: 318
    })
})

In [11]:
def format_to_chat(example):
    instruction = example["alpaca_format"]["instruction"]
    output_text = example["alpaca_format"]["output"]

    # LLaMA Instruct expects role-based chat messages
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": instruction},
        {"role": "assistant", "content": output_text}
    ]

    # Apply chat template to produce a single text string
    formatted_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False
    )

    return {"text": formatted_text}

processed_dataset = raw_dataset.map(format_to_chat)

Map:   0%|          | 0/318 [00:00<?, ? examples/s]

In [12]:
def tokenize(example):
    return tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=MAX_LENGTH
    )

dataset = processed_dataset.map(
    tokenize,
    batched=True,
    remove_columns=processed_dataset["train"].column_names
)

Map:   0%|          | 0/318 [00:00<?, ? examples/s]

In [13]:
dataset

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 318
    })
})

In [14]:
lora_parameters = LoraConfig(
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    r=LORA_R,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=TARGET_MODULES,
)

train_parameters = SFTConfig(
    output_dir=PROJECT_RUN_NAME,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    eval_strategy="no",
    eval_steps=SAVE_STEPS,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    optim=OPTIMIZER,
    save_steps=SAVE_STEPS,
    save_total_limit=3,
    logging_steps=STEPS,
    learning_rate=LEARNING_RATE,
    weight_decay=0.001,
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,
    warmup_ratio=WARMUP_RATIO,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    report_to="wandb" if LOG_TO_WANDB else None,
    run_name=RUN_NAME,
    save_strategy="steps",
    hub_strategy="every_save",
    push_to_hub=True,
    hub_model_id=HUB_MODEL_NAME,
    hub_private_repo=True,
    dataloader_pin_memory=False,
    remove_unused_columns=False,
    max_seq_length=MAX_LENGTH,
    packing=False,
)

fine_tuning = SFTTrainer(
    model=model,
    train_dataset=dataset['train'],
    peft_config=lora_parameters,
    args=train_parameters,

  )

In [15]:
# Fine-tune!
fine_tuning.train()

# Push our fine-tuned model to Hugging Face
fine_tuning.model.push_to_hub(PROJECT_RUN_NAME, private=True)
print(f"Saved to the hub: {PROJECT_RUN_NAME}")



Step,Training Loss
20,2.9573
40,1.6545
60,1.5471
80,1.4921
100,1.4693
120,1.4127
140,1.3811
160,1.4918
180,1.511
200,1.3901


[34m[1mwandb[0m: Adding directory to artifact (./QA-EnergyAI-2025-07-29_09.16.39/checkpoint-100)... Done. 0.3s
[34m[1mwandb[0m: Adding directory to artifact (./QA-EnergyAI-2025-07-29_09.16.39/checkpoint-200)... Done. 0.8s
[34m[1mwandb[0m: Adding directory to artifact (./QA-EnergyAI-2025-07-29_09.16.39/checkpoint-300)... Done. 0.4s
[34m[1mwandb[0m: Adding directory to artifact (./QA-EnergyAI-2025-07-29_09.16.39/checkpoint-318)... Done. 0.4s


README.md:   0%|          | 0.00/1.49k [00:00<?, ?B/s]

Uploading...:   0%|          | 0.00/36.7M [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


Saved to the hub: QA-EnergyAI-2025-07-29_09.16.39


In [16]:
if LOG_TO_WANDB:
  wandb.finish()

0,1
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
train/grad_norm,▄▄▇▁▃▃▃▁█▃▃▂▅▃▄
train/learning_rate,███▇▇▆▅▅▄▃▃▂▂▁▁
train/loss,█▂▂▂▁▁▁▂▂▁▁▁▁▁▁

0,1
total_flos,5525172224262144.0
train/epoch,1.0
train/global_step,318.0
train/grad_norm,2.24956
train/learning_rate,0.0
train/loss,1.458
train_loss,1.55266
train_runtime,2306.4973
train_samples_per_second,0.138
train_steps_per_second,0.138
