<a href="https://colab.research.google.com/github/sdashrath/SmartContractAuditing/blob/main/Hyperparameter_Finetunning_ulogLLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.41.0  trl==0.4.7 fsspec==2024.10.0
!pip uninstall datasets -y
!pip install datasets
!pip install --upgrade bitsandbytes
!pip install --upgrade transformers accelerate

import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
#from trl import SFTTrainer

[0mFound existing installation: datasets 2.14.4
Uninstalling datasets-2.14.4:
  Successfully uninstalled datasets-2.14.4
Collecting datasets
  Using cached datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Using cached fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Using cached datasets-3.2.0-py3-none-any.whl (480 kB)
Using cached fsspec-2024.9.0-py3-none-any.whl (179 kB)
Installing collected packages: fsspec, datasets
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2024.10.0
    Uninstalling fsspec-2024.10.0:
      Successfully uninstalled fsspec-2024.10.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.[0m[31m
[0mSuccessfully installed datase

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, TaskType, get_peft_model
from datasets import Dataset
from sklearn.metrics import accuracy_score

# Dataset Preparation
data = [
    {"instruction": "Survey the area and capture images every 10 meters.", "output": "tc(180);g('camera');"},
    {"instruction": "Return to base if battery < 20%.", "output": "rtb();"},
    {"instruction": "Activate thermal sensor at 50m altitude.", "output": "activate_thermal(50);"},
    {"instruction": "Take a photo and then hover for 10 seconds.", "output": "photo();hover(10);"}
]
dataset = Dataset.from_list(data).train_test_split(test_size=0.2)

# Load Pretrained Model
model_name = "unsloth/llama-3-8b-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    llm_int8_enable_fp32_cpu_offload=True
)
# Explicitly define the device map to load on 4-bit and offload to CPU
device_map = {
    "transformer.embeddings": 0,  # Load embeddings on GPU 0
    "transformer.encoder.layers.0": 0,  # Load the first layer on GPU 0 for faster initialization
    "transformer.encoder.layers.1": "cpu",  # Offload other layers to CPU
    "transformer.encoder.layers.2": "cpu",
    "transformer.encoder.layers.3": "cpu",
    "transformer.encoder.layers.4": "cpu",
    "transformer.encoder.layers.5": "cpu",
    "transformer.encoder.layers.6": "cpu",
    "transformer.encoder.layers.7": "cpu",
    "transformer.encoder.layers.8": "cpu",
    "transformer.encoder.layers.9": "cpu",
    "transformer.encoder.layers.10": "cpu",
    "transformer.encoder.layers.11": "cpu",
    "transformer.encoder.layers.12": "cpu",
    "transformer.encoder.layers.13": "cpu",
    "transformer.encoder.layers.14": "cpu",
    "transformer.encoder.layers.15": "cpu",
    "transformer.encoder.layers.16": "cpu",
    "transformer.encoder.layers.17": "cpu",
    "transformer.encoder.layers.18": "cpu",
    "transformer.encoder.layers.19": "cpu",
    "transformer.encoder.layers.20": "cpu",
    "transformer.encoder.layers.21": "cpu",
    "transformer.encoder.layers.22": "cpu",
    "transformer.encoder.layers.23": "cpu",
    "transformer.encoder.layers.24": "cpu",
    "transformer.encoder.layers.25": "cpu",
    "transformer.encoder.layers.26": "cpu",
    "transformer.encoder.layers.27": "cpu",
    "transformer.encoder.layers.28": "cpu",
    "transformer.encoder.layers.29": "cpu",
    "transformer.encoder.layers.30": "cpu",
    "transformer.encoder.layers.31": "cpu",
    "lm_head": 0  # Load the head on GPU 0
}


model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", quantization_config=quantization_config, trust_remote_code=True
)

# Apply QLoRA
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM, r=4, lora_alpha=32, target_modules=["q_proj", "v_proj"]
)
peft_model = get_peft_model(model, lora_config)

# Preprocess Dataset
# def preprocess_function(examples):
  #  inputs = tokenizer(examples["instruction"], truncation=True, padding="max_length", max_length=512)
  #  labels = tokenizer(examples["output"], truncation=True, padding="max_length", max_length=512).input_ids
  #  inputs["labels"] = [-100 if token == tokenizer.pad_token_id else token for token in labels]
  #  return inputs

def preprocess_function(examples):
    inputs = tokenizer(examples["instruction"], truncation=True, padding="max_length", max_length=512)
    labels = tokenizer(examples["output"], truncation=True, padding="max_length", max_length=512).input_ids
    # Convert labels to tensors with requires_grad=True
    inputs["labels"] = torch.tensor([-100 if token == tokenizer.pad_token_id else token for token in labels], requires_grad=True)
    return inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True)

from accelerate import infer_auto_device_map

# Offload model layers to CPU
device_map = infer_auto_device_map(peft_model, max_memory={0: "14GiB", "cpu": "30GiB"})
#peft_model = peft_model.to(device_map)
peft_model = peft_model.to(torch.device("cuda:0"))

# Enable gradient checkpointing
peft_model.gradient_checkpointing_enable()

# Training arguments with reduced batch size and gradient accumulation
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,  # Reduced batch size
    gradient_accumulation_steps=4,  # Effective batch size of 4
    learning_rate=2e-4,
    num_train_epochs=3,
    save_steps=10,
    save_total_limit=2,
    evaluation_strategy="epoch",
    fp16=True  # Enable mixed precision
)

trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer
)

trainer.train()

# Evaluate the Model
results = trainer.evaluate()
print("Evaluation Results:", results)

# Test Fine-Tuned Model
prompts = ["Generate code to survey an area and return to the base.", "Activate thermal sensor at 50m altitude."]
for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = peft_model.generate(**inputs, max_length=50)
    print(f"Input: {prompt}")
    print(f"Output: {tokenizer.decode(outputs[0], skip_special_tokens=True)}")

# Compute Accuracy
y_true = ["tc(180);g('camera');", "rtb();"]
y_pred = ["tc(180);g('camera');", "rtb();"]
print(f"Accuracy: {accuracy_score(y_true, y_pred) * 100:.2f}%")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

RuntimeError: Only Tensors of floating point and complex dtype can require gradients