In [None]:
! rm -rf /opt/conda/lib/python3.10/site-packages/aiohttp-3.9.1.dist-info

!pip install peft

In [None]:
from huggingface_hub import notebook_login
# Interactive Hugging Face Hub login (presumably required to download the
# meta-llama checkpoint used later in this notebook — confirm).
notebook_login()

In [None]:
!pip install bitsandbytes

In [None]:
!pip install accelerate

In [5]:
import torch
from transformers import AutoTokenizer, BitsAndBytesConfig

In [6]:
# Checkpoint id shared by the model-loading and tokenizer-loading cells.
model_name = "meta-llama/Meta-Llama-3-8B"

# bitsandbytes settings: 4-bit NF4 weights with double quantization enabled
# and float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

In [7]:
from transformers import AutoModelForSequenceClassification

In [None]:
# Load the checkpoint with a 3-way classification head, quantized according to
# bnb_config, and let device_map="auto" decide where the weights are placed.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto",
    num_labels=3,
)

In [9]:
from peft import prepare_model_for_kbit_training
# PEFT helper applied to the quantized model before adapters are attached
# (see the PEFT documentation for the exact adjustments it makes).
# NOTE(review): `model` is rebound to the helper's return value, so re-running
# this cell feeds the already-prepared model back in.
model = prepare_model_for_kbit_training(model)

In [None]:
# Load the tokenizer published under the same checkpoint id as the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [12]:
# Reuse the end-of-sequence token as the padding token
# (presumably the checkpoint's tokenizer defines no pad token of its own — confirm).
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
# Keep the model config in sync with the tokenizer's padding id.
model.config.pad_token_id = tokenizer.pad_token_id

In [None]:
from datasets import load_dataset

# NOTE(review): `dataset` is not referenced by any later cell visible in this
# notebook (training uses `train_data` / `dev_data`), so this download may be
# dead code. shuffle() is also called without a seed, so the 11,000 selected
# rows differ from run to run.
dataset = load_dataset("nyu-mll/multi_nli")
dataset = dataset['train'].shuffle().select(range(11000))


In [None]:
from datasets import load_dataset

# Training subset: the leading 1/50 (about 2%) of the GLUE MNLI train split.
train_data = load_dataset("glue", "mnli", split="train")
train_data = train_data.select(range(train_data.num_rows // 50))

# Evaluation subset: the leading 1/20 (about 5%) of the matched validation split.
dev_data = load_dataset("glue", "mnli", split="validation_matched")
dev_data = dev_data.select(range(dev_data.num_rows // 20))


In [None]:
def preprocess_function(examples):
    """Tokenize premise/hypothesis pairs with the notebook-level `tokenizer`.

    Args:
        examples: Mapping with 'premise' and 'hypothesis' entries, passed to
            the tokenizer as the first and second text inputs.

    Returns:
        Whatever `tokenizer` returns for the pairs, requested with truncation
        on and padding to a fixed length of 128.
    """
    premises = examples['premise']
    hypotheses = examples['hypothesis']
    return tokenizer(
        premises,
        hypotheses,
        max_length=128,
        padding='max_length',
        truncation=True,
    )

# Tokenize both splits; batched=True makes map() hand preprocess_function
# batches of rows rather than single rows.
train_data = train_data.map(preprocess_function, batched=True)
dev_data = dev_data.map(preprocess_function, batched=True)

In [26]:
from peft import LoraConfig, get_peft_model

# LoRA adapters (rank 64, alpha 32, dropout 0.05) on the four attention
# projections, configured for a sequence-classification task.
peft_config = LoraConfig(
    task_type="SEQ_CLS",
    r=64,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

# NOTE(review): `model` is rebound to the wrapped model, so re-running this
# cell would pass the already-wrapped model to get_peft_model again.
model = get_peft_model(model, peft_config)

In [18]:
# Total element count over every parameter that will receive gradients.
trainable_params = sum(
    weight.numel()
    for weight in model.parameters()
    if weight.requires_grad
)
print(f"Trainable Parameters: {trainable_params}")

Trainable Parameters: 54538240


In [19]:
def compute_metrics(pred):
    """Compute classification accuracy for an evaluation pass.

    Args:
        pred: Object exposing `predictions` (an array of class scores with the
            class axis last, or a tuple whose first element is that array) and
            `label_ids` (the gold labels, one per example).

    Returns:
        dict: `{"accuracy": value}` where value is the fraction of examples
        whose highest-scoring class equals the label, as a plain Python float.
    """
    logits = pred.predictions
    # The Trainer hands over a tuple when the model returns more than one
    # output besides the loss; the class scores come first in that case.
    if isinstance(logits, tuple):
        logits = logits[0]
    preds = logits.argmax(-1)
    # float() turns the NumPy scalar into a built-in float for logging/serialization.
    return {"accuracy": float((preds == pred.label_ids).mean())}


In [None]:
from transformers import Trainer, TrainingArguments

In [None]:
# Training hyperparameters for the LoRA fine-tuning run.
args = TrainingArguments(
    output_dir="./results",                 # directory the Trainer writes its outputs to
    per_device_train_batch_size=4,          # micro-batch size on each device
    gradient_accumulation_steps=2,          # 4 x 2 = 8 examples per optimizer step per device
    optim="adamw_torch_fused",              # fused PyTorch AdamW optimizer
    save_strategy="epoch",                  # checkpointing is tied to epoch boundaries
    learning_rate=1e-4,                     # initial learning rate
    max_steps=500,                          # stop after 500 optimizer steps (recorded run ended at ~0.51 epoch)
    logging_steps=100,                      # log the training loss every 100 steps
    report_to="none",                       # no external experiment trackers
)

# Wire the PEFT-wrapped model, the tokenized splits and the accuracy metric together.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_data,
    eval_dataset=dev_data,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [29]:
# NOTE(review): an earlier cell already sets model.config.pad_token_id from the
# tokenizer; this repeats the assignment using the config's own EOS id.
model.config.pad_token_id = model.config.eos_token_id

# Run the fine-tuning loop with the arguments configured on `trainer`.
trainer.train()


Step,Training Loss
100,31.9844
200,25.6844
300,5.6276
400,2.5234
500,1.6068




TrainOutput(global_step=500, training_loss=13.485304779052735, metrics={'train_runtime': 2491.8409, 'train_samples_per_second': 1.605, 'train_steps_per_second': 0.201, 'total_flos': 2.1608873852928e+16, 'train_loss': 13.485304779052735, 'epoch': 0.5091649694501018})

In [30]:
# Evaluate on the eval_dataset given to the Trainer; returns a dict of metrics.
results = trainer.evaluate()

# Print one "name: value" line per metric.
print("Evaluation Results:")
for key, value in results.items():
    print(f"  {key}: {value}")

Evaluation Results:
  eval_loss: 1.341796875
  eval_accuracy: 0.6285714285714286
  eval_runtime: 143.1378
  eval_samples_per_second: 3.423
  eval_steps_per_second: 0.433
  epoch: 0.5091649694501018
