In [1]:

# ! pip install -U bitsandbytes
# !pip install -U transformers accelerate peft datasets evaluate huggingface_hub pandas


In [4]:
import os
from huggingface_hub import login
hf_token = os.environ["HF_TOKEN"]
login(hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [5]:
import torch
print("CUDA available:", torch.cuda.is_available())
print("CUDA device:", torch.cuda.get_device_name(0))
print("CUDA version:", torch.version.cuda)

CUDA available: True
CUDA device: NVIDIA A10G
CUDA version: 12.8


In [6]:
# !pip install -q transformers datasets peft evaluate accelerate bitsandbytes


from datasets import load_dataset
from transformers import AutoTokenizer

dataset = load_dataset("ag_news")
train_dataset = dataset["train"].shuffle(seed=42).select(range(5000))
test_dataset = dataset["test"].shuffle(seed=42).select(range(1000))
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" 
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # fix padding issue explicitly
tokenizer.padding_side = "right"

def preprocess(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

train_dataset = train_dataset.map(preprocess, batched=True).remove_columns("text")
test_dataset = test_dataset.map(preprocess, batched=True).remove_columns("text")


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [8]:
label_names = train_dataset.features["label"].names
label_names

['World', 'Sports', 'Business', 'Sci/Tech']

### Instantiation for PEFT and LoRA

In [6]:
import torch
from transformers import AutoModelForSequenceClassification#, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training

 # fully open, no gated access

# bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=4,
    # quantization_config=bnb_config,
    device_map="auto"
)

model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "down_proj", "up_proj"],
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at TinyLlama/TinyLlama-1.1B-Chat-v1.0 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 6,316,032 || all params: 1,040,836,608 || trainable%: 0.6068


### Finetuning

In [24]:
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding
import numpy as np
import evaluate
import os
# os.environ["XLA_USE_BF16"] = "1"

accuracy_metric = evaluate.load("accuracy")
tokenizer.pad_token   = tokenizer.eos_token
tokenizer.padding_side = "right"
model.config.pad_token_id = tokenizer.pad_token_id

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return accuracy_metric.compute(predictions=preds, references=labels)


training_args = TrainingArguments(
    output_dir="./distilbert-classifier",
    num_train_epochs=8,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=3e-5,
    logging_steps=50,
    report_to="none",
    label_names=label_names
    # fp16=True,
    # bf16=True,
    # no bf16, no fp16
)



trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer),
    compute_metrics=compute_metrics

)

trainer.train()


  trainer = Trainer(


Step,Training Loss
50,0.1181
100,0.2147
150,0.1298
200,0.0748
250,0.075
300,0.0231
350,0.0178
400,0.0179
450,0.0076
500,0.0091


TrainOutput(global_step=1000, training_loss=0.03485668756440282, metrics={'train_runtime': 702.8586, 'train_samples_per_second': 22.764, 'train_steps_per_second': 1.423, 'total_flos': 1.1984493871104e+16, 'train_loss': 0.03485668756440282, 'epoch': 8.0})

In [25]:
metrics = trainer.evaluate(test_dataset)
print(metrics)


{'eval_runtime': 9.7194, 'eval_samples_per_second': 51.443, 'eval_steps_per_second': 3.292, 'epoch': 8.0}


### SAve Model

In [28]:
final_dir = "./final_model"
trainer.save_model(final_dir)
tokenizer.save_pretrained(final_dir)

('./final_model/tokenizer_config.json',
 './final_model/special_tokens_map.json',
 './final_model/tokenizer.json')

### New Text Inferences

In [27]:
# Put model in evaluation mode and move to CPU (or GPU) for inference
model.eval()
model.to("cuda")

# Example texts to classify
examples = [
    "NASA launches a new research satellite to study climate change effects.",
    "The Barcelona lost the last Soccer Match Cricket."
]

for text in examples:
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128).to("cuda")
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    pred_label_id = int(logits.argmax(dim=-1))
    print(f"Text: {text}\n  Predicted label: {label_names[pred_label_id]}\n")


Text: NASA launches a new research satellite to study climate change effects.
  Predicted label: Sci/Tech

Text: The Barcelona lost the last Soccer Match Cricket.
  Predicted label: Sports



### Load the saved Model again and do a fresh Test

In [13]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from peft import PeftModel

# 1) Load your tokenizer from the folder you saved
tokenizer = AutoTokenizer.from_pretrained("./final_model")

# 2) Load the original base model with the **correct** num_labels
base_model = AutoModelForSequenceClassification.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # same base model you used for training
    num_labels=4,
    device_map="auto"
)

# 3) Attach your LoRA adapters from disk
model = PeftModel.from_pretrained(
    base_model,
    "./final_model",    # this folder has the adapter weights + PEFT config
    inference_mode=True # optional: disables gradients
)

# Now `model` has your trained head of size 4 and your LoRA adapters.
# Put model in evaluation mode and move to CPU (or GPU) for inference
model.eval()
model.to("cuda")

# Example texts to classify
examples = [
    "NASA launches a new research satellite to study climate change effects.",
    "The Barcelona lost the last Soccer Match Cricket."
]

for text in examples:
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128).to("cuda")
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    pred_label_id = int(logits.argmax(dim=-1))
    print(f"Text: {text}\n  Predicted label: {label_names[pred_label_id]}\n")

# 1) Merge adapters into base weights and unload the PEFT wrapper
merged = model.merge_and_unload()  
merged.save_pretrained("./merged_model")
tokenizer.save_pretrained("./merged_model")


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at TinyLlama/TinyLlama-1.1B-Chat-v1.0 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Text: NASA launches a new research satellite to study climate change effects.
  Predicted label: Sci/Tech

Text: The Barcelona lost the last Soccer Match Cricket.
  Predicted label: Sports



## Final Test (You can run this code Directly after pip installations

In [2]:
from peft import PeftModel
from transformers import AutoModelForSequenceClassification, pipeline



# 2) Now reload as a normal SequenceClassification model
clf = pipeline(
    "text-classification",
    model="./merged_model",
    tokenizer="./merged_model",
    device=0
)

examples = [
    "NASA launches a new research satellite to study climate change effects.",
    "Barcelona lost the last soccer match 2-1."
]
print(clf(examples))


Device set to use cuda:0


[{'label': 'LABEL_3', 'score': 0.9999626874923706}, {'label': 'LABEL_1', 'score': 0.9979887008666992}]
