In [1]:
!pip install peft

Collecting peft
  Downloading peft-0.14.0-py3-none-any.whl.metadata (13 kB)
Collecting huggingface-hub>=0.25.0 (from peft)
  Downloading huggingface_hub-0.27.0-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.14.0-py3-none-any.whl (374 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.8/374.8 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading huggingface_hub-0.27.0-py3-none-any.whl (450 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m450.5/450.5 kB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: huggingface-hub, peft
  Attempting uninstall: huggingface-hub
    Found existing installation: huggingface-hub 0.24.7
    Uninstalling huggingface-hub-0.24.7:
      Successfully uninstalled huggingface-hub-0.24.7
Successfully installed huggingface-hub-0.27.0 peft-0.14.0


In [2]:
import warnings

import torch
from datasets import load_dataset
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [3]:
# Load every split of the GLUE SST-2 sentiment task.
dataset = load_dataset(path="glue", name="sst2")

# Evaluation below runs on the validation split, shuffled with a fixed seed so the
# example order is reproducible between runs.
# NOTE(review): despite its name, `test_data` holds the "validation" split, not "test".
test_data = dataset["validation"].shuffle(seed=40)

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [4]:
# LoRA evaluation setup: a base checkpoint plus a LoRA adapter repository.
# Base checkpoint ids; `r` is the one selected below, `b` is not used in this cell.
b = "bert-base-uncased"
r = "roberta-base"
adapter_model_name = "dog-in-the-box-studio/sst2-roberta-lora"  # Adapter model repo

model_name = r  # Base model used for LoRA
# adapter_model_name = "dog-in-the-box-studio/qpp-roberta-lora"  # Adapter model repo

# Load tokenizer and base model
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Load the adapter configuration
peft_config = PeftConfig.from_pretrained(adapter_model_name)

# The adapter configuration records the base checkpoint it was saved against. Applying
# an adapter to a different checkpoint of the same architecture does not necessarily
# fail, so surface a mismatch explicitly instead of silently evaluating a wrong pairing.
adapter_base_name = peft_config.base_model_name_or_path
if adapter_base_name and adapter_base_name != model_name:
    warnings.warn(
        f"Adapter '{adapter_model_name}' was saved for base model "
        f"'{adapter_base_name}', but '{model_name}' was loaded."
    )

# Apply the adapter to the base model
model = PeftModel.from_pretrained(base_model, adapter_model_name)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_config.json:   0%|          | 0.00/840 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/7.09M [00:00<?, ?B/s]

In [5]:
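# Alternative (disabled): evaluate the fully fine-tuned checkpoint instead of the LoRA adapter.
# To use it, uncomment these lines and run this cell after the LoRA cell above (it reuses `r`);
# it then overwrites `model_name`, `tokenizer` and `model`.
# model_name = "dog-in-the-box-studio/sst2-roberta-full"  # Replace with your fine-tuned model directory if it is saved locally
# tokenizer = AutoTokenizer.from_pretrained(r)
# model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)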

In [6]:
# Run on the GPU when PyTorch reports CUDA as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the model's parameters to the selected device.
model = model.to(device)

In [7]:
def preprocess(dataset, tokenizer, max_length=512):
    """Tokenize the ``sentence`` column of ``dataset`` and attach a ``labels`` column.

    Args:
        dataset: Object exposing ``map(fn, batched=True)`` and ``remove_columns``;
            its batches must provide ``'sentence'`` and ``'label'`` entries.
        tokenizer: Callable invoked on each batch of sentences with
            ``padding="max_length"``, ``truncation=True`` and ``max_length``.
        max_length: Length passed to the tokenizer for padding and truncation.

    Returns:
        The mapped dataset with the tokenizer's output fields plus ``labels``,
        and with the ``sentence`` and ``idx`` columns removed.
    """
    def _encode_batch(batch):
        # Tokenize the whole batch in one call and copy the labels across under
        # the "labels" key.
        encoded = tokenizer(
            batch['sentence'],
            padding="max_length",
            truncation=True,
            max_length=max_length,
        )
        encoded["labels"] = batch["label"]
        return encoded

    # The raw text and the example index are dropped once the batch is encoded.
    return dataset.map(_encode_batch, batched=True).remove_columns(['sentence', 'idx'])

# Apply preprocessing to the test set.
# max_length=128 overrides the function's default of 512, so the tokenizer pads or
# truncates every sentence to 128 tokens.
tokenized_test_data = preprocess(test_data, tokenizer, max_length=128)



Map:   0%|          | 0/872 [00:00<?, ? examples/s]

In [8]:
def prepare_data(data):
    """Convert the tokenized columns of ``data`` into tensors on the global ``device``.

    Args:
        data: Mapping-like object whose ``"input_ids"``, ``"attention_mask"`` and
            ``"labels"`` entries can be passed to ``torch.tensor``.

    Returns:
        A tuple ``(input_ids, attention_mask, labels)`` of tensors moved to ``device``.
    """
    column_names = ("input_ids", "attention_mask", "labels")
    # Build and move one column at a time, in the order callers unpack them.
    return tuple(torch.tensor(data[column]).to(device) for column in column_names)

# Turn the tokenized evaluation set into tensors that live on `device`.
test_input_ids, test_attention_mask, test_labels = prepare_data(tokenized_test_data)

In [9]:
def evaluate_model(model, input_ids, attention_mask, labels, batch_size=32):
    """Predict a class for every example, running the model in mini-batches.

    Feeding the whole evaluation set through the model in a single forward pass
    makes memory use grow with the dataset size; slicing the inputs into
    mini-batches keeps the peak bounded by ``batch_size``.

    Args:
        model: Callable as ``model(input_ids, attention_mask=...)`` returning an
            object with a ``logits`` attribute; must also provide ``eval()``.
        input_ids: Tensor whose first dimension indexes examples.
        attention_mask: Tensor aligned with ``input_ids`` along the first dimension.
        labels: Tensor of ground-truth labels, returned unchanged as a NumPy array.
        batch_size: Number of examples per forward pass. Defaults to 32.

    Returns:
        A tuple ``(predictions, true_labels)`` of NumPy arrays on the CPU, where
        ``predictions`` holds the argmax of the logits over the last dimension.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    model.eval()
    batch_predictions = []
    with torch.no_grad():
        for start in range(0, input_ids.shape[0], batch_size):
            stop = start + batch_size
            outputs = model(input_ids[start:stop], attention_mask=attention_mask[start:stop])
            # Move each batch of predictions to the CPU right away so only the
            # current batch's logits stay on the accelerator.
            batch_predictions.append(torch.argmax(outputs.logits, dim=-1).cpu())

    if batch_predictions:
        predictions = torch.cat(batch_predictions).numpy()
    else:
        # No examples: return an empty prediction array instead of failing in torch.cat.
        predictions = torch.empty(0, dtype=torch.long).numpy()

    true_labels = labels.cpu().numpy()  # Move true labels back to CPU for evaluation
    return predictions, true_labels
# Run inference on the evaluation tensors; both results come back as NumPy arrays on the CPU.
predictions, true_labels = evaluate_model(model, test_input_ids, test_attention_mask, test_labels)

In [10]:
# Confusion-matrix counts with label 1 as the positive class. `predictions` and
# `true_labels` are the NumPy arrays returned by evaluate_model, so the counts are
# taken with element-wise comparisons instead of a per-example Python loop.
# Pairs whose values fall outside {0, 1} are not counted in any cell.
tp = int(((predictions == 1) & (true_labels == 1)).sum())  # True Positives
fp = int(((predictions == 1) & (true_labels == 0)).sum())  # False Positives
tn = int(((predictions == 0) & (true_labels == 0)).sum())  # True Negatives
fn = int(((predictions == 0) & (true_labels == 1)).sum())  # False Negatives

# Every ratio falls back to 0 when its denominator is empty, so an empty
# evaluation set yields zeros rather than a ZeroDivisionError.
total = tp + fp + tn + fn

# Accuracy
accuracy = (tp + tn) / total if total > 0 else 0

# Precision
precision = tp / (tp + fp) if (tp + fp) > 0 else 0

# Recall
recall = tp / (tp + fn) if (tp + fn) > 0 else 0

# F1-Score
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

Accuracy: 0.9255
Precision: 0.9183
Recall: 0.9369
F1-Score: 0.9275
