Set up PyTorch to use the best available hardware option

In [1]:
import torch

# Permit TF32 for CUDA matrix multiplications; this is a CUDA backend flag.
torch.backends.cuda.matmul.allow_tf32 = True

# Backend preference order: Apple MPS first, then CUDA, with CPU as fallback.
if torch.backends.mps.is_available():
    backend_name = "mps"
elif torch.cuda.is_available():
    backend_name = "cuda"
else:
    backend_name = "cpu"

device = torch.device(backend_name)

print(device)

mps


In [2]:
# Root directory for stored artifacts, given relative to the notebook's working directory.
ARTIFACTS_BASE = "../../../artifacts"

In [3]:
from os import path
from datasets import load_from_disk

# Directory of the saved dataset under the artifacts root; the test split is
# stored in its 'test' subdirectory.
dataset_path = path.join(ARTIFACTS_BASE, 'datasets', 'jayavibhav/prompt-injection')
test_split_path = path.join(dataset_path, 'test')

# Load the test split and expose its 'text' column under the name 'prompt',
# which is the column the tokenization step reads.
test_dataset = load_from_disk(test_split_path).rename_column('text', 'prompt')

In [4]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_name = "microsoft/deberta-v3-base"

# `model_max_length` is the tokenizer-level length limit. `max_length` is a
# per-call argument (it is passed explicitly when tokenizing) and is not a
# tokenizer setting when given to `from_pretrained`.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    clean_up_tokenization_spaces=False,
    use_fast=True,
    model_max_length=512,
)

# NOTE: transformers reports that the classifier and pooler weights are newly
# initialized for this checkpoint, so predictions from this model come from an
# untrained two-label classification head.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2).to(device)

# Switch to evaluation mode; as the cell's last expression this also displays
# the module structure.
model.eval()

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DebertaV2ForSequenceClassification(
  (deberta): DebertaV2Model(
    (embeddings): DebertaV2Embeddings(
      (word_embeddings): Embedding(128100, 768, padding_idx=0)
      (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
      (dropout): StableDropout()
    )
    (encoder): DebertaV2Encoder(
      (layer): ModuleList(
        (0-11): 12 x DebertaV2Layer(
          (attention): DebertaV2Attention(
            (self): DisentangledSelfAttention(
              (query_proj): Linear(in_features=768, out_features=768, bias=True)
              (key_proj): Linear(in_features=768, out_features=768, bias=True)
              (value_proj): Linear(in_features=768, out_features=768, bias=True)
              (pos_dropout): StableDropout()
              (dropout): StableDropout()
            )
            (output): DebertaV2SelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine

In [5]:
def tokenize_batch(batch):
    """Tokenize the ``"prompt"`` entries of a batch with the notebook's tokenizer.

    Every sequence is truncated and padded to exactly 512 tokens.

    Args:
        batch: Mapping with a ``"prompt"`` key holding the texts to tokenize.

    Returns:
        Whatever the module-level ``tokenizer`` returns for those texts.
    """
    prompts = batch["prompt"]
    encoded = tokenizer(
        prompts,
        max_length=512,
        truncation=True,
        padding='max_length',
    )
    return encoded

In [6]:
# Apply the tokenizer to the whole test split; `batched=True` makes `map`
# pass groups of examples to `tokenize_batch` rather than one at a time.
prompts_test_tokenized = test_dataset.map(
    tokenize_batch,
    batched=True,
)

Map:   0%|          | 0/65416 [00:00<?, ? examples/s]

In [7]:
from tqdm import tqdm

batch_size = 8
predictions = []

# Ceiling division: a trailing partial batch still counts as one batch.
num_examples = len(prompts_test_tokenized)
total_batches = (num_examples + batch_size - 1) // batch_size

# Keep only the columns passed to the model and have them returned as torch tensors.
model_inputs = prompts_test_tokenized.select_columns(["input_ids", "attention_mask"]).with_format("torch")

# Gradients are not needed for inference. Using tqdm as a context manager
# guarantees the progress bar is closed even if a batch raises.
with torch.no_grad(), tqdm(total=total_batches, desc="Running inference", unit="batch") as progress_bar:
    for batch in model_inputs.iter(batch_size=batch_size):
        # Move every tensor of the batch to the selected device.
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        # Predicted class = index of the largest logit for each example.
        predictions.extend(outputs.logits.argmax(dim=-1).cpu().tolist())
        progress_bar.update(1)

Running inference: 100%|██████████| 8177/8177 [1:56:20<00:00,  1.17batch/s]


In [8]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Reference labels from the tokenized test split, compared against the
# predictions collected during inference.
true_labels = prompts_test_tokenized["label"]

accuracy = accuracy_score(y_true=true_labels, y_pred=predictions)

# With average='binary' the scores are reported for the positive class only;
# the fourth returned element (support) is not used.
precision, recall, f1, _ = precision_recall_fscore_support(
    y_true=true_labels,
    y_pred=predictions,
    average='binary',
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [9]:
import pandas as pd

# One-row summary table: each metric becomes a column.
metric_values = {
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
}
metrics_df = pd.DataFrame({label: [score] for label, score in metric_values.items()})

# Render every metric as a string with 4 decimal places for display.
for label in metric_values:
    metrics_df[label] = metrics_df[label].map(lambda score: f"{score:.4f}")

metrics_df

Unnamed: 0,Accuracy,Precision,Recall,F1 Score
0,0.4937,0.4937,1.0,0.661
