Set up PyTorch to use the best available hardware option

In [1]:
import torch

# Choose the compute backend in priority order: Apple MPS, then CUDA, then CPU.
# The conditional expression short-circuits, so CUDA is only probed when MPS is
# unavailable.
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

print(device)

cuda


Get model and tokenizer from Hugging Face

In [2]:
from transformers import DebertaV2Tokenizer, DebertaV2Model

# Checkpoint identifier shared by the tokenizer and the encoder.
model_name = "microsoft/deberta-v3-base"

# NOTE(review): `use_fast` is normally an AutoTokenizer option; confirm it has any
# effect when passed to this concrete tokenizer class.
tokenizer = DebertaV2Tokenizer.from_pretrained(
    model_name,
    use_fast=True,
    clean_up_tokenization_spaces=False,
)

# Load the encoder weights, then move them to the selected device.
model = DebertaV2Model.from_pretrained(model_name)
model = model.to(device)

# Switch to inference mode; as the cell's last expression this also displays the
# module structure.
model.eval()

DebertaV2Model(
  (embeddings): DebertaV2Embeddings(
    (word_embeddings): Embedding(128100, 768, padding_idx=0)
    (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
    (dropout): StableDropout()
  )
  (encoder): DebertaV2Encoder(
    (layer): ModuleList(
      (0-11): 12 x DebertaV2Layer(
        (attention): DebertaV2Attention(
          (self): DisentangledSelfAttention(
            (query_proj): Linear(in_features=768, out_features=768, bias=True)
            (key_proj): Linear(in_features=768, out_features=768, bias=True)
            (value_proj): Linear(in_features=768, out_features=768, bias=True)
            (pos_dropout): StableDropout()
            (dropout): StableDropout()
          )
          (output): DebertaV2SelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
            (dropout): StableDropout()
          )
        )
        (intermedia

Dataset load

In [3]:
# Relative path of the artifacts root; dataset locations are joined onto it.
ARTIFACTS_BASE = "../../../artifacts"

In [4]:
from os import path
from datasets import load_from_disk

# Directory that holds the saved train/test splits.
dataset_path = path.join(ARTIFACTS_BASE, 'step-1-classic-ml', 'deberta-v3-base')

# Each split lives in its own sub-directory and is read back with `load_from_disk`.
train_dataset, test_dataset = (
    load_from_disk(path.join(dataset_path, split_name))
    for split_name in ('train', 'test')
)

Prepare the loaded dataset for computing metrics

In [7]:
import numpy as np

# Materialise the "embedding" (features) and "label" (targets) columns of each
# split as NumPy arrays for the scikit-learn style estimators used later.
X_train_embedding_list, y_train_label_list = (
    np.array(train_dataset[column]) for column in ("embedding", "label")
)
X_test_embedding_list, y_test_label_list = (
    np.array(test_dataset[column]) for column in ("embedding", "label")
)

In [8]:
# Report how many samples each split contains (one line per split)
print(
    f"#Training Samples: {len(X_train_embedding_list)}",
    f"#Testing Samples: {len(X_test_embedding_list)}",
    sep="\n",
)

#Training Samples: 261738
#Testing Samples: 65416


In [9]:
# Import classification models
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
import lightgbm as lgb
from sklearn.neural_network import MLPClassifier

# Shared seed so that every stochastic estimator produces the same model on a
# fresh Restart & Run All (previously only the MLP was seeded).
RANDOM_STATE = 42

# Candidate classifiers as (display name, estimator) pairs, evaluated in this order.
# Most hyper-parameters are left at the library defaults; the explicit ones are the
# forest size / depth cap and the MLP architecture / solver.
estimators = [
    # Gaussian Naive Bayes takes no seed.
    ("Naive Bayes", GaussianNB()),
    ("LGB Classifier", lgb.LGBMClassifier(random_state=RANDOM_STATE)),
    (
        "Random Forest",
        RandomForestClassifier(
            n_estimators=100,
            max_depth=10,
            n_jobs=-1,  # use all available cores
            random_state=RANDOM_STATE,
        ),
    ),
    (
        "MLPClassifier",
        MLPClassifier(
            hidden_layer_sizes=(100,),
            activation='relu',
            solver='adam',
            random_state=RANDOM_STATE,
        ),
    ),
]

In [10]:
# Import performance metrics libraries
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm

# Metric name -> scoring function. The key order defines the column order of
# `results`, so column labels and computed values cannot drift apart.
metric_functions = {
    "accuracy": accuracy_score,
    "precision": precision_score,
    "recall": recall_score,
    "f1 score": f1_score,
}

# One entry per estimator, filled as each model finishes, so the scores of completed
# models stay available in `metric_rows` if this long-running loop is interrupted.
metric_rows = {}

# Fit and score every estimator in turn.
# No device transfer is attempted: the scikit-learn and LightGBM estimators used here
# expose no `.to()` method, so the previous `hasattr(est_obj, 'to')` guard never fired.
for est_name, est_obj in tqdm(desc='Calculate metrics', iterable=estimators):

    # Fit the model
    est_obj.fit(X_train_embedding_list, y_train_label_list)

    # Use the model to predict unseen prompts
    y_predict = est_obj.predict(X_test_embedding_list)

    # Calculate and store performance metrics (true labels first, predictions second)
    metric_rows[est_name] = [
        score(y_test_label_list, y_predict) for score in metric_functions.values()
    ]

# Build the summary table once: one row per estimator, one column per metric
results = pd.DataFrame.from_dict(
    metric_rows, orient="index", columns=list(metric_functions)
)

Calculate metrics:  25%|██▌       | 1/4 [00:04<00:12,  4.04s/it]

[LightGBM] [Info] Number of positive: 129232, number of negative: 132506
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.725354 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 195840
[LightGBM] [Info] Number of data points in the train set: 261738, number of used features: 768
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.493746 -> initscore=-0.025019
[LightGBM] [Info] Start training from score -0.025019


Calculate metrics: 100%|██████████| 4/4 [2:12:23<00:00, 1985.79s/it]


In [11]:
# Display the per-estimator metrics table (rows: estimators; columns: accuracy,
# precision, recall, f1 score).
results

Unnamed: 0,accuracy,precision,recall,f1 score
Naive Bayes,0.600373,0.575193,0.728618,0.642879
LGB Classifier,0.904152,0.871793,0.944789,0.906824
Random Forest,0.830699,0.780045,0.915093,0.842189
MLPClassifier,0.967944,0.985998,0.948535,0.966904
