In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
#libraries
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support
import re
import logging
import time
import polars as pl

#setup
logging.basicConfig(level=logging.INFO)

#group library
from label_legends.preprocess import holdout, transform, load_data

  from .autonotebook import tqdm as notebook_tqdm
  return "\Tree [.%s ? ]" % self.label


In [3]:
def evaluate_baseline(true_labels, predictions, baseline_name):
    
    logging.info(f"Evaluating {baseline_name}...")
    
    #precision, recall, F1(macro avg)
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average="macro")
    
    #confusion matrix
    cm = confusion_matrix(true_labels, predictions)
    
    # Print Evaluation Metrics
    print(f"\n{baseline_name} Evaluation Metrics:")
    print(f"Precision (Macro Avg): {precision:.4f}")
    print(f"Recall (Macro Avg): {recall:.4f}")
    print(f"F1 Score (Macro Avg): {f1:.4f}")
    print("\nConfusion Matrix:")
    print(cm)

In [4]:
#loading the data
logging.info("Loading data...")
load_data().collect()

#transforming the data
val, tra = holdout()
logging.info("Transforming data...")
tra = transform(tra)
val = transform(val)

#converting 'label' column to integer
logging.info("Converting labels to integers...")
tra = tra.with_columns(
    pl.col("label").cast(pl.Int32)
)
val = val.with_columns(
    pl.col("label").cast(pl.Int32)
)

INFO:root:Loading data...
INFO:root:Transforming data...
INFO:root:Converting labels to integers...


In [5]:
tra

id,text,tokens,token_ids,label
i64,str,list[str],list[i64],i32
26016,"""#VoteDemOut seditionist wanna …","[""#"", ""votedemout"", … ""ven""]","[7, 3000, … 3000]",0
41060,"""Cerno hops from wave to wave: …","[""cerno"", ""hop"", … "".""]","[3000, 3000, … 26]",1
35766,"""think about it.. he´s called J…","[""think"", "".."", … ""elect""]","[2688, 27, … 911]",0
23678,"""Hmm, you could rewrite this wi…","[""hmm"", "","", … ""justsaying""]","[1284, 22, … 3000]",0
52396,"""Every girl in a game, or on si…","[""girl"", ""game"", … "".""]","[1164, 1135, … 26]",0
…,…,…,…,…
6785,"""I just think my dick couldn't …","[""just"", ""think"", … ""..😂😂😂""]","[1482, 2688, … 3000]",0
31355,"""no he deadasss hit the nail on…","[""deadass"", ""hit"", … "".""]","[3000, 1282, … 26]",0
49834,"""Aren't these goblins always wh…","[""goblins"", ""whine"", … ""hastagsarestillcool""]","[3000, 2914, … 3000]",0
26158,"""1. Stop calling girls, women a…","[""1."", ""stop"", … "".""]","[39, 2567, … 26]",0


In [6]:
#putting the text and label columns into lists
train_texts = tra['text'].to_list()
train_labels = tra['label'].to_list()

val_texts = val['text'].to_list()
val_labels = val['label'].to_list()

In [7]:
#calculating the most frequent class
logging.info("Calculating most frequent class...")
mf_class = max(set(train_labels), key=train_labels.count)

INFO:root:Calculating most frequent class...


In [8]:
#predict the most frequent class

#timing the test time
start_time = time.time()

#redict and evaluate
logging.info(f"Most frequent class: {mf_class}")
val_pred_mf = [mf_class] * len(val_labels)

train_time_mf = 0  # No training time for most frequent baseline
test_time_mf = time.time() - start_time

INFO:root:Most frequent class: 0


In [9]:
#evaluation
evaluate_baseline(val_labels, val_pred_mf, "Most Frequent Baseline")
print(f"Training Time: {train_time_mf:.4f} seconds")
print(f"Test Time: {test_time_mf:.4f} seconds")

INFO:root:Evaluating Most Frequent Baseline...



Most Frequent Baseline Evaluation Metrics:
Precision (Macro Avg): 0.3721
Recall (Macro Avg): 0.5000
F1 Score (Macro Avg): 0.4266

Confusion Matrix:
[[9376    0]
 [3224    0]]
Training Time: 0.0000 seconds
Test Time: 0.0010 seconds


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
