In [None]:
import sys
from pathlib import Path
# Put the parent of the current working directory on sys.path so the
# `config` module (presumably located there — confirm) can be imported.
ROOT_DIR = Path().resolve().parents[0]
sys.path.append(str(ROOT_DIR))
import config as cfg

from datasets import load_from_disk
import pandas as pd
import os
from transformers import AutoModelForSequenceClassification, Trainer
import numpy as np
import pickle
from sklearn.metrics import roc_auc_score

# Index of the training run to evaluate; it is passed to get_predictions()
# and selects the "run_<N_RUN>" checkpoint directory.
N_RUN = 0

# Load data

In [2]:
# Load the test dataset stored at cfg.PATH_DS_TEST_TOKENIZED; it is later fed
# to Trainer.predict() for every fold model.
ds_test_tokenized = load_from_disk(cfg.PATH_DS_TEST_TOKENIZED)

In [3]:
# Ground-truth labels for the test set. Rows whose label columns are -1 are
# excluded from scoring further down (see the filter on df_check_scores).
df_test_labels = pd.read_csv(cfg.PATH_DF_TEST_LABELS)
df_test_labels

Unnamed: 0,id,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,00001cee341fdb12,-1,-1,-1,-1,-1,-1
1,0000247867823ef7,-1,-1,-1,-1,-1,-1
2,00013b17ad220c46,-1,-1,-1,-1,-1,-1
3,00017563c3f7919a,-1,-1,-1,-1,-1,-1
4,00017695ad8997eb,-1,-1,-1,-1,-1,-1
...,...,...,...,...,...,...,...
153159,fffcd0960ee309b5,-1,-1,-1,-1,-1,-1
153160,fffd7a9a6eb32c16,-1,-1,-1,-1,-1,-1
153161,fffda9e8d6fafa9e,-1,-1,-1,-1,-1,-1
153162,fffe8f1340a79fc2,-1,-1,-1,-1,-1,-1


# Predictions

In [4]:
def get_predictions(n_run, ds_test_tokenized):
    """Predict on the test set with every fold model of one run and ensemble them.

    For each fold in ``range(cfg.N_FOLDS)`` the checkpoint stored under
    ``<cfg.PATH_CHECKPOINTS>/<cfg.MODEL_BASE>/run_<n_run>/fold_<i>/model_final``
    is loaded, moved to the GPU and run through ``Trainer.predict``.

    Args:
        n_run: Identifier of the training run whose fold checkpoints are used.
            It selects the checkpoint directory and the top-level key of the
            returned dictionary.
        ds_test_tokenized: Dataset handed to ``Trainer.predict`` for each fold.

    Returns:
        A tuple ``(preds_folds, probs)`` where

        * ``preds_folds`` is ``{"run_<n_run>": {"fold_<i>": {"model": ...,
          "trainer": ..., "predictions": ...}}}`` with one entry per fold, and
        * ``probs`` is the element-wise sigmoid of the fold-averaged
          predictions (the predictions are treated as logits).
    """
    # Always key on the *argument*. Relying on the notebook-level N_RUN global
    # breaks as soon as the function is called with a different run id.
    run_key = f"run_{n_run}"
    fold_results = {}

    for fold_id in range(cfg.N_FOLDS):
        print(f"Processing fold {fold_id}...")
        path_model_trained = os.path.join(
            cfg.PATH_CHECKPOINTS, cfg.MODEL_BASE, run_key, f"fold_{fold_id}", "model_final"
        )
        model = AutoModelForSequenceClassification.from_pretrained(path_model_trained).to("cuda")
        model.eval()
        trainer = Trainer(model=model, compute_metrics=None)
        preds = trainer.predict(ds_test_tokenized).predictions
        # NOTE: every fold's model and trainer stay referenced by the returned
        # dict, so all of them remain alive until the caller drops it.
        fold_results[f"fold_{fold_id}"] = {
            "model": model,
            "trainer": trainer,
            "predictions": preds,
        }

    preds_folds = {run_key: fold_results}

    # Ensemble: average the per-fold outputs, then apply the sigmoid.
    predictions_list = [fold["predictions"] for fold in fold_results.values()]
    logits_mean = np.mean(predictions_list, axis=0)
    probs = 1 / (1 + np.exp(-logits_mean))

    return preds_folds, probs

In [None]:
# Get predictions: per-fold results for run N_RUN plus the ensembled
# probabilities (sigmoid of the fold-averaged predictions).
preds_folds, probs = get_predictions(N_RUN, ds_test_tokenized)

In [None]:
# Save preds_folds dictionary to a file
# NOTE(review): besides the raw predictions, preds_folds also holds the model
# and Trainer object of every fold, so those are pickled as well — confirm this
# is intended (file size, picklability) or store only the predictions.
with open(cfg.PATH_PREDICTIONS_DICT, "wb") as f:
	pickle.dump(preds_folds, f)

In [None]:
# Create a DataFrame with the predictions: one probability column per label
# in cfg.LABEL_COLS, with the sample id as first column, and write it to CSV.
df_submission = pd.DataFrame(probs, columns=cfg.LABEL_COLS)
# NOTE(review): inserting a Series aligns on index, so this assumes the rows of
# `probs` follow the row order of df_test_labels (default integer index) — confirm.
df_submission.insert(0, "id", df_test_labels["id"])
df_submission.to_csv(cfg.PATH_DF_SUBMISSION, index=False)
df_submission

In [None]:
# Create a list of true label columns, i.e. the label names with the "_true"
# suffix that the merge of predictions and ground truth assigns to them.
label_cols_true = [f"{col}_true" for col in cfg.LABEL_COLS]

In [None]:
# Join predictions and ground truth on the sample id; overlapping label
# columns become "<label>_pred" and "<label>_true".
df_pred_vs_true = df_submission.merge(
    df_test_labels,
    on="id",
    suffixes=("_pred", "_true"),
)
# Keep only the rows in which every true label differs from -1.
is_scored = df_pred_vs_true[label_cols_true].ne(-1).all(axis=1)
df_check_scores = df_pred_vs_true.loc[is_scored]
df_check_scores

In [None]:
# ROC AUC per label, computed from the "<label>_true" / "<label>_pred" column pairs
auc_scores = {
    label: roc_auc_score(
        df_check_scores[f"{label}_true"],
        df_check_scores[f"{label}_pred"],
    )
    for label in cfg.LABEL_COLS
}

In [None]:
# Average the per-label AUCs into a single score and report both.
per_label_auc = list(auc_scores.values())
mean_auc = np.mean(per_label_auc)
print(f"AUC scores per label: {auc_scores}")
print(f"Mean column-wise ROC AUC: {mean_auc:.6f}")