In [118]:
import pandas as pd
import numpy as np
import os
import torchmetrics as tm
import torch
from tqdm.auto import tqdm

In [2]:
# Dataset
DATA_DIR_PATH = os.path.abspath("../../data")
SESSION_DIR_PATH = os.path.abspath("../../session")
TRAIN_DATASET_PATH = os.path.join(DATA_DIR_PATH, "jigsaw2019-train.csv")
TEST_DATASET_PATH = os.path.join(DATA_DIR_PATH, "jigsaw2019-test.csv")
LABEL_LIST = ['toxicity', 'obscene', 'sexual_explicit',
            'identity_attack', 'insult', 'threat']
IDENTITY_LIST = ['male', 'female', 'transgender', 'other_gender', 'heterosexual',
                'homosexual_gay_or_lesbian', 'bisexual','other_sexual_orientation',
                'christian', 'jewish', 'muslim', 'hindu','buddhist', 'atheist',
                'other_religion', 'black', 'white', 'asian', 'latino',
                'other_race_or_ethnicity', 'physical_disability',
                'intellectual_or_learning_disability',
                'psychiatric_or_mental_illness','other_disability']
SELECTED_IDENTITY_LIST = ['male', 'female', 'black', 'white', 'homosexual_gay_or_lesbian',
                    'christian', 'jewish', 'muslim', 'psychiatric_or_mental_illness']


In [78]:
SESSION_NAME = "roberta-pwbce_2022-03-28T17-41-19-543866"
CURRENT_SESSION_DIR_PATH = os.path.join(SESSION_DIR_PATH, SESSION_NAME)

LOG_FILE_NAME = f"{SESSION_NAME}.loguru.log"
MODEL_FILE_NAME = f"{SESSION_NAME}.model"
TEST_FILE_NAME = f"{SESSION_NAME}.test.csv"
VALIDATION_DATASET_NAME = f"{SESSION_NAME}.jigsaw2019-validation.csv"
VALIDATION_FILE_NAME = f"{SESSION_NAME}.validation.csv"
METRIC_FILE_NAME = f"{SESSION_NAME}.metric.json"
LOG_FILE_PATH = os.path.join(CURRENT_SESSION_DIR_PATH, LOG_FILE_NAME)
MODEL_FILE_PATH = os.path.join(CURRENT_SESSION_DIR_PATH, MODEL_FILE_NAME)
TEST_FILE_PATH = os.path.join(CURRENT_SESSION_DIR_PATH, TEST_FILE_NAME)
VALIDATION_DATASET_FILE_PATH = os.path.join(CURRENT_SESSION_DIR_PATH, VALIDATION_DATASET_NAME)
VALIDATION_FILE_PATH = os.path.join(CURRENT_SESSION_DIR_PATH, VALIDATION_FILE_NAME)
METRIC_FILE_PATH = os.path.join(CURRENT_SESSION_DIR_PATH, METRIC_FILE_NAME)

## Calculer le meilleur seuil pour un F1 max 

In [80]:
target_validation_df = pd.read_csv(VALIDATION_DATASET_FILE_PATH, index_col=0)
pred_validation_df = pd.read_csv(VALIDATION_FILE_PATH, index_col=0)

In [81]:
pred_tensor = torch.Tensor(pred_validation_df[LABEL_LIST].to_numpy())
target_tensor = torch.Tensor(target_validation_df[LABEL_LIST].to_numpy()).to(int)

In [123]:
thresholds = np.arange(0, 1, 0.001)
scores = [tm.F1Score(threshold=t)(pred_tensor, target_tensor) for t in tqdm(thresholds)]
# get best threshold
ix = np.argmax(scores)
best_thresholds = thresholds[ix]
best_f1 = scores[ix]
print(f"{best_thresholds=}", f"\n{float(best_f1)=}")

  0%|          | 0/1000 [00:00<?, ?it/s]

best_thresholds=0.761 
float(best_f1)=0.39986997842788696


## Performance sur le jeu de TEST

In [124]:
pred_test_df = pd.read_csv(TEST_FILE_PATH, index_col=0)
target_test_df = pd.read_csv(TEST_DATASET_PATH, index_col=0)
target_test_df[LABEL_LIST] = (target_test_df[LABEL_LIST] >= 0.5).astype(int)
target_test_df = target_test_df[~target_test_df.white.isna()]

In [128]:
accuracy = tm.Accuracy()
f1score = tm.F1Score(threshold=0.5)
recall = tm.Recall()
precision = tm.Precision()
auroc = tm.AUROC(num_classes=6)



In [129]:
pred= torch.Tensor(pred_test_df[LABEL_LIST].to_numpy())
target= torch.Tensor((target_test_df[LABEL_LIST]>=0.5).astype(int).values).to(int)

In [130]:
print("Accuracy :", accuracy(pred,target))
print("F1 Score :", f1score(pred,target))
print("Precision :", precision(pred,target))
print("Recall :", recall(pred,target))
print("AUROC :", auroc(pred,target))

Accuracy : tensor(0.9042)
F1 Score : tensor(0.3635)
Precision : tensor(0.2403)
Recall : tensor(0.7464)
AUROC : tensor(0.9028)
