In [24]:
import pandas as pd
import numpy as np
import os
import torchmetrics as tm
import torch
from tqdm.auto import tqdm

In [25]:
# Dataset locations; relative paths are resolved against the current working directory.
DATA_DIR_PATH = os.path.abspath("../../data")
SESSION_DIR_PATH = os.path.abspath("../../session")
TRAIN_DATASET_PATH = os.path.join(DATA_DIR_PATH, "jigsaw2019-train.csv")
TEST_DATASET_PATH = os.path.join(DATA_DIR_PATH, "jigsaw2019-test.csv")

# Names of the label columns selected from the prediction/target frames.
LABEL_LIST = [
    "toxicity",
    "obscene",
    "sexual_explicit",
    "identity_attack",
    "insult",
    "threat",
]

# Names of the identity columns.
IDENTITY_LIST = [
    "male",
    "female",
    "transgender",
    "other_gender",
    "heterosexual",
    "homosexual_gay_or_lesbian",
    "bisexual",
    "other_sexual_orientation",
    "christian",
    "jewish",
    "muslim",
    "hindu",
    "buddhist",
    "atheist",
    "other_religion",
    "black",
    "white",
    "asian",
    "latino",
    "other_race_or_ethnicity",
    "physical_disability",
    "intellectual_or_learning_disability",
    "psychiatric_or_mental_illness",
    "other_disability",
]

# Subset of IDENTITY_LIST singled out for analysis.
SELECTED_IDENTITY_LIST = [
    "male",
    "female",
    "black",
    "white",
    "homosexual_gay_or_lesbian",
    "christian",
    "jewish",
    "muslim",
    "psychiatric_or_mental_illness",
]


In [3]:
# Identifier of the session directory (under SESSION_DIR_PATH) that this notebook inspects.
SESSION_NAME = "pierre-roberta-pwbce_2022-04-01T00-16-50-635795"
CURRENT_SESSION_DIR_PATH = os.path.join(SESSION_DIR_PATH, SESSION_NAME)

# Every artifact file name is prefixed with the session name and lives directly
# inside the session directory; each name is declared next to its full path.
LOG_FILE_NAME = f"{SESSION_NAME}.loguru.log"
LOG_FILE_PATH = os.path.join(CURRENT_SESSION_DIR_PATH, LOG_FILE_NAME)

MODEL_FILE_NAME = f"{SESSION_NAME}.model"
MODEL_FILE_PATH = os.path.join(CURRENT_SESSION_DIR_PATH, MODEL_FILE_NAME)

TEST_FILE_NAME = f"{SESSION_NAME}.test.csv"
TEST_FILE_PATH = os.path.join(CURRENT_SESSION_DIR_PATH, TEST_FILE_NAME)

VALIDATION_DATASET_NAME = f"{SESSION_NAME}.jigsaw2019-validation.csv"
VALIDATION_DATASET_FILE_PATH = os.path.join(CURRENT_SESSION_DIR_PATH, VALIDATION_DATASET_NAME)

VALIDATION_FILE_NAME = f"{SESSION_NAME}.validation.csv"
VALIDATION_FILE_PATH = os.path.join(CURRENT_SESSION_DIR_PATH, VALIDATION_FILE_NAME)

METRIC_FILE_NAME = f"{SESSION_NAME}.metric.json"
METRIC_FILE_PATH = os.path.join(CURRENT_SESSION_DIR_PATH, METRIC_FILE_NAME)

## Calculer le meilleur seuil pour un F1 max 

In [4]:
# Read the validation targets and the validation predictions (in that order);
# in both files the first CSV column becomes the row index.
target_validation_df, pred_validation_df = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (VALIDATION_DATASET_FILE_PATH, VALIDATION_FILE_PATH)
)

In [5]:
# Float tensor holding the LABEL_LIST columns of the validation predictions.
pred_tensor = torch.Tensor(pred_validation_df.loc[:, LABEL_LIST].to_numpy())
# Same columns of the validation targets, cast to 64-bit integers.
# NOTE(review): the integer cast truncates toward zero, so this presumably
# expects the stored labels to already be 0/1 — confirm against the file.
target_tensor = torch.Tensor(target_validation_df.loc[:, LABEL_LIST].to_numpy()).long()

In [6]:
# Sweep candidate decision thresholds 0.000, 0.001, ... (1.0 excluded) and
# score each one with a freshly built F1 metric on the validation tensors.
thresholds = np.arange(0, 1, 0.001)
scores = [
    tm.F1Score(threshold=t)(pred_tensor, target_tensor)
    for t in tqdm(thresholds)
]
# Index of the highest F1 in the sweep (np.argmax keeps the first one on ties),
# then the threshold and score found at that index.
ix = np.argmax(scores)
best_thresholds, best_f1 = thresholds[ix], scores[ix]
print(f"{best_thresholds=}", f"\n{float(best_f1)=}")

  0%|          | 0/1000 [00:00<?, ?it/s]

best_thresholds=0.9590000000000001 
float(best_f1)=0.6591809988021851


## Performance sur le jeu de TEST

In [27]:
# Load the test-set predictions here so that this cell works on a fresh kernel:
# it previously displayed `pred_test_df` before any earlier cell had defined it.
# The first CSV column is used as the row index.
pred_test_df = pd.read_csv(TEST_FILE_PATH, index_col=0)
pred_test_df

Unnamed: 0,toxicity,obscene,sexual_explicit,identity_attack,insult,threat
0,0.997036,0.974258,0.874258,0.000620,0.988196,0.026945
1,0.990918,0.000867,0.000023,0.000243,0.991644,0.000082
2,0.996491,0.005192,0.000284,0.000203,0.994888,0.306850
3,0.000367,0.000038,0.000020,0.000051,0.000420,0.000023
4,0.007049,0.020725,0.010351,0.000224,0.003474,0.000500
...,...,...,...,...,...,...
194287,0.855642,0.004413,0.001169,0.004930,0.836272,0.050501
194288,0.946454,0.001487,0.000315,0.007047,0.101563,0.998236
194289,0.598870,0.000177,0.000381,0.990277,0.022358,0.000302
194290,0.998044,0.001010,0.845941,0.997775,0.916981,0.000566


In [33]:
# Load the model's test predictions and the reference test set; the first CSV
# column is used as the row index in both frames.
pred_test_df = pd.read_csv(TEST_FILE_PATH, index_col=0)
target_test_df = pd.read_csv(TEST_DATASET_PATH, index_col=0)
# Binarize the reference labels: values >= 0.5 become 1, everything else 0.
target_test_df[LABEL_LIST] = (target_test_df[LABEL_LIST] >= 0.5).astype(int)

# Replace the target index with a fresh 0..n-1 range (the old index becomes a
# column) so that the boolean mask below can line up with the predictions.
# NOTE(review): this presumes pred_test_df is indexed by row position — confirm.
target_test_df = target_test_df.reset_index()

# Keep only rows whose `white` column is not NaN. The mask is computed once and
# applied to both frames so they are guaranteed to be filtered by the same rule.
white_annotated_mask = target_test_df["white"].notna()
pred_test_df = pred_test_df[white_annotated_mask]
target_test_df = target_test_df[white_annotated_mask]

# Fail early if the two frames no longer describe the same number of rows;
# the metric cells below compare them element-wise.
assert len(pred_test_df) == len(target_test_df), (
    f"predictions ({len(pred_test_df)} rows) and targets "
    f"({len(target_test_df)} rows) are misaligned after filtering"
)

In [34]:
# Metric objects used to score the test predictions.
# The F1 decision threshold is taken from the validation sweep (`best_thresholds`)
# rather than being pasted by hand from that cell's printed output, so it cannot
# go stale when the sweep is re-run on different validation files.
tuned_f1_threshold = float(best_thresholds)

# NOTE(review): accuracy, recall and precision are built without an explicit
# threshold, i.e. with the library default, while both F1 metrics use the tuned
# one — confirm this difference is intended.
accuracy = tm.Accuracy()
f1score = tm.F1Score(threshold=tuned_f1_threshold)
# average=None yields one F1 value per class instead of a single aggregate.
f1score_multi = tm.F1Score(num_classes=6, average=None, threshold=tuned_f1_threshold)
recall = tm.Recall()
precision = tm.Precision()
auroc = tm.AUROC(num_classes=6)



In [35]:
# Float tensor with the LABEL_LIST columns of the test predictions.
pred = torch.Tensor(pred_test_df.loc[:, LABEL_LIST].to_numpy())
# Matching targets: thresholded at 0.5, turned into 0/1 and cast to 64-bit integers.
target = torch.Tensor(
    target_test_df.loc[:, LABEL_LIST].ge(0.5).astype(int).to_numpy()
).long()

In [36]:
# Display the shape of the prediction tensor (to compare with `target.shape`).
pred.shape

torch.Size([42823, 6])

In [37]:
# Display the shape of the target tensor (to compare with `pred.shape`).
target.shape

torch.Size([42823, 6])

In [38]:
# Evaluate each metric on the test tensors and print it after its label,
# in the same order as before. The tensor is passed to print() directly
# (not through an f-string) so that its printed form stays unchanged.
for metric_label, metric_fn in (
    ("Accuracy :", accuracy),
    ("F1 Score :", f1score),
    ("F1 Score per class :", f1score_multi),
    ("Precision :", precision),
    ("Recall :", recall),
    ("AUROC :", auroc),
):
    print(metric_label, metric_fn(pred, target))

Accuracy : tensor(0.9225)
F1 Score : tensor(0.5629)
F1 Score per class : tensor([0.6237, 0.4943, 0.3261, 0.4152, 0.6600, 0.2155])
Precision : tensor(0.3106)
Recall : tensor(0.9150)
AUROC : tensor(0.9719)


Accuracy : tensor(0.9489)
F1 Score : tensor(0.6143)
F1 Score per class : tensor([0.6688, 0.4648, 0.3099, 0.3980, 0.6773, 0.2578])
Precision : tensor(0.3284)
Recall : tensor(0.9232)
AUROC : tensor(0.9809)