## Imports

In [1]:
import os

# Move one level up (the notebook's parent directory is used as the project
# directory) and remember it. The guard makes the cell idempotent: re-running
# it in a live kernel no longer climbs one more directory level each time,
# which would silently change PROJ_DIR and every path derived from it.
if 'PROJ_DIR' not in globals():
    os.chdir('..')
    PROJ_DIR = os.getcwd()
print(PROJ_DIR)
from collections import defaultdict
import pandas as pd
from IPython.display import HTML
import hate_datasets as dc
import kg_adaptation as kg_adapt
import identity_group_identification as model_training
from functions.helper import get_metrics

# Datasets iterated over by the evaluation cell.
dnames = ['xtremespeech', 'gabhatecorpus', 'hatexplain']

# Evaluation variables
# ... `thr` is passed to the data preparation step and is part of the weights file name
thr = 0.5
# ... entity weighting schemes and inference methods; every combination is evaluated
weight_fs = [None, 'docf', 'logits', 'multiNB']
infer_methods = ['none', 'hierarchical']
# ... column of the prepared data that holds the ground-truth labels
identity = 'target_gso'

# Locations of the exported weights and of the knowledge graph file
weights_folder = os.path.join(PROJ_DIR, 'models', 'adaptation')
kg_path = os.path.join(weights_folder, 'gsso.owl')

/Users/prl222/OneDrive - The Open University/Projects/hate-speech-identities


2023-03-28 18:16:56.260740: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


## Train and export weights for KG entities

In [None]:
# Runs the pre-training shell script (presumably the step that trains and
# exports the KG entity weights named in the section heading -- confirm in the script).
# The script path is relative, so this assumes the working directory is still
# the one set by os.chdir('..') in the first cell.
! bash notebooks/pretrain_kg.sh

## Evaluation

In [2]:
# Results store: performance[infer_method][dataset name][weighting scheme] -> metrics dict
performance = defaultdict(lambda: defaultdict(dict))

for dname in dnames:
    print(f'\n{dname.upper()}')

    # Import data and prepare it for evaluation
    d0, text_col0, id_col0, identities_dict = dc.import_dataset(dname)
    data, target_cols, text_col, id_col = dc.prepare_for_model_evaluation(
        d0, text_col0, id_col0, identities_dict, thr
    )

    if dname == 'xtremespeech':
        # Include only texts in English
        is_english = data['Language'] == 'English'
        data = data.loc[is_english].reset_index(drop=True).copy()
        print(f'... {data.shape[0]} english texts.')

    # Ground-truth labels for the evaluated identity column
    y_trues = data[identity].values

    # Every (inference method, weighting scheme) pair, in the same order as
    # an outer loop over infer_methods with an inner loop over weight_fs.
    combos = [(m, w) for m in infer_methods for w in weight_fs]
    for infer_method, weight_f in combos:
        # ... get entities (without saving a checkpoint)
        method_name = f'{thr}-stem-{infer_method}-{weight_f}'
        weights_root = os.path.join(weights_folder, f'gsso_jigsaw_gendersexualorientation_{method_name}')
        entities = model_training.get_entities(
            df=data,
            text_col=text_col,
            id_col=id_col,
            kg_path=kg_path,
            match_method='stem',
            weights_root=weights_root,
            weight_f=weight_f,
            verbose=False,
        )

        # ... get weights: the first output column holds the scores, the second the terms
        o_cols = [f'{infer_method}_{str(weight_f)}_y_preds', f'{str(weight_f)}_{infer_method}_term']
        data[o_cols] = model_training.get_weights(
            entities=entities,
            kg_path=kg_path,
            weights_root=weights_root,
            infer_method=infer_method,
            weight_f=weight_f,
            verbose=False,
        )

        # ... compute metrics
        # Weighting schemes listed in kg_adapt.WEIGHT_BY_SCORE are binarized at 0.0,
        # all the others at 0.5.
        # NOTE(review): with thr_hard = 0.0 the `>=` comparison also labels scores
        # equal to 0 as positive -- confirm whether a strict `>` was intended.
        thr_hard = 0.0 if weight_f in kg_adapt.WEIGHT_BY_SCORE else 0.5
        y_preds = data[o_cols[0]].values
        y_preds_hard = y_preds >= thr_hard

        # get_metrics receives one-element lists; keep the first value of each metric
        raw_metrics = get_metrics([y_trues], [y_preds_hard], [y_preds])
        metrics = {name: values[0] for name, values in raw_metrics.items()}
        performance[infer_method][dname][str(weight_f)] = metrics
        print(f'{infer_method}_{str(weight_f)}. Threshold for binary predictions {thr_hard}')
    # Save predictions for inspection?
    # data.to_csv(os.path.join(weights_folder, f'{dname}.csv'))


XTREMESPEECH
xtremespeech imported successfully from data folder: 5063 annotations samples.
... 2639 english texts.
indexed 2639 documents
none_None. Threshold for binary predictions 0.5
indexed 2639 documents
none_docf. Threshold for binary predictions 0.0
indexed 2639 documents
none_logits. Threshold for binary predictions 0.5
indexed 2639 documents
none_multiNB. Threshold for binary predictions 0.5
indexed 2639 documents
hierarchical_None. Threshold for binary predictions 0.5
indexed 2639 documents
hierarchical_docf. Threshold for binary predictions 0.0
indexed 2639 documents
hierarchical_logits. Threshold for binary predictions 0.5
indexed 2639 documents
hierarchical_multiNB. Threshold for binary predictions 0.5

GABHATECORPUS
gabhatecorpus imported successfully from data folder: 7813 annotations samples.
indexed 7813 documents
none_None. Threshold for binary predictions 0.5
indexed 7813 documents
none_docf. Threshold for binary predictions 0.0
indexed 7813 documents
none_logits. 

In [3]:
# Table 2 (left): No Inference
# One table per dataset: rows are the weighting schemes, columns are the metrics.
for dname in dnames:
    scores = performance['none'][dname]
    performance_n = pd.DataFrame.from_dict(scores, orient='index')
    print(f'Performance in: {dname.upper()}')
    display(performance_n)

Performance in: XTREMESPEECH


Unnamed: 0,Accuracy,Chance,F1,ROC AUC,PR AUC
,8.49,93.22,12.91,50.91,6.9
docf,15.27,93.22,13.73,79.8,30.71
logits,89.35,93.22,42.77,84.42,33.83
multiNB,82.57,93.22,37.16,86.51,43.39


Performance in: GABHATECORPUS


Unnamed: 0,Accuracy,Chance,F1,ROC AUC,PR AUC
,14.9,88.97,20.34,51.51,11.34
docf,27.44,88.97,22.37,73.84,42.66
logits,90.39,88.97,54.46,85.63,54.71
multiNB,81.89,88.97,49.19,88.67,62.27


Performance in: HATEXPLAIN


Unnamed: 0,Accuracy,Chance,F1,ROC AUC,PR AUC
,25.64,76.66,38.19,50.94,23.68
docf,39.63,76.66,41.66,66.05,45.93
logits,78.55,76.66,47.16,79.42,54.97
multiNB,78.14,76.66,62.03,85.91,68.44


In [4]:
# Table 2 (right): Hierarchical Entity Inference
# One table per dataset: rows are the weighting schemes, columns are the metrics.
for dname in dnames:
    scores = performance['hierarchical'][dname]
    performance_h = pd.DataFrame.from_dict(scores, orient='index')
    print(f'Performance in: {dname.upper()}')
    display(performance_h)

Performance in: XTREMESPEECH


Unnamed: 0,Accuracy,Chance,F1,ROC AUC,PR AUC
,8.49,93.22,12.91,50.91,6.9
docf,7.16,93.22,12.75,75.12,21.79
logits,89.09,93.22,42.17,83.22,33.44
multiNB,84.58,93.22,37.09,81.49,39.68


Performance in: GABHATECORPUS


Unnamed: 0,Accuracy,Chance,F1,ROC AUC,PR AUC
,14.9,88.97,20.34,51.51,11.34
docf,11.69,88.97,19.95,69.08,35.54
logits,89.98,88.97,53.31,83.8,53.8
multiNB,81.83,88.97,45.3,84.72,55.68


Performance in: HATEXPLAIN


Unnamed: 0,Accuracy,Chance,F1,ROC AUC,PR AUC
,25.64,76.66,38.19,50.94,23.68
docf,23.61,76.66,37.92,55.06,39.58
logits,78.54,76.66,47.43,77.55,53.85
multiNB,77.05,76.66,57.03,81.45,61.89
