### Combine results

In [1]:
import pandas as pd
import numpy as np

from datetime import datetime
from glob import glob
from config import RESULTS

now = datetime.today().strftime("%d%m%Y %H%M")

In [114]:
baselines = glob(RESULTS + 'counterfactuals_*seed-*.json')
candidates = [sorted(glob(RESULTS + f'counterfactuals_{dataset}_{model}_top-5_*_counterfactualgan.json'), reverse=dataset != 'hatespeech')[:5]
              for model in ('whitebox', 'infersent', 'bert')
              for dataset in ('sst', 'hatespeech', 'snli')]

In [None]:
c_ = pd.concat([pd.read_json(f) for f in baselines + [l for li in candidates for l in li]]) \
      .sort_values(by=['dataset', 'model', 'explanation_method']) \
      .reset_index(drop=True)
c_.to_json(RESULTS + f'counterfactuals-{now}.json')

#### Fidelity

In [105]:
fid = c_.groupby(['model', 'dataset', 'explanation_method'])['fidelity']
fid = pd.concat([fid.mean(), fid.std()], axis=1).apply(lambda s: f'{s[0]:.3f} + {s[1]:.3f}', axis=1) \
        .unstack(2).T.swaplevel(0, 1, 1) \
        .sort_index(axis=1, key=lambda x: x.map({'hatespeech':0, 'whitebox':0, 'sst': 1, 'infersent': 1, 'snli': 2, 'bert': 2})) \
        .sort_index(axis=0, key=lambda x: x.map({'sedc': 0, 'pwwsantonym': 1, 'ebert': 2, 'textfooler': 3, 'counterfactualgan (top-5)': 4}))
fid

dataset,hatespeech,hatespeech,hatespeech,sst,sst,sst,snli,snli,snli
model,whitebox,infersent,bert,whitebox,infersent,bert,whitebox,infersent,bert
explanation_method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
sedc,0.169 + 0.028,0.115 + 0.001,0.122 + 0.001,0.629 + 0.009,0.683 + 0.020,0.652 + 0.015,0.407 + 0.001,0.398 + 0.003,0.477 + 0.003
pwwsantonym,0.169 + 0.002,0.124 + 0.002,0.130 + 0.002,0.694 + 0.010,0.686 + 0.043,0.641 + 0.014,0.400 + 0.002,0.408 + 0.004,0.493 + 0.002
ebert,0.229 + 0.001,0.239 + 0.002,0.243 + 0.001,0.465 + 0.010,0.478 + 0.015,0.440 + 0.018,0.330 + 0.004,0.313 + 0.001,0.313 + 0.002
textfooler,0.132 + 0.002,0.223 + 0.002,0.235 + 0.002,0.643 + 0.014,0.645 + 0.012,0.574 + 0.019,0.322 + 0.015,0.244 + 0.006,0.271 + 0.008
counterfactualgan (top-5),0.136 + 0.002,0.097 + 0.031,0.154 + 0.044,0.798 + 0.015,0.890 + 0.010,0.902 + 0.020,0.487 + 0.049,0.534 + 0.028,0.462 + 0.008


In [106]:
print(fid.to_latex().replace(' + ', '\\rpm ').replace(' 0.', ' .'))

\begin{tabular}{llllllllll}
\toprule
dataset & \multicolumn{3}{l}{hatespeech} & \multicolumn{3}{l}{sst} & \multicolumn{3}{l}{snli} \\
model &       whitebox &      infersent &           bert &       whitebox &      infersent &           bert &       whitebox &      infersent &           bert \\
explanation\_method        &                &                &                &                &                &                &                &                &                \\
\midrule
sedc                      &  .169\rpm .028 &  .115\rpm .001 &  .122\rpm .001 &  .629\rpm .009 &  .683\rpm .020 &  .652\rpm .015 &  .407\rpm .001 &  .398\rpm .003 &  .477\rpm .003 \\
pwwsantonym               &  .169\rpm .002 &  .124\rpm .002 &  .130\rpm .002 &  .694\rpm .010 &  .686\rpm .043 &  .641\rpm .014 &  .400\rpm .002 &  .408\rpm .004 &  .493\rpm .002 \\
ebert                     &  .229\rpm .001 &  .239\rpm .002 &  .243\rpm .001 &  .465\rpm .010 &  .478\rpm .015 &  .440\rpm .018 &  .330\rpm .004 &  

#### Semantic similarity

In [122]:
sem = c_.groupby(['model', 'dataset', 'explanation_method'])['semantic']
sem = pd.concat([1.0 - sem.mean(), sem.std()], axis=1).apply(lambda s: f'{s[0]:.2f}', axis=1) \
        .unstack(2).T.swaplevel(0, 1, 1) \
        .sort_index(axis=1, key=lambda x: x.map({'hatespeech':0, 'whitebox':0, 'sst': 1, 'infersent': 1, 'snli': 2, 'bert': 2})) \
        .sort_index(axis=0, key=lambda x: x.map({'sedc': 0, 'pwwsantonym': 1, 'ebert': 2, 'textfooler': 3, 'counterfactualgan (top-5)': 4}))
sem

dataset,hatespeech,hatespeech,hatespeech,sst,sst,sst,snli,snli,snli
model,whitebox,infersent,bert,whitebox,infersent,bert,whitebox,infersent,bert
explanation_method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
sedc,0.2,0.19,0.19,0.2,0.2,0.21,0.1,0.09,0.09
pwwsantonym,0.17,0.18,0.18,0.18,0.17,0.18,0.11,0.12,0.1
ebert,0.12,0.12,0.12,0.12,0.12,0.12,0.06,0.08,0.06
textfooler,0.21,0.07,0.07,0.21,0.26,0.2,0.22,0.24,0.27
counterfactualgan (top-5),0.32,0.37,0.21,0.32,0.37,0.41,0.11,0.36,0.37


In [123]:
print(sem.to_latex().replace(' + ', '\\rpm ').replace(' 0.', ' .'))

\begin{tabular}{llllllllll}
\toprule
dataset & \multicolumn{3}{l}{hatespeech} & \multicolumn{3}{l}{sst} & \multicolumn{3}{l}{snli} \\
model &   whitebox & infersent &  bert & whitebox & infersent &  bert & whitebox & infersent &  bert \\
explanation\_method        &            &           &       &          &           &       &          &           &       \\
\midrule
sedc                      &       .20 &      .19 &  .19 &     .20 &      .20 &  .21 &     .10 &      .09 &  .09 \\
pwwsantonym               &       .17 &      .18 &  .18 &     .18 &      .17 &  .18 &     .11 &      .12 &  .10 \\
ebert                     &       .12 &      .12 &  .12 &     .12 &      .12 &  .12 &     .06 &      .08 &  .06 \\
textfooler                &       .21 &      .07 &  .07 &     .21 &      .26 &  .20 &     .22 &      .24 &  .27 \\
counterfactualgan (top-5) &       .32 &      .37 &  .21 &     .32 &      .37 &  .41 &     .11 &      .36 &  .37 \\
\bottomrule
\end{tabular}



#### Time

In [100]:
time = c_.groupby(['dataset', 'explanation_method'])['inference_time'].mean() / 60.
time

dataset     explanation_method       
hatespeech  counterfactualgan (top-5)     0.384241
            ebert                         5.486953
            pwwsantonym                   1.041880
            sedc                          0.983457
            textfooler                   24.690040
snli        counterfactualgan (top-5)     1.385634
            ebert                         9.104822
            pwwsantonym                   2.732665
            sedc                          2.533796
            textfooler                   16.237449
sst         counterfactualgan (top-5)     0.167270
            ebert                         0.788701
            pwwsantonym                   0.246023
            sedc                          0.210332
            textfooler                   24.360862
Name: inference_time, dtype: float64

#### Dissimilarity

In [124]:
def similarity_percentage(row):
    lengths = {'hatespeech': 19.1, 'sst': 19.2, 'snli': 20.3}[row['dataset']]
    return np.mean(np.array(row['X_sim'])) / lengths

In [125]:
c_['new_dis'] = c_[['dataset', 'X_sim']].apply(similarity_percentage, axis=1)

In [126]:
dis = c_.groupby(['model', 'dataset', 'explanation_method'])['new_dis']
dis = pd.concat([dis.mean(), dis.std()], axis=1).apply(lambda s: f'{s[0]:.3f} + {s[1]:.3f}', axis=1) \
        .unstack(2).T.swaplevel(0, 1, 1) \
        .sort_index(axis=1, key=lambda x: x.map({'hatespeech':0, 'whitebox':0, 'sst': 1, 'infersent': 1, 'snli': 2, 'bert': 2})) \
        .sort_index(axis=0, key=lambda x: x.map({'lfo': 0, 'wordnet': 1, 'ebert': 2}))
dis

dataset,hatespeech,hatespeech,hatespeech,sst,sst,sst,snli,snli,snli
model,whitebox,infersent,bert,whitebox,infersent,bert,whitebox,infersent,bert
explanation_method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
ebert,0.208 + 0.000,0.208 + 0.000,0.208 + 0.001,0.068 + 0.001,0.067 + 0.001,0.067 + 0.000,0.759 + 0.000,0.759 + 0.000,0.759 + 0.000
counterfactualgan (top-5),0.613 + 0.034,0.915 + 0.311,0.344 + 0.420,0.499 + 0.285,0.630 + 0.057,0.690 + 0.007,0.488 + 0.052,1.015 + 0.127,0.945 + 0.126
pwwsantonym,0.155 + 0.019,0.167 + 0.001,0.162 + 0.001,0.166 + 0.001,0.143 + 0.013,0.153 + 0.002,0.763 + 0.000,0.757 + 0.001,0.756 + 0.000
sedc,0.337 + 0.288,0.208 + 0.001,0.200 + 0.002,0.195 + 0.001,0.187 + 0.004,0.205 + 0.002,0.755 + 0.001,0.743 + 0.001,0.745 + 0.000
textfooler,0.245 + 0.000,0.006 + 0.000,0.006 + 0.000,0.348 + 0.000,0.370 + 0.000,0.268 + 0.000,0.991 + 0.000,0.999 + 0.000,1.005 + 0.000


In [127]:
print(dis.to_latex().replace(' + ', '\\rpm ').replace(' 0.', ' .'))

\begin{tabular}{llllllllll}
\toprule
dataset & \multicolumn{3}{l}{hatespeech} & \multicolumn{3}{l}{sst} & \multicolumn{3}{l}{snli} \\
model &       whitebox &      infersent &           bert &       whitebox &      infersent &           bert &       whitebox &      infersent &           bert \\
explanation\_method        &                &                &                &                &                &                &                &                &                \\
\midrule
ebert                     &  .208\rpm .000 &  .208\rpm .000 &  .208\rpm .001 &  .068\rpm .001 &  .067\rpm .001 &  .067\rpm .000 &  .759\rpm .000 &  .759\rpm .000 &  .759\rpm .000 \\
counterfactualgan (top-5) &  .613\rpm .034 &  .915\rpm .311 &  .344\rpm .420 &  .499\rpm .285 &  .630\rpm .057 &  .690\rpm .007 &  .488\rpm .052 &  1.015\rpm .127 &  .945\rpm .126 \\
pwwsantonym               &  .155\rpm .019 &  .167\rpm .001 &  .162\rpm .001 &  .166\rpm .001 &  .143\rpm .013 &  .153\rpm .002 &  .763\rpm .000 & 

#### Performance

In [130]:
p_ = pd.concat([pd.read_csv(f) for f in glob(RESULTS + '/performance_*.csv')])
p_.to_csv(RESULTS + f'performance-{now}.csv', index=None)
p_

Unnamed: 0,predictive_model,dataset,performance,model
0,whitebox,Hatespeech,{'mse': 0.0877286064252976},whitebox
1,infersent,Hatespeech,{'mse': 0.1265283226966858},infersent
2,bert,Hatespeech,{'mse': 0.1222558245062828},bert
0,whitebox,SST,{'f1_score': 0.6790986790986792},whitebox
1,infersent,SST,{'f1_score': 0.7974686622619629},infersent
2,bert,SST,{'f1_score': 0.8833869099617004},bert


In [129]:
print(p_.to_latex())

\begin{tabular}{lllll}
\toprule
{} & predictive\_model &     dataset &                       performance &      model \\
\midrule
0 &         whitebox &  Hatespeech &       \{'mse': 0.0877286064252976\} &   whitebox \\
1 &        infersent &  Hatespeech &       \{'mse': 0.1265283226966858\} &  infersent \\
2 &             bert &  Hatespeech &       \{'mse': 0.1222558245062828\} &       bert \\
0 &         whitebox &         SST &  \{'f1\_score': 0.6790986790986792\} &   whitebox \\
1 &        infersent &         SST &  \{'f1\_score': 0.7974686622619629\} &  infersent \\
2 &             bert &         SST &  \{'f1\_score': 0.8833869099617004\} &       bert \\
\bottomrule
\end{tabular}



#### IDs for human experiment

In [31]:
# Hatespeech
idx = [179, 182, 359, 380, 404, 859, 886, 1075, 1616, 1643, 1849,
       1889, 1950, 1981, 2299, 2429, 2959, 2974, 3047, 3131, 3365,
       3415, 3521, 3595, 3638, 3851, 3882, 4095, 4118, 4141]

# SST
idx = [10, 23, 36, 90, 93, 94, 126, 136, 160, 180, 202,
       244, 245, 266, 279, 298, 319, 431, 453, 484, 589,
       642, 652, 842, 957, 1117, 1149, 1230, 1304, 1327]

# SNLI
idx = [180, 257, 364, 415, 550, 662, 746, 781, 1289, 2039, 2483,
       2584, 2886, 4311, 4323, 4792, 5507, 6168, 6206, 6390, 6513,
       6689, 6952, 7120, 7464, 7836, 8050, 8613, 8699, 9187]