In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [20]:
interventions = ['intervention_0', 'intervention_1', 'intervention_2', 'intervention_3']
models = ['bert-base-uncased-snli', 
'bert-base-uncased-snli-help',
'roberta-large-mnli',
'roberta-large-mnli-help',
'facebook/bart-large-mnli', 
'facebook/bart-large-mnli-help', 
'roberta-large-snli_mnli_fever_anli_R1_R2_R3',
'infobert']
# models = ['bert-base-uncased-snli', 'bert-base-uncased-snli-help', 'roberta-large-mnli', 'roberta-large-mnli-help', 'roberta-large-mnli-double-finetuning']
# models = ['facebook/bart-large-mnli', 'facebook/bart-large-mnli-help']

intervention_names = {
    'intervention_0': r' $\mbox{DCE}(S_{W} \to R)$',
    'intervention_1': r'$\mbox{TCE}(W \to R)$',
    'intervention_2': r'$\mbox{DCE}(S_{C} \to R)$',
    'intervention_3': r'$\mbox{TCE}(C \to R)$',
}

In [21]:
def plot_metric_comparison(result_set, interventions, metric_col):
    #TODO intervention keys -> names\n",

    fig = go.Figure()
    for intervention in interventions:
        fig.add_trace(go.Bar(
            name=intervention_names[f'{intervention}'],
            x=result_set.model,
            y=result_set[f'{intervention}_mean'],
            # error_y=dict(type='data', array=result_set[f'{intervention}_sem'])
        ))
    fig.update_layout(
        barmode='group',
        width=600)
    if metric_col in ['actual_two_class_ausal_effect', 'error_change']:
        fig.update_layout(
            yaxis_range=[0,1],
        )
    return fig

In [22]:
# Build Results Table (means only)
def get_tce_dce_ratio(row, interventions, column):
    ratio=float(row[(interventions[1], column)]) / float(row[(interventions[0], column)])
    return ratio



In [23]:
# Build Results Table (means only)
def get_tce_dce_ratio(row, interventions, column):
    ratio=float(row[(interventions[1], column)]) / float(row[(interventions[0], column)])
    return ratio



interventions = ['intervention_2', 'intervention_3']
columns = ['model', 'causal_effect', 'relative_confidence_change', 'error_change']

intervention_results = {}

for intervention in interventions:
    for column in columns:
        intervention_results[(intervention, column)] = []
    for model in models:
        intervention_results[(intervention, 'model')].append(model)
        model_result_path = f'../experiments/results/{intervention}/{model}/summary_results.tsv'
        model_result_df = pd.read_csv(model_result_path, sep='\t', index_col=0)

        for column in columns[1:]:
            intervention_results[(intervention,column)].append(model_result_df.at['mean', column])

# additonal ratio
intervention_results = pd.DataFrame(intervention_results)
intervention_results[('ratio', 'causal_effect')] = intervention_results.apply(lambda row: get_tce_dce_ratio(row, interventions ,column), axis=1)

intervention_results


        # intervention_result[f'{intervention}_mean'].append(result_df.at['mean', columns])

Unnamed: 0_level_0,intervention_2,intervention_2,intervention_2,intervention_2,intervention_3,intervention_3,intervention_3,intervention_3,ratio
Unnamed: 0_level_1,model,causal_effect,relative_confidence_change,error_change,model,causal_effect,relative_confidence_change,error_change,causal_effect
0,bert-base-uncased-snli,0.411832,0.0,0.547224,bert-base-uncased-snli,0.468151,2.580045,0.662187,1.210083
1,bert-base-uncased-snli-help,0.406127,0.0,0.406127,bert-base-uncased-snli-help,0.48509,4.082144,1.0119,2.491584
2,roberta-large-mnli,0.107393,0.0,0.079538,roberta-large-mnli,0.080638,38.71511,0.943301,11.85978
3,roberta-large-mnli-help,0.163295,0.0,0.163295,roberta-large-mnli-help,0.827943,2051487.0,0.172057,1.053657
4,facebook/bart-large-mnli,0.13659,0.0,0.209128,facebook/bart-large-mnli,0.130337,34.83517,0.916842,4.384109
5,facebook/bart-large-mnli-help,0.189855,0.0,0.189855,facebook/bart-large-mnli-help,0.791264,5420883.0,0.208736,1.099447
6,roberta-large-snli_mnli_fever_anli_R1_R2_R3,0.093058,0.0,0.131652,roberta-large-snli_mnli_fever_anli_R1_R2_R3,0.093868,2.009292,0.957441,7.272506
7,infobert,0.127337,0.0,0.165069,infobert,0.176326,29.60717,0.926151,5.610708


In [24]:
metric_col = 'causal_effect'
interventions = ['intervention_0', 'intervention_1']
result_set = {}

for item in ['mean', 'sem', 'std']:
    for intervention in interventions:
        result_set['model'] = []
        result_set[f'{intervention}_{item}'] = []
        for model in models:
            results_path = f'../experiments/results/{intervention}/{model}/summary_results.tsv'
            results = pd.read_csv(results_path, sep='\t', index_col=0)
            result_set['model'].append(model)
            result_set[f'{intervention}_{item}'].append(results.at[item, metric_col])

result_set = pd.DataFrame(result_set)

fig = plot_metric_comparison(result_set, interventions, metric_col)
fig.update_layout(
    title=r'Insertion Interventions: Causal Effect on Prediction',
    barmode='group',
    width=600)
fig.show()

fig.write_image('./results/plots/insertion_change_in_prediction.pdf')

In [25]:
metric_col = 'causal_effect'
interventions = ['intervention_2', 'intervention_3']
result_set = {}

for item in ['mean', 'sem', 'std']:
    for intervention in interventions:
        result_set['model'] = []
        result_set[f'{intervention}_{item}'] = []
        for model in models:
            results_path = f'../experiments/results/{intervention}/{model}/summary_results.tsv'
            results = pd.read_csv(results_path, sep='\t', index_col=0)
            result_set['model'].append(model)
            result_set[f'{intervention}_{item}'].append(results.at[item, metric_col])

result_set = pd.DataFrame(result_set)

fig = plot_metric_comparison(result_set, interventions, metric_col)
fig.update_layout(
        title=r'Context Interventions: Causal Effect on Prediction',
    )
fig.show()
fig.write_image('./results/plots/context_prediction_change.pdf')

In [9]:
metric_col = 'relative_confidence_change'
interventions = ['intervention_2', 'intervention_3']
result_set = {}

for item in ['mean', 'sem', 'std']:
    for intervention in interventions:
        result_set['model'] = []
        result_set[f'{intervention}_{item}'] = []
        for model in models:
            results_path = f'../experiments/results/{intervention}/{model}/summary_results.tsv'
            results = pd.read_csv(results_path, sep='\t', index_col=0)
            result_set['model'].append(model)
            result_set[f'{intervention}_{item}'].append(results.at[item, metric_col])

result_set = pd.DataFrame(result_set)

fig = plot_metric_comparison(result_set, interventions, metric_col)
fig.update_layout(
        title=r'Context Interventions: Relative Confidence Change',
    )
fig.show()
fig.write_image('./results/plots/context_rcc.pdf')

In [10]:
metric_col = 'error_change'
interventions = ['intervention_2', 'intervention_3']
result_set = {}

for item in ['mean', 'sem', 'std']:
    for intervention in interventions:
        result_set['model'] = []
        result_set[f'{intervention}_{item}'] = []
        for model in models:
            results_path = f'../experiments/results/{intervention}/{model}/summary_results.tsv'
            results = pd.read_csv(results_path, sep='\t', index_col=0)
            result_set['model'].append(model)
            result_set[f'{intervention}_{item}'].append(results.at[item, metric_col])

result_set = pd.DataFrame(result_set)

fig = plot_metric_comparison(result_set, interventions, metric_col)
fig.update_layout(
        title=r'Context Interventions: Error Change',
    )
fig.show()
fig.write_image('./results/plots/context_error_change.pdf')

In [26]:
result_set

Unnamed: 0,model,intervention_2_mean,intervention_3_mean,intervention_2_sem,intervention_3_sem,intervention_2_std,intervention_3_std
0,bert-base-uncased-snli,0.458146,0.627887,0.003758,0.004804,0.542748,0.5742
1,bert-base-uncased-snli-help,0.406127,0.51491,0.003401,0.004182,0.491121,0.499795
2,roberta-large-mnli,0.079538,0.943301,0.002357,0.002005,0.340421,0.2396
3,roberta-large-mnli-help,0.163295,0.172057,0.002559,0.003158,0.369643,0.377443
4,roberta-large-mnli-double-finetuning,0.158357,0.186266,0.002528,0.003257,0.365084,0.389335


In [27]:
print(result_set.to_latex())

\begin{tabular}{llrrrrrr}
\toprule
{} &                                 model &  intervention\_2\_mean &  intervention\_3\_mean &  intervention\_2\_sem &  intervention\_3\_sem &  intervention\_2\_std &  intervention\_3\_std \\
\midrule
0 &                bert-base-uncased-snli &             0.458146 &             0.627887 &            0.003758 &            0.004804 &            0.542748 &            0.574200 \\
1 &           bert-base-uncased-snli-help &             0.406127 &             0.514910 &            0.003401 &            0.004182 &            0.491121 &            0.499795 \\
2 &                    roberta-large-mnli &             0.079538 &             0.943301 &            0.002357 &            0.002005 &            0.340421 &            0.239600 \\
3 &               roberta-large-mnli-help &             0.163295 &             0.172057 &            0.002559 &            0.003158 &            0.369643 &            0.377443 \\
4 &  roberta-large-mnli-double-finetuning &     


In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.

