In [1]:
import plotly.express as px
import os
import sys
import numpy as np

%load_ext autoreload
%autoreload 2

# Put the directory two levels above the current working directory on the
# import path (presumably the project root -- confirm) so that the project
# imports in this cell resolve.
module_path = os.path.abspath('../..')
if not any(entry == module_path for entry in sys.path):
    sys.path.append(module_path)
    
from utils import load_json
from collections import defaultdict
from data_selection.data_selection_utils import float_floor

from consistency import (
    dnli_human_bucket_predictions,
    dnli_test_set_predictions,
    anli_human_bucket_predictions,
    anli_test_set_predictions,
    construct_bucket_metadata, 
    plot_orig_v_bucket_conf, 
    plot_consistency_cdf,
    get_original_example_prediction_accuracy,
    calculate_weighted_consistency
)

In [29]:
def get_consistencies(model_name):
    """Print the weighted-consistency report of one model for every dataset.

    For each dataset key in ``dnli_human_bucket_predictions`` the model's
    human-bucket predictions and test-set predictions are looked up and handed
    to ``calculate_weighted_consistency``; the returned value is printed next
    to the dataset name. After an ``anli`` banner line the same report is
    printed for the anli tables, but only when the model has entries in BOTH
    ``anli_human_bucket_predictions`` and ``anli_test_set_predictions``.

    Args:
        model_name: Key identifying the model in the prediction tables,
            e.g. ``'specialized_roberta'``.

    Returns:
        None. All results are written to stdout.
    """
    for dataname in dnli_human_bucket_predictions:
        test_table = dnli_test_set_predictions[dataname]
        human_table = dnli_human_bucket_predictions[dataname]

        # A model missing from a table is forwarded as None; how that is
        # handled is left to calculate_weighted_consistency.
        test_set_preds = test_table[model_name] if model_name in test_table else None
        human_preds = human_table[model_name] if model_name in human_table else None

        print(dataname, calculate_weighted_consistency(
                paraphrase_predictions=human_preds,
                test_set_predictions=test_set_preds,
                show_test_distribution=False
            )
        )
        print()
    print('####### anli #######')

    # Both tables must contain the model. The previous condition only checked
    # the truthiness of anli_test_set_predictions.keys(), so a model present in
    # the human-bucket table alone raised KeyError on the lookup below.
    if model_name in anli_human_bucket_predictions and model_name in anli_test_set_predictions:
        print('anli', calculate_weighted_consistency(anli_human_bucket_predictions[model_name],anli_test_set_predictions[model_name],show_test_distribution=False))

In [30]:
# Print the per-dataset consistency report for the 'specialized_roberta' predictions.
get_consistencies(model_name='specialized_roberta')

snli {'accuracy': 0.512, 'mean_consistency': 0.7052253968253969, 'weighted_consistency': 0.9064806546026608}

atomic {'accuracy': 0.536, 'mean_consistency': 0.7478507936507937, 'weighted_consistency': 0.8616549681381606}

social {'accuracy': 0.516, 'mean_consistency': 0.6592190476190476, 'weighted_consistency': 0.9087363234357935}

####### anli #######
anli {'accuracy': 0.536, 'mean_consistency': 0.6805238095238096, 'weighted_consistency': 0.8744013294599748}


In [None]:
# Print the per-dataset consistency report for the 'unified_roberta' predictions.
get_consistencies(model_name='unified_roberta')

In [None]:
# Print the per-dataset consistency report for the 'specialized_full_input_lexical' predictions.
get_consistencies(model_name='specialized_full_input_lexical')

In [None]:
# Print the per-dataset consistency report for the 'gpt3-curie' predictions.
get_consistencies(model_name='gpt3-curie')

In [None]:
# Print the per-dataset consistency report for the 'bilstm' predictions.
get_consistencies(model_name='bilstm')

## Specialized RoBERTa

In [2]:
from consistency import plot_buckets

In [23]:
# The anli predictions are stored in their own table, so that figure is built
# separately: create it, save it as a PDF, then display it.
plot = plot_buckets('α-NLI RoBERTa', anli_human_bucket_predictions['specialized_roberta'])
plot.write_image('plots/specialized_roberta/anli-human-roberta.pdf')
plot.show()

# The three dnli datasets follow the same build / save / show recipe.
for dnli_key, figure_title, pdf_path in (
    ('snli', 'δ-SNLI RoBERTa', 'plots/specialized_roberta/snli-human-roberta.pdf'),
    ('social', 'δ-SOCIAL RoBERTa', 'plots/specialized_roberta/social-human-roberta.pdf'),
    ('atomic', 'δ-ATOMIC RoBERTa', 'plots/specialized_roberta/atomic-human-roberta.pdf'),
):
    # Look the predictions up inside the loop so each dataset is only
    # touched right before its own figure is drawn.
    plot = plot_buckets(figure_title, dnli_human_bucket_predictions[dnli_key]['specialized_roberta'])
    plot.write_image(pdf_path)
    plot.show()

In [None]:
from annotated_data.annotated_data import anli_human
from abductive_data import anli_dataset

## Unified RoBERTa

In [24]:
# One figure per dnli dataset for the 'unified_roberta' predictions:
# build it, save it as a PDF, then display it.
for dnli_key, figure_title, pdf_path in (
    ('snli', 'δ-SNLI Unified RoBERTa', 'plots/unified_roberta/snli-human-roberta-unified.pdf'),
    ('social', 'δ-SOCIAL Unified RoBERTa', 'plots/unified_roberta/social-human-roberta-unified.pdf'),
    ('atomic', 'δ-ATOMIC Unified RoBERTa', 'plots/unified_roberta/atomic-human-roberta-unified.pdf'),
):
    # Look the predictions up inside the loop so each dataset is only
    # touched right before its own figure is drawn.
    plot = plot_buckets(figure_title, dnli_human_bucket_predictions[dnli_key]['unified_roberta'])
    plot.write_image(pdf_path)
    plot.show()

## BiLSTM

In [25]:
# Disabled: the anli figure for 'bilstm' is not produced in this cell.
# plot = plot_buckets('α-NLI RoBERTa', anli_human_bucket_predictions['bilstm'])
# plot.write_image('plots/lstm/anli-human-bilstm.pdf')

# One figure per dnli dataset for the 'bilstm' predictions:
# build it, save it as a PDF, then display it.
for dnli_key, figure_title, pdf_path in (
    ('snli', 'δ-SNLI BiLSTM', 'plots/lstm/snli-human-bilstm.pdf'),
    ('social', 'δ-SOCIAL BiLSTM', 'plots/lstm/social-human-bilstm.pdf'),
    ('atomic', 'δ-ATOMIC BiLSTM', 'plots/lstm/atomic-human-bilstm.pdf'),
):
    # Look the predictions up inside the loop so each dataset is only
    # touched right before its own figure is drawn.
    plot = plot_buckets(figure_title, dnli_human_bucket_predictions[dnli_key]['bilstm'])
    plot.write_image(pdf_path)
    plot.show()

## FastText BOW

In [26]:
# No anli figure is produced in this cell; only the three dnli datasets are
# plotted for the 'specialized_full_input_lexical' predictions.
for dnli_key, figure_title, pdf_path in (
    ('snli', 'δ-SNLI Lexical', 'plots/lexical/snli-human-lexical.pdf'),
    ('social', 'δ-SOCIAL Lexical', 'plots/lexical/social-human-lexical.pdf'),
    ('atomic', 'δ-ATOMIC Lexical', 'plots/lexical/atomic-human-lexical.pdf'),
):
    # Build the figure, save it as a PDF, then display it. The lookup stays
    # inside the loop so each dataset is only touched right before its figure.
    plot = plot_buckets(figure_title, dnli_human_bucket_predictions[dnli_key]['specialized_full_input_lexical'])
    plot.write_image(pdf_path)
    plot.show()

## GPT-3 Curie

In [27]:
from utils import load_json

In [28]:
# No anli figure is produced in this cell; only the three dnli datasets are
# plotted for the 'gpt3-curie' predictions.
for dnli_key, figure_title, pdf_path in (
    ('snli', 'δ-SNLI GPT-3', 'plots/gpt3/snli-human-gpt3.pdf'),
    ('social', 'δ-SOCIAL GPT-3', 'plots/gpt3/social-human-gpt3.pdf'),
    ('atomic', 'δ-ATOMIC GPT-3', 'plots/gpt3/atomic-human-gpt3.pdf'),
):
    # Build the figure, save it as a PDF, then display it. The lookup stays
    # inside the loop so each dataset is only touched right before its figure.
    plot = plot_buckets(figure_title, dnli_human_bucket_predictions[dnli_key]['gpt3-curie'])
    plot.write_image(pdf_path)
    plot.show()