In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell executes, so edits to project modules are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
import plotly.express as px
import os
import sys
import numpy as np

In [3]:
# Put the directory two levels above the working directory on the import path
# (only once), so the project modules imported by later cells can be resolved.
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

In [4]:
from utils import load_json
from collections import defaultdict
from data_selection.data_selection_utils import float_floor

from consistency import (
    dnli_human_bucket_predictions,
    dnli_test_set_predictions,
    anli_human_bucket_predictions,
    anli_test_set_predictions,
    construct_bucket_metadata, 
    plot_orig_v_bucket_conf, 
    plot_consistency_cdf,
    get_original_example_prediction_accuracy,
    calculate_weighted_consistency
)

In [None]:
# Report the weighted consistency of the 'specialized_roberta' predictions:
# once per dataset in dnli_human_bucket_predictions, then once for anli.
print('Specialized RoBERTa Models')
for dataname, human_buckets in dnli_human_bucket_predictions.items():
    print(f'####### {dataname}#######')
    dnli_consistency = calculate_weighted_consistency(
        human_buckets['specialized_roberta'],
        dnli_test_set_predictions[dataname]['specialized_roberta'],
        show_test_distribution=False,
    )
    print(dataname, dnli_consistency)

# Unlike the per-dataset calls above, the anli call asks for the test
# distribution to be shown as well.
anli_consistency = calculate_weighted_consistency(
    anli_human_bucket_predictions['specialized_roberta'],
    anli_test_set_predictions['specialized_roberta'],
    show_test_distribution=True,
)
print('anli', anli_consistency)

## Specialized RoBERTa

In [43]:
def plot_roberta(name, bucket_preds):
    """Build the ``plot_orig_v_bucket_conf`` figure for one set of bucket predictions.

    Args:
        name: Label forwarded as the second argument of
            ``plot_orig_v_bucket_conf`` (callers pass a model/dataset title).
        bucket_preds: Bucket predictions handed to the ``consistency`` helpers.

    Returns:
        Whatever ``plot_orig_v_bucket_conf`` returns for the metadata that
        ``construct_bucket_metadata`` derives from ``bucket_preds``.
    """
    # The result of this call is not used here; it is kept so the helper still
    # runs before the metadata is built.
    # NOTE(review): confirm whether it is needed for a side effect or can go.
    get_original_example_prediction_accuracy(bucket_preds)
    return plot_orig_v_bucket_conf(construct_bucket_metadata(bucket_preds), name)

In [47]:
# Render each specialized RoBERTa plot and export it as a PDF in the working
# directory. The anli predictions live in their own mapping; the remaining
# datasets are looked up by key in dnli_human_bucket_predictions.
plot = plot_roberta('α-NLI RoBERTa', anli_human_bucket_predictions['specialized_roberta'])
plot.write_image('anli-human-roberta.pdf')

# (plot title, dataset key, output file) for each dnli dataset, in export order.
dnli_roberta_exports = (
    ('δ-SNLI RoBERTa', 'snli', 'snli-human-roberta.pdf'),
    ('δ-SOCIAL RoBERTa', 'social', 'social-human-roberta.pdf'),
    ('δ-ATOMIC RoBERTa', 'atomic', 'atomic-human-roberta.pdf'),
)
for plot_title, dataset_key, pdf_name in dnli_roberta_exports:
    plot = plot_roberta(plot_title, dnli_human_bucket_predictions[dataset_key]['specialized_roberta'])
    plot.write_image(pdf_name)

In [33]:
from annotated_data.annotated_data import anli_human

In [None]:
# Print every entry stored under 'anli.test.1063', each followed by a blank line.
for entry in anli_human['anli.test.1063']:
    print(entry, end='\n\n')

In [None]:
# Print every entry stored under 'anli.test.1105', each followed by a blank line.
for entry in anli_human['anli.test.1105']:
    print(entry, end='\n\n')

In [None]:
from abductive_data import anli_dataset

In [None]:
# Look up example 'anli.test.854' in the dataset and print it.
example = anli_dataset.get_example_by_id('anli.test.854')
print(example)

In [None]:
# Print every entry stored under 'anli.test.854', each followed by a blank line.
for entry in anli_human['anli.test.854']:
    print(entry, end='\n\n')

## FastText BOW

In [None]:
# For every dataset in dnli_human_bucket_predictions: print the accuracy that
# get_original_example_prediction_accuracy reports for the full-input and the
# partial-input lexical predictions, then plot both sets of predictions.
for b, dataset_buckets in dnli_human_bucket_predictions.items():
    print(b)
    print(
        'Specialized Lexical Model Accuracy:',
        get_original_example_prediction_accuracy(dataset_buckets['specialized_full_input_lexical'])
    )

    print(
        'Specialized Lexical Partial Input Model Accuracy:',
        get_original_example_prediction_accuracy(dataset_buckets['specialized_partial_input_lexical'])
    )
    print()

    # Full-input lexical model.
    metadata = construct_bucket_metadata(dataset_buckets['specialized_full_input_lexical'])
    plot = plot_orig_v_bucket_conf(metadata, f'δ-{b} Lexical Model')
    plot.show()

    # Partial-input lexical model. Expression values inside a loop body are not
    # auto-displayed by the notebook, so the figure must be shown explicitly;
    # previously it was built and silently discarded.
    metadata = construct_bucket_metadata(dataset_buckets['specialized_partial_input_lexical'])
    plot = plot_orig_v_bucket_conf(metadata, f'{b} Specialized Defeasible Lexical (Partial Input)')
    plot.show()

## GPT-3 Curie

In [None]:
from utils import load_json

In [None]:
# For every dataset in dnli_human_bucket_predictions: print the accuracy
# reported for the 'gpt3-curie' predictions and show the corresponding plot.
for b, dataset_buckets in dnli_human_bucket_predictions.items():
    print(b)
    curie_preds = dataset_buckets['gpt3-curie']
    curie_accuracy = get_original_example_prediction_accuracy(curie_preds)
    print('GPT-3 Curie Accuracy:', curie_accuracy)

    metadata = construct_bucket_metadata(curie_preds)
    plot = plot_orig_v_bucket_conf(metadata, f'δ-{b} GPT-3 (Curie)')
    plot.show()