In [83]:
import pandas as pd
import json

In [84]:
# Read the three per-category result files with one shared column schema.
# header=None makes pandas treat the first line as data, so the column names
# are supplied explicitly.
material_data, social_data, physical_data = (
    pd.read_csv(
        results_path,
        header=None,
        names=["truism_num", "primary_key", "secondary_key", "binary_acc", "score"],
    )
    for results_path in (
        "result_data/Material_10_results",
        "result_data/Social_10_results",
        "result_data/Physical_10_results",
    )
)

In [85]:
# Preview the first rows of the material results to sanity-check the parsed columns.
material_data.head()

Unnamed: 0,truism_num,primary_key,secondary_key,binary_acc,score
0,0,original,original,1.0,0.950308
1,0,original,asymmetric_premise,0.0,-4.55333
2,0,original,asymmetric_conclusion,0.0,-4.826838
3,0,negation,original,0.0,-20.375517
4,0,negation,asymmetric_premise,1.0,0.945606


In [86]:
# Load the truism parameter files for each category. Each JSON document is
# later indexed by truism number (as a string) to look up its "template".
with open("truism_data/physical_data.json", "r") as physical_file, \
        open("truism_data/material_data.json", "r") as material_file, \
        open("truism_data/social_data.json", "r") as social_file:
    physical_params = json.load(physical_file)
    material_params = json.load(material_file)
    social_params = json.load(social_file)

In [87]:
# Inspect the parameter entry stored under key "0" of the physical set.
physical_params["0"]

{'initial_comparison': 'A is smaller than B',
 'negation_switch': {'0': ['is', 'is not'],
  '1': ['is', 'is not'],
  '2': ['is', 'is not']},
 'antonym_switch': ['fit into', 'contain'],
 'original_comparison': 'is more likely to fit into',
 'paraphrase': 'is more easily put into',
 'inverted_paraphrase': 'is less able to enclose',
 'premise_switch': {'0': ['more', 'less'],
  '1': ['more', 'less'],
  '2': ['less', 'more']},
 'template': '1'}

In [88]:
def get_templates(param_array):
    """Build an empty result slot for every template id found in the parameter sets.

    Args:
        param_array: iterable of dicts; every value of each dict must itself be
            a dict containing a "template" entry.

    Returns:
        A dict mapping each distinct template id (in first-seen order) to a
        fresh empty dict.
    """
    return {
        param_dict[truism_key]["template"]: {}
        for param_dict in param_array
        for truism_key in param_dict
    }

In [89]:
# Collect every template id used across the three truism sets; each id maps
# to an empty dict that later cells fill in.
templates = get_templates([physical_params, material_params, social_params])

In [90]:
def get_perturbations(result_data, template_dict):
    """Give every template a zeroed accumulator for each perturbation pair.

    A perturbation label is "<primary_key>-<secondary_key>", taken from the
    rows of ``result_data``.

    Args:
        result_data: DataFrame with string columns "primary_key" and
            "secondary_key".
        template_dict: dict mapping template id -> dict; modified in place.

    Returns:
        The same ``template_dict`` object, where each template now holds
        ``{"accuracy": 0, "ratio_score": 0}`` under every label.
    """
    # dict.fromkeys keeps first-seen order while dropping repeated labels.
    labels = dict.fromkeys(
        record.primary_key + "-" + record.secondary_key
        for record in result_data.itertuples(index=False)
    )
    for slots in template_dict.values():
        for label in labels:
            # A fresh dict per slot so accumulators are never shared.
            slots[label] = {"accuracy": 0, "ratio_score": 0}
    return template_dict

In [91]:
# Seed every template with zeroed accumulators for each perturbation pair.
# get_perturbations mutates and returns its argument, so filled_templates is
# the same object as templates.
# NOTE(review): only material_data supplies the perturbation labels here —
# presumably all three result files share the same label set; verify.
filled_templates = get_perturbations(material_data, templates)

In [92]:
# Start each template's truism counter at zero; it sits next to the
# per-perturbation accumulators under the reserved "count" key.
for template_slots in filled_templates.values():
    template_slots["count"] = 0

In [93]:
def fill_pertubation_data(result_data, param_data, template_dict):
    """Add one result set's accuracy and score sums into the per-template accumulators.

    Args:
        result_data: DataFrame with columns "truism_num", "primary_key",
            "secondary_key", "binary_acc" and "score".
        param_data: dict keyed by ``str(truism_num)``; each value holds a
            "template" entry naming the template the truism belongs to.
        template_dict: dict mapping template id -> accumulators, as prepared
            with a "count" entry and one ``{"accuracy", "ratio_score"}`` dict
            per "<primary_key>-<secondary_key>" label. Modified in place.

    Returns:
        The same ``template_dict`` object, with sums increased and each
        template's "count" raised once per distinct truism seen in this call.
    """
    seen_truisms = set()

    for record in result_data.itertuples(index=False):
        truism = record.truism_num
        label = record.primary_key + "-" + record.secondary_key
        template_slots = template_dict[param_data[str(truism)]["template"]]

        bucket = template_slots[label]
        bucket["accuracy"] += record.binary_acc
        bucket["ratio_score"] += record.score

        # Count each truism once per call, however many rows it has.
        if truism not in seen_truisms:
            template_slots["count"] += 1
            seen_truisms.add(truism)

    return template_dict

In [94]:
# Accumulate all three result sets into the same template dict.
# fill_pertubation_data adds to the existing sums and counts in place, so
# re-running this cell without first re-creating filled_templates would count
# every row a second time.
filled_templates = fill_pertubation_data(material_data, material_params, filled_templates)
filled_templates = fill_pertubation_data(physical_data, physical_params, filled_templates)
filled_templates = fill_pertubation_data(social_data, social_params, filled_templates)

In [156]:
# Raw per-template sums (not yet averaged) together with each template's "count".
filled_templates

{'1': {'original-original': {'accuracy': 16.1,
   'ratio_score': -145.4653223137211},
  'original-asymmetric_premise': {'accuracy': 4.1,
   'ratio_score': -454.39541705932993},
  'original-asymmetric_conclusion': {'accuracy': 4.3,
   'ratio_score': -402.30832384453055},
  'negation-original': {'accuracy': 3.3, 'ratio_score': -344.49677634726044},
  'negation-asymmetric_premise': {'accuracy': 17.0,
   'ratio_score': -95.67994510868414},
  'negation-asymmetric_conclusion': {'accuracy': 17.0,
   'ratio_score': -96.04185979781762},
  'antonym-original': {'accuracy': 7.6, 'ratio_score': -125.9963486606343},
  'antonym-asymmetric_premise': {'accuracy': 15.9,
   'ratio_score': -112.84764679186065},
  'antonym-asymmetric_conclusion': {'accuracy': 16.2,
   'ratio_score': -113.33778871082728},
  'paraphrase-original': {'accuracy': 16.0, 'ratio_score': -45.26242383109247},
  'paraphrase-asymmetric_premise': {'accuracy': 4.0,
   'ratio_score': -352.05085626357624},
  'paraphrase-asymmetric_conclus

In [100]:
# Turn the accumulated sums into per-template means by dividing every
# perturbation's totals by the number of truisms counted for that template.
averaged_acc_templates = {}
averaged_ratio_templates = {}
for template_name, template_slots in filled_templates.items():
    n_truisms = float(template_slots["count"])
    # "count" is bookkeeping, not a perturbation, so it is left out.
    perturbation_sums = {
        label: sums for label, sums in template_slots.items() if label != "count"
    }
    averaged_acc_templates[template_name] = {
        label: sums["accuracy"] / n_truisms
        for label, sums in perturbation_sums.items()
    }
    averaged_ratio_templates[template_name] = {
        label: sums["ratio_score"] / n_truisms
        for label, sums in perturbation_sums.items()
    }

In [115]:
# Mean accuracy per perturbation, grouped by template.
averaged_acc_templates

{'1': {'original-original': 0.805,
  'original-asymmetric_premise': 0.205,
  'original-asymmetric_conclusion': 0.215,
  'negation-original': 0.16499999999999998,
  'negation-asymmetric_premise': 0.85,
  'negation-asymmetric_conclusion': 0.85,
  'antonym-original': 0.38,
  'antonym-asymmetric_premise': 0.795,
  'antonym-asymmetric_conclusion': 0.8099999999999999,
  'paraphrase-original': 0.8,
  'paraphrase-asymmetric_premise': 0.2,
  'paraphrase-asymmetric_conclusion': 0.2,
  'paraphrase_inversion-original': 0.5249999999999999,
  'paraphrase_inversion-asymmetric_premise': 0.6599999999999999,
  'paraphrase_inversion-asymmetric_conclusion': 0.6399999999999999,
  'negation_antonym-original': 0.805,
  'negation_antonym-asymmetric_premise': 0.16499999999999998,
  'negation_antonym-asymmetric_conclusion': 0.16999999999999998,
  'negation_paraphrase-original': 0.205,
  'negation_paraphrase-asymmetric_premise': 0.795,
  'negation_paraphrase-asymmetric_conclusion': 0.8,
  'negation_paraphrase_in

In [108]:
# Mean score per perturbation for template "1" only.
averaged_ratio_templates["1"]

{'original-original': -7.273266115686054,
 'original-asymmetric_premise': -22.719770852966498,
 'original-asymmetric_conclusion': -20.115416192226526,
 'negation-original': -17.224838817363022,
 'negation-asymmetric_premise': -4.7839972554342065,
 'negation-asymmetric_conclusion': -4.802092989890881,
 'antonym-original': -6.2998174330317145,
 'antonym-asymmetric_premise': -5.642382339593032,
 'antonym-asymmetric_conclusion': -5.666889435541364,
 'paraphrase-original': -2.2631211915546237,
 'paraphrase-asymmetric_premise': -17.60254281317881,
 'paraphrase-asymmetric_conclusion': -17.15279543777391,
 'paraphrase_inversion-original': -0.879848399431679,
 'paraphrase_inversion-asymmetric_premise': -15.604732746682945,
 'paraphrase_inversion-asymmetric_conclusion': -14.680013169490607,
 'negation_antonym-original': -5.289014085596174,
 'negation_antonym-asymmetric_premise': -19.04867480903516,
 'negation_antonym-asymmetric_conclusion': -19.827944511979037,
 'negation_paraphrase-original': -

In [118]:
# Spot-check the raw result rows for truism 1 of the physical set.
physical_data[physical_data["truism_num"] == 1]

Unnamed: 0,truism_num,primary_key,secondary_key,binary_acc,score
24,1,original,original,1.0,0.583905
25,1,original,asymmetric_premise,0.0,-2.637537
26,1,original,asymmetric_conclusion,0.0,-2.294531
27,1,negation,original,0.0,-3.863461
28,1,negation,asymmetric_premise,1.0,0.83072
29,1,negation,asymmetric_conclusion,1.0,0.80915
30,1,antonym,original,0.8,0.155929
31,1,antonym,asymmetric_premise,1.0,0.411278
32,1,antonym,asymmetric_conclusion,1.0,0.410526
33,1,paraphrase,original,1.0,0.950554


In [131]:
def create_tables(averaged_numbers, threshold):
    """Build a numeric table and an X/O table of per-template averages.

    Args:
        averaged_numbers: dict mapping template id -> {perturbation: value}.
        threshold: values greater than or equal to this are marked "X",
            all others "O".

    Returns:
        A tuple ``(values_df, marks_df)``. Both DataFrames have one column per
        perturbation and one row per template, with templates taken in sorted
        key order.
    """
    values_by_perturbation = {}
    marks_by_perturbation = {}

    for template in sorted(averaged_numbers.keys()):
        for perturbation, value in averaged_numbers[template].items():
            mark = "X" if value >= threshold else "O"
            values_by_perturbation.setdefault(perturbation, []).append(value)
            marks_by_perturbation.setdefault(perturbation, []).append(mark)

    values_table = pd.DataFrame.from_dict(values_by_perturbation)
    marks_table = pd.DataFrame.from_dict(marks_by_perturbation)
    return (values_table, marks_table)

In [134]:
# With a 0.5 threshold, averaged accuracy >= 0.5 is marked "X" and anything lower "O".
accuracy_table, visual_table = create_tables(averaged_acc_templates, 0.5)

In [135]:
# One row per template (in sorted template-id order), one column per perturbation.
accuracy_table

Unnamed: 0,original-original,original-asymmetric_premise,original-asymmetric_conclusion,negation-original,negation-asymmetric_premise,negation-asymmetric_conclusion,antonym-original,antonym-asymmetric_premise,antonym-asymmetric_conclusion,paraphrase-original,...,paraphrase_inversion-asymmetric_conclusion,negation_antonym-original,negation_antonym-asymmetric_premise,negation_antonym-asymmetric_conclusion,negation_paraphrase-original,negation_paraphrase-asymmetric_premise,negation_paraphrase-asymmetric_conclusion,negation_paraphrase_inversion-original,negation_paraphrase_inversion-asymmetric_premise,negation_paraphrase_inversion-asymmetric_conclusion
0,0.805,0.205,0.215,0.165,0.85,0.85,0.38,0.795,0.81,0.8,...,0.64,0.805,0.165,0.17,0.205,0.795,0.8,0.75,0.2,0.2
1,0.85,0.35,0.335,0.2,0.85,0.85,0.225,0.805,0.8,0.845,...,0.735,0.85,0.2,0.2,0.155,0.835,0.83,0.8,0.15,0.15
2,0.8,0.1,0.1,0.16,0.83,0.81,0.15,0.73,0.83,0.7,...,0.87,0.9,0.1,0.1,0.24,0.65,0.69,0.8,0.1,0.1
3,1.0,0.25,0.25,0.05,1.0,1.0,0.025,1.0,1.0,0.975,...,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0
4,1.0,0.0,0.0,0.316667,1.0,0.966667,0.633333,0.983333,0.966667,1.0,...,0.45,1.0,0.0,0.0,0.533333,0.916667,0.883333,1.0,0.0,0.0


In [136]:
# Same layout as accuracy_table, with each value replaced by its "X"/"O" mark.
visual_table

Unnamed: 0,original-original,original-asymmetric_premise,original-asymmetric_conclusion,negation-original,negation-asymmetric_premise,negation-asymmetric_conclusion,antonym-original,antonym-asymmetric_premise,antonym-asymmetric_conclusion,paraphrase-original,...,paraphrase_inversion-asymmetric_conclusion,negation_antonym-original,negation_antonym-asymmetric_premise,negation_antonym-asymmetric_conclusion,negation_paraphrase-original,negation_paraphrase-asymmetric_premise,negation_paraphrase-asymmetric_conclusion,negation_paraphrase_inversion-original,negation_paraphrase_inversion-asymmetric_premise,negation_paraphrase_inversion-asymmetric_conclusion
0,X,O,O,O,X,X,O,X,X,X,...,X,X,O,O,O,X,X,X,O,O
1,X,O,O,O,X,X,O,X,X,X,...,X,X,O,O,O,X,X,X,O,O
2,X,O,O,O,X,X,O,X,X,X,...,X,X,O,O,O,X,X,X,O,O
3,X,O,O,O,X,X,O,X,X,X,...,X,X,O,O,O,X,X,X,O,O
4,X,O,O,O,X,X,X,X,X,X,...,O,X,O,O,X,X,X,X,O,O


In [137]:
# Persist both tables as CSV files under output_data/.
accuracy_table.to_csv("output_data/accuracy_table.csv")
visual_table.to_csv("output_data/visual_table.csv")

In [146]:
def aggregate_pertubations(raw_template_data, metric):
    """Average one metric per perturbation, pooled over all templates.

    Args:
        raw_template_data: dict mapping template id -> accumulators, where
            each template holds a "count" entry and one dict of summed metrics
            per perturbation label.
        metric: name of the summed metric to average (e.g. "accuracy").

    Returns:
        A dict mapping each perturbation label to its metric total across all
        templates, divided by the total of all templates' "count" values.
    """
    total_count = sum(
        (slots["count"] for slots in raw_template_data.values()), 0.0
    )

    # The perturbation labels are read from the first template only; every
    # other template is then indexed with those same labels.
    first_template = list(raw_template_data)[0]
    labels = [
        label for label in raw_template_data[first_template] if label != "count"
    ]

    totals = {}
    for label in labels:
        running = None
        for slots in raw_template_data.values():
            value = slots[label][metric]
            running = value if running is None else running + value
        totals[label] = running

    return {label: total / total_count for label, total in totals.items()}

In [147]:
# Mean accuracy per perturbation, pooled over all templates.
aggregate_pertubations(filled_templates, "accuracy")

{'original-original': 0.8516666666666667,
 'original-asymmetric_premise': 0.21833333333333332,
 'original-asymmetric_conclusion': 0.21666666666666667,
 'negation-original': 0.18333333333333332,
 'negation-asymmetric_premise': 0.8716666666666666,
 'negation-asymmetric_conclusion': 0.865,
 'antonym-original': 0.2916666666666667,
 'antonym-asymmetric_premise': 0.82,
 'antonym-asymmetric_conclusion': 0.8383333333333333,
 'paraphrase-original': 0.83,
 'paraphrase-asymmetric_premise': 0.18000000000000002,
 'paraphrase-asymmetric_conclusion': 0.18000000000000002,
 'paraphrase_inversion-original': 0.3516666666666666,
 'paraphrase_inversion-asymmetric_premise': 0.7,
 'paraphrase_inversion-asymmetric_conclusion': 0.7150000000000001,
 'negation_antonym-original': 0.8683333333333334,
 'negation_antonym-asymmetric_premise': 0.13833333333333334,
 'negation_antonym-asymmetric_conclusion': 0.14,
 'negation_paraphrase-original': 0.21333333333333332,
 'negation_paraphrase-asymmetric_premise': 0.81,
 'ne

In [170]:
def aggregate_templates(raw_template_data, metric):
    """Average one metric per template, pooled over all of its perturbations.

    Args:
        raw_template_data: dict mapping template id -> accumulators, where
            each template holds a "count" entry and one dict of summed metrics
            per perturbation label.
        metric: name of the summed metric to average (e.g. "accuracy").

    Returns:
        A dict mapping each template id to the total of ``metric`` over its
        perturbations, divided by (number of non-"count" entries * "count").
        Templates that have no perturbation entries are left out.
    """
    totals = {}
    for template, slots in raw_template_data.items():
        values = [sums[metric] for label, sums in slots.items() if label != "count"]
        if not values:
            continue
        running = values[0]
        for value in values[1:]:
            running = running + value
        totals[template] = running

    averages = {}
    for template, total in totals.items():
        slots = raw_template_data[template]
        # One key is the "count" bookkeeping entry, hence the minus one.
        n_perturbations = len(slots.keys()) - 1
        averages[template] = total / float(n_perturbations * slots["count"])
    return averages

In [171]:
# Mean accuracy per template, pooled over all of that template's perturbations.
aggregate_templates(filled_templates, "accuracy")

{'1': 0.5079166666666667,
 '2': 0.5135416666666666,
 '4': 0.5416666666666666,
 '5': 0.5340277777777778,
 '3': 0.465}