In [2]:
import pandas as pd
import json

In [15]:
# Per-domain performance results; each CSV is read into its own DataFrame,
# in the order material -> social -> physical.
material_data, social_data, physical_data = map(
    pd.read_csv,
    (
        "result_data/material_perf_10.csv",
        "result_data/social_perf_10.csv",
        "result_data/physical_perf_10.csv",
    ),
)

In [16]:
# Preview the first rows of the material-domain results to check the columns.
material_data.head()

Unnamed: 0,truism_number,perturbation,premise,avg_binary_score,avg_ratio_score
0,0,original,original,1.0,0.882697
1,0,original,asymmetric_premise,0.0,-0.695894
2,0,original,asymmetric_conclusion,0.0,-0.745709
3,0,negation,original,0.0,-0.931892
4,0,negation,asymmetric_premise,1.0,0.927463


In [17]:
# Load the per-domain truism parameter files. Each file holds one JSON object
# whose entries are later looked up by truism number (as a string key).
with open("truism_data/physical_data.json", "r") as f:
    physical_params = json.loads(f.read())

with open("truism_data/material_data.json", "r") as f:
    material_params = json.loads(f.read())

with open("truism_data/social_data.json", "r") as f:
    social_params = json.loads(f.read())

In [18]:
# Inspect the parameter entry stored under key "0" of the physical domain;
# its "template" field is what the aggregation below groups by.
physical_params["0"]

{'initial_comparison': 'A is smaller than B',
 'negation_switch': {'0': ['is', 'is not'],
  '1': ['is', 'is not'],
  '2': ['is', 'is not']},
 'antonym_switch': ['fit into', 'contain'],
 'original_comparison': 'is more likely to fit into',
 'paraphrase': 'is more easily put into',
 'inverted_paraphrase': 'is less able to enclose',
 'premise_switch': {'0': ['more', 'less'],
  '1': ['more', 'less'],
  '2': ['less', 'more']},
 'template': '1'}

In [19]:
def get_templates(param_array):
    """Collect every template id used across several parameter dictionaries.

    Args:
        param_array: iterable of dicts; each value of each dict must itself
            be a dict containing a "template" entry.

    Returns:
        dict mapping each distinct template id to a fresh empty dict, in
        order of first appearance.
    """
    return {
        entry["template"]: {}
        for param_dict in param_array
        for entry in param_dict.values()
    }

In [40]:
# Collect every template id referenced by the three domains' parameters;
# each id starts out mapped to an empty dict.
templates = get_templates([physical_params, material_params, social_params])

In [41]:
def get_perturbations(result_data, template_dict):
    """Give every template a zeroed score slot per perturbation/premise pair.

    For each row of ``result_data`` the key "<perturbation>-<premise>" is
    built, and every entry of ``template_dict`` receives a fresh
    ``{"accuracy": 0, "ratio_score": 0}`` under that key. Any existing value
    under the key is overwritten.

    Args:
        result_data: DataFrame with string columns "perturbation" and "premise".
        template_dict: dict mapping template id -> dict; mutated in place.

    Returns:
        The same ``template_dict`` object that was passed in.
    """
    for _, record in result_data.iterrows():
        slot_name = "-".join((record["perturbation"], record["premise"]))
        for slots in template_dict.values():
            # A new dict per template so the accumulators are never shared.
            slots[slot_name] = {"accuracy": 0, "ratio_score": 0}
    return template_dict

In [42]:
# Create zeroed accuracy/ratio_score slots for every perturbation-premise pair
# found in the material results. get_perturbations mutates and returns the dict
# it is given, so `filled_templates` and `templates` are the same object.
# NOTE(review): only the material results define the slots; this assumes the
# physical and social results use the same pairs - confirm.
filled_templates = get_perturbations(material_data, templates)

In [43]:
def get_perturbation_order(result_data, loop_count):
    """Return the "<perturbation>-<premise>" keys of the first rows, in order.

    Rows are counted by position, not by index label, so the result is
    correct for frames whose index is not the default 0..n-1 range
    (e.g. after filtering), and a non-positive ``loop_count`` yields an
    empty list.

    Args:
        result_data: DataFrame with string columns "perturbation" and "premise".
        loop_count: maximum number of leading rows to read.

    Returns:
        list of keys, one per row read (at most ``loop_count`` entries;
        fewer if the frame is shorter).
    """
    order = []
    for position, (_, row) in enumerate(result_data.iterrows()):
        # Stop on row position; the index label from iterrows is ignored.
        if position >= loop_count:
            break
        order.append(row["perturbation"] + "-" + row["premise"])

    return order
        

In [61]:
# Column order for the result tables, taken from the leading rows of the
# material results.
# NOTE(review): 24 presumably equals 8 perturbation types x 3 premise variants
# (one full block of rows for a single truism) - confirm against the data.
perturbation_order = get_perturbation_order(material_data, 24)

In [45]:
# Start every template's truism counter at zero before scores are accumulated.
for template_totals in filled_templates.values():
    template_totals["count"] = 0

In [46]:
def fill_pertubation_data(result_data, param_data, template_dict):
    """Add one domain's per-row scores onto the per-template running totals.

    Each row is routed to the template named by
    ``param_data[str(truism_number)]["template"]``. Its "avg_binary_score"
    is added to that template's "accuracy" total and its "avg_ratio_score"
    to the "ratio_score" total for the row's "<perturbation>-<premise>" key.
    A template's "count" goes up by one the first time each truism number is
    seen within this call.

    Args:
        result_data: DataFrame with columns "truism_number", "perturbation",
            "premise", "avg_binary_score" and "avg_ratio_score".
        param_data: dict keyed by the truism number as a string; each value
            has a "template" entry.
        template_dict: running totals per template; must already contain the
            perturbation keys and a "count" entry. Mutated in place.

    Returns:
        The same ``template_dict`` object that was passed in.
    """
    seen_truisms = set()

    for _, record in result_data.iterrows():
        slot_name = "-".join((record["perturbation"], record["premise"]))
        truism = record["truism_number"]
        bucket = template_dict[param_data[str(truism)]["template"]]

        totals = bucket[slot_name]
        totals["accuracy"] += record["avg_binary_score"]
        totals["ratio_score"] += record["avg_ratio_score"]

        # Count each truism once per call, however many rows it has.
        if truism not in seen_truisms:
            bucket["count"] += 1
            seen_truisms.add(truism)

    return template_dict

In [47]:
# Fold the three domains' results into the shared per-template totals.
# The totals are added to in place, so re-running this cell without first
# re-creating `filled_templates` counts every row twice.
for domain_results, domain_params in (
    (material_data, material_params),
    (physical_data, physical_params),
    (social_data, social_params),
):
    filled_templates = fill_pertubation_data(domain_results, domain_params, filled_templates)

In [60]:
# Inspect the accumulated totals for template "1" (sums, not yet averaged).
filled_templates["1"]

{'original-original': {'accuracy': 15.0, 'ratio_score': 8.194175335426578},
 'original-asymmetric_premise': {'accuracy': 5.0,
  'ratio_score': -7.871824809742952},
 'original-asymmetric_conclusion': {'accuracy': 5.0,
  'ratio_score': -7.96346966685271},
 'negation-original': {'accuracy': 3.0, 'ratio_score': -10.79495533603939},
 'negation-asymmetric_premise': {'accuracy': 17.0,
  'ratio_score': 11.114143165555735},
 'negation-asymmetric_conclusion': {'accuracy': 17.0,
  'ratio_score': 10.712765032296788},
 'antonym-original': {'accuracy': 6.0, 'ratio_score': -3.8139817801869027},
 'antonym-asymmetric_premise': {'accuracy': 16.0,
  'ratio_score': 6.871026972956518},
 'antonym-asymmetric_conclusion': {'accuracy': 17.0,
  'ratio_score': 7.076741212281882},
 'paraphrase-original': {'accuracy': 16.0, 'ratio_score': 10.675792976143313},
 'paraphrase-asymmetric_premise': {'accuracy': 4.0,
  'ratio_score': -8.291984361243133},
 'paraphrase-asymmetric_conclusion': {'accuracy': 4.0,
  'ratio_sco

In [49]:
# Turn each template's summed scores into means by dividing by the number of
# truisms counted for that template. The "count" entry itself is skipped.
averaged_acc_templates = {}
averaged_ratio_templates = {}
for template, totals in filled_templates.items():
    count = float(totals["count"])
    scored = {name: stats for name, stats in totals.items() if name != "count"}
    averaged_acc_templates[template] = {
        name: stats["accuracy"] / count for name, stats in scored.items()
    }
    averaged_ratio_templates[template] = {
        name: stats["ratio_score"] / count for name, stats in scored.items()
    }

In [53]:
# Mean accuracy per perturbation-premise pair for template "1".
averaged_acc_templates["1"]

{'original-original': 0.75,
 'original-asymmetric_premise': 0.25,
 'original-asymmetric_conclusion': 0.25,
 'negation-original': 0.15,
 'negation-asymmetric_premise': 0.85,
 'negation-asymmetric_conclusion': 0.85,
 'antonym-original': 0.3,
 'antonym-asymmetric_premise': 0.8,
 'antonym-asymmetric_conclusion': 0.85,
 'paraphrase-original': 0.8,
 'paraphrase-asymmetric_premise': 0.2,
 'paraphrase-asymmetric_conclusion': 0.2,
 'paraphrase_inversion-original': 0.6,
 'paraphrase_inversion-asymmetric_premise': 0.65,
 'paraphrase_inversion-asymmetric_conclusion': 0.65,
 'negation_antonym-original': 0.8,
 'negation_antonym-asymmetric_premise': 0.15,
 'negation_antonym-asymmetric_conclusion': 0.15,
 'negation_paraphrase-original': 0.2,
 'negation_paraphrase-asymmetric_premise': 0.7,
 'negation_paraphrase-asymmetric_conclusion': 0.8,
 'negation_paraphrase_inversion-original': 0.65,
 'negation_paraphrase_inversion-asymmetric_premise': 0.25,
 'negation_paraphrase_inversion-asymmetric_conclusion': 0

In [54]:
# Mean ratio score per perturbation-premise pair for template "1".
averaged_ratio_templates["1"]

{'original-original': 0.40970876677132895,
 'original-asymmetric_premise': -0.3935912404871476,
 'original-asymmetric_conclusion': -0.39817348334263547,
 'negation-original': -0.5397477668019695,
 'negation-asymmetric_premise': 0.5557071582777867,
 'negation-asymmetric_conclusion': 0.5356382516148395,
 'antonym-original': -0.19069908900934512,
 'antonym-asymmetric_premise': 0.3435513486478259,
 'antonym-asymmetric_conclusion': 0.3538370606140941,
 'paraphrase-original': 0.5337896488071656,
 'paraphrase-asymmetric_premise': -0.4145992180621566,
 'paraphrase-asymmetric_conclusion': -0.46326272374184124,
 'paraphrase_inversion-original': 0.11542707782904119,
 'paraphrase_inversion-asymmetric_premise': 0.11464637470357153,
 'paraphrase_inversion-asymmetric_conclusion': 0.11532817782136724,
 'negation_antonym-original': 0.5267903921587762,
 'negation_antonym-asymmetric_premise': -0.557943763774374,
 'negation_antonym-asymmetric_conclusion': -0.5692534190094112,
 'negation_paraphrase-origina

In [57]:
# physical_data[physical_data["truism_number"] == 1]

In [62]:
def create_tables(averaged_numbers, perturbation_order, threshold):
    """Build a numeric table and a matching "X"/"O" table of template scores.

    Templates are visited in sorted key order, one row per template; columns
    follow ``perturbation_order``.

    Args:
        averaged_numbers: dict mapping template id -> {perturbation key: value}.
        perturbation_order: sequence of perturbation keys to use as columns.
        threshold: a value less than or equal to this is marked "X";
            anything above it is marked "O".

    Returns:
        Tuple ``(values_table, marks_table)`` of DataFrames with the same shape.
    """
    values_by_column = {}
    marks_by_column = {}
    for template in sorted(averaged_numbers.keys()):
        template_scores = averaged_numbers[template]
        for perturbation in perturbation_order:
            value = template_scores[perturbation]
            values_by_column.setdefault(perturbation, []).append(value)
            marks_by_column.setdefault(perturbation, []).append(
                "X" if value <= threshold else "O"
            )

    return (
        pd.DataFrame.from_dict(values_by_column),
        pd.DataFrame.from_dict(marks_by_column),
    )

In [63]:
# Accuracy per template (rows) and perturbation (columns), plus an X/O view in
# which accuracies <= 0.5 are marked "X" and higher ones "O".
accuracy_table, visual_table = create_tables(averaged_acc_templates, perturbation_order, 0.5)


In [64]:
# One row per template (in sorted key order), one column per perturbation key.
accuracy_table

Unnamed: 0,original-original,original-asymmetric_premise,original-asymmetric_conclusion,negation-original,negation-asymmetric_premise,negation-asymmetric_conclusion,antonym-original,antonym-asymmetric_premise,antonym-asymmetric_conclusion,paraphrase-original,...,paraphrase_inversion-asymmetric_conclusion,negation_antonym-original,negation_antonym-asymmetric_premise,negation_antonym-asymmetric_conclusion,negation_paraphrase-original,negation_paraphrase-asymmetric_premise,negation_paraphrase-asymmetric_conclusion,negation_paraphrase_inversion-original,negation_paraphrase_inversion-asymmetric_premise,negation_paraphrase_inversion-asymmetric_conclusion
0,0.75,0.25,0.25,0.15,0.85,0.85,0.3,0.8,0.85,0.8,...,0.65,0.8,0.15,0.15,0.2,0.7,0.8,0.65,0.25,0.3
1,0.85,0.3,0.4,0.2,0.85,0.85,0.2,0.8,0.85,0.8,...,0.75,0.85,0.2,0.2,0.15,0.8,0.85,0.75,0.2,0.15
2,0.8,0.2,0.2,0.4,0.8,0.7,0.3,0.7,0.7,0.6,...,0.8,0.6,0.3,0.3,0.3,0.6,0.7,0.7,0.2,0.2
3,1.0,0.25,0.25,0.0,1.0,1.0,0.0,1.0,1.0,1.0,...,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0
4,1.0,0.0,0.0,0.333333,1.0,1.0,0.333333,1.0,1.0,1.0,...,0.5,1.0,0.0,0.0,0.333333,1.0,1.0,1.0,0.0,0.0


In [65]:
# Same layout as the numeric table: "X" = at or below the threshold, "O" = above it.
visual_table

Unnamed: 0,original-original,original-asymmetric_premise,original-asymmetric_conclusion,negation-original,negation-asymmetric_premise,negation-asymmetric_conclusion,antonym-original,antonym-asymmetric_premise,antonym-asymmetric_conclusion,paraphrase-original,...,paraphrase_inversion-asymmetric_conclusion,negation_antonym-original,negation_antonym-asymmetric_premise,negation_antonym-asymmetric_conclusion,negation_paraphrase-original,negation_paraphrase-asymmetric_premise,negation_paraphrase-asymmetric_conclusion,negation_paraphrase_inversion-original,negation_paraphrase_inversion-asymmetric_premise,negation_paraphrase_inversion-asymmetric_conclusion
0,O,X,X,X,O,O,X,O,O,O,...,O,O,X,X,X,O,O,O,X,X
1,O,X,X,X,O,O,X,O,O,O,...,O,O,X,X,X,O,O,O,X,X
2,O,X,X,X,O,O,X,O,O,O,...,O,O,X,X,X,O,O,O,X,X
3,O,X,X,X,O,O,X,O,O,O,...,O,O,X,X,X,O,O,O,X,X
4,O,X,X,X,O,O,X,O,O,O,...,X,O,X,X,X,O,O,O,X,X


In [68]:
# Write the accuracy table and its X/O companion without the row index.
# `visual_table` is rebound by the ratio-score cell further down, so this
# cell must run before that one for the file to hold the accuracy X/O view.
accuracy_table.to_csv("output_data/accuracy_table.csv", index=False)
visual_table.to_csv("output_data/visual_table.csv", index=False)

In [67]:
# Same tables for the mean ratio score, using 0 as the X/O cut-off.
# NOTE: this rebinds `visual_table`, replacing the accuracy-based X/O table
# created by the earlier create_tables call.
ratio_table, visual_table = create_tables(averaged_ratio_templates, perturbation_order, 0)


In [None]:
# Save the ratio-score tables under their own file names. Writing them to
# "accuracy_table.csv" / "visual_table.csv" would silently overwrite the
# accuracy outputs saved earlier in the notebook.
# At this point `visual_table` holds the X/O view produced together with
# `ratio_table`.
ratio_table.to_csv("output_data/ratio_table.csv", index=False)
visual_table.to_csv("output_data/ratio_visual_table.csv", index=False)

In [146]:
def aggregate_pertubations(raw_template_data, metric):
    """Average one metric per perturbation key, pooled over all templates.

    For every perturbation key of the first template, the metric's summed
    value is added up across all templates and divided by the total of all
    templates' "count" entries.

    Args:
        raw_template_data: dict mapping template id -> dict holding a "count"
            entry plus one ``{metric: summed_value, ...}`` dict per
            perturbation key.
        metric: name of the metric to aggregate (e.g. "accuracy").

    Returns:
        dict mapping perturbation key -> pooled mean. Empty when
        ``raw_template_data`` is empty.

    Raises:
        KeyError: if a template lacks "count", the metric, or a perturbation
            key that the first template has.
        ZeroDivisionError: if perturbation keys exist but all counts sum to 0.
    """
    # Nothing to aggregate; avoids indexing into an empty key list below.
    if not raw_template_data:
        return {}

    total_count = sum(
        (entry["count"] for entry in raw_template_data.values()), 0.0
    )

    # The first template's keys define which perturbations are reported.
    reference_entry = next(iter(raw_template_data.values()))

    output = {}
    for perturbation in reference_entry:
        if perturbation == "count":
            continue
        summed = sum(
            entry[perturbation][metric] for entry in raw_template_data.values()
        )
        output[perturbation] = summed / total_count

    return output

In [147]:
# Accuracy per perturbation-premise pair, pooled over all templates.
aggregate_pertubations(filled_templates, "accuracy")

{'original-original': 0.8516666666666667,
 'original-asymmetric_premise': 0.21833333333333332,
 'original-asymmetric_conclusion': 0.21666666666666667,
 'negation-original': 0.18333333333333332,
 'negation-asymmetric_premise': 0.8716666666666666,
 'negation-asymmetric_conclusion': 0.865,
 'antonym-original': 0.2916666666666667,
 'antonym-asymmetric_premise': 0.82,
 'antonym-asymmetric_conclusion': 0.8383333333333333,
 'paraphrase-original': 0.83,
 'paraphrase-asymmetric_premise': 0.18000000000000002,
 'paraphrase-asymmetric_conclusion': 0.18000000000000002,
 'paraphrase_inversion-original': 0.3516666666666666,
 'paraphrase_inversion-asymmetric_premise': 0.7,
 'paraphrase_inversion-asymmetric_conclusion': 0.7150000000000001,
 'negation_antonym-original': 0.8683333333333334,
 'negation_antonym-asymmetric_premise': 0.13833333333333334,
 'negation_antonym-asymmetric_conclusion': 0.14,
 'negation_paraphrase-original': 0.21333333333333332,
 'negation_paraphrase-asymmetric_premise': 0.81,
 'ne

In [170]:
def aggregate_templates(raw_template_data, metric):
    """Average one metric per template across all of its perturbation keys.

    Each template's summed metric values are added together and divided by
    ``(number of entries other than "count") * count``.

    Args:
        raw_template_data: dict mapping template id -> dict holding a "count"
            entry plus one ``{metric: summed_value, ...}`` dict per
            perturbation key.
        metric: name of the metric to aggregate (e.g. "accuracy").

    Returns:
        dict mapping template id -> mean of the metric. Templates that have
        no perturbation entries are left out.
    """
    totals = {}
    for template, entries in raw_template_data.items():
        values = [
            stats[metric] for name, stats in entries.items() if name != "count"
        ]
        if values:
            # Seed with the first value so the additions match a running "+=".
            totals[template] = sum(values[1:], values[0])

    return {
        template: total
        / float(
            (len(raw_template_data[template]) - 1)
            * raw_template_data[template]["count"]
        )
        for template, total in totals.items()
    }

In [171]:
# Mean accuracy per template, averaged over all perturbation-premise pairs.
aggregate_templates(filled_templates, "accuracy")

{'1': 0.5079166666666667,
 '2': 0.5135416666666666,
 '4': 0.5416666666666666,
 '5': 0.5340277777777778,
 '3': 0.465}