In [84]:
import pandas as pd
import json

In [85]:
# Per-domain result tables: each CSV is read into its own DataFrame, in the
# order material, social, physical.
material_data, social_data, physical_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "result_data/material_perf_10.csv",
        "result_data/social_perf_10.csv",
        "result_data/physical_perf_10.csv",
    )
)

In [86]:
# Rows whose averaged binary score is exactly 1.0 or exactly 0.0.
material_data[material_data["avg_binary_score"].isin([1.0, 0.0])]

Unnamed: 0,truism_number,perturbation,premise,avg_binary_score,avg_ratio_score
0,0,original,original,1.0,0.882697
1,0,original,asymmetric_premise,0.0,-0.695894
2,0,original,asymmetric_conclusion,0.0,-0.745709
3,0,negation,original,0.0,-0.931892
4,0,negation,asymmetric_premise,1.0,0.927463
...,...,...,...,...,...
466,19,negation_paraphrase,asymmetric_premise,1.0,0.617321
467,19,negation_paraphrase,asymmetric_conclusion,1.0,0.637002
468,19,negation_paraphrase_inversion,original,1.0,0.884764
469,19,negation_paraphrase_inversion,asymmetric_premise,0.0,-0.868838


In [87]:
# Rows whose averaged binary score is exactly 1.0 or exactly 0.0.
physical_data[physical_data["avg_binary_score"].isin([1.0, 0.0])]

Unnamed: 0,truism_number,perturbation,premise,avg_binary_score,avg_ratio_score
0,0,original,original,1.0,0.761914
1,0,original,asymmetric_premise,0.0,-0.925548
2,0,original,asymmetric_conclusion,0.0,-0.917618
3,0,negation,original,0.0,-0.725308
4,0,negation,asymmetric_premise,1.0,0.626366
...,...,...,...,...,...
472,19,negation_paraphrase,asymmetric_premise,1.0,0.800014
473,19,negation_paraphrase,asymmetric_conclusion,1.0,0.737316
474,19,negation_paraphrase_inversion,original,1.0,0.820279
475,19,negation_paraphrase_inversion,asymmetric_premise,0.0,-0.916605


In [88]:
# Rows whose averaged binary score is exactly 1.0 or exactly 0.0.
social_data[social_data["avg_binary_score"].isin([1.0, 0.0])]

Unnamed: 0,truism_number,perturbation,premise,avg_binary_score,avg_ratio_score
0,0,original,original,1.0,0.622523
1,0,original,asymmetric_premise,0.0,-0.389334
2,0,original,asymmetric_conclusion,0.0,-0.459831
3,0,negation,original,0.0,-0.219677
4,0,negation,asymmetric_premise,1.0,0.237032
...,...,...,...,...,...
472,19,negation_paraphrase,asymmetric_premise,1.0,0.001274
473,19,negation_paraphrase,asymmetric_conclusion,0.0,-0.007141
474,19,negation_paraphrase_inversion,original,1.0,0.931740
475,19,negation_paraphrase_inversion,asymmetric_premise,0.0,-0.924330


In [89]:
# Load the per-domain truism parameter files. The encoding is pinned to UTF-8
# (the standard encoding for JSON) so the result does not depend on the
# platform's locale default.
with open("truism_data/physical_data.json", "r", encoding="utf-8") as f:
    physical_params = json.load(f)

with open("truism_data/material_data.json", "r", encoding="utf-8") as f:
    material_params = json.load(f)

with open("truism_data/social_data.json", "r", encoding="utf-8") as f:
    social_params = json.load(f)

In [90]:
# Inspect the parameter entry stored under key "0" of the physical set.
physical_params["0"]

{'initial_comparison': 'A is smaller than B',
 'negation_switch': {'0': ['can', 'can not'],
  '1': ['is', 'is not'],
  '2': ['is', 'is not']},
 'antonym_switch': ['fit into', 'contain'],
 'original_comparison': 'can fit into more boxes than',
 'paraphrase': 'is more easily put into a container than',
 'inverted_paraphrase': 'is less able to hold objects than',
 'premise_switch': {'0': ['more', 'less'],
  '1': ['more', 'less'],
  '2': ['less', 'more']},
 'template': '4'}

In [91]:
def get_templates(param_array):
    """Collect every template id referenced by the given parameter sets.

    Args:
        param_array: iterable of dicts; each value of such a dict must itself
            be a dict carrying a "template" entry.

    Returns:
        A dict mapping each distinct template id to its own new empty dict,
        ordered by first appearance.
    """
    return {
        entry["template"]: {}
        for params in param_array
        for entry in params.values()
    }

In [92]:
# One empty bucket per template id found across the three parameter sets.
templates = get_templates([physical_params, material_params, social_params])

In [93]:
def get_perturbations(result_data, template_dict):
    """Give every template a zeroed accumulator per perturbation/premise pair.

    Keys have the form "<perturbation>-<premise>" and are built from the
    columns of the same names in ``result_data``, one row at a time.

    Args:
        result_data: DataFrame with string columns "perturbation" and "premise".
        template_dict: dict mapping template ids to dicts of accumulators.

    Returns:
        ``template_dict`` itself. It is modified in place; any accumulator
        already stored under a generated key is replaced by a zeroed one.
    """
    for record in result_data.to_dict("records"):
        combined_key = record["perturbation"] + "-" + record["premise"]
        for buckets in template_dict.values():
            # A new dict per template so accumulators are never shared.
            buckets[combined_key] = {"accuracy" : 0, "ratio_score" : 0}
    return template_dict

In [94]:
# get_perturbations fills `templates` in place and returns that same dict, so
# `filled_templates` and `templates` refer to one object from here on.
filled_templates = get_perturbations(material_data, templates)

In [95]:
def get_perturbation_order(result_data, loop_count):
    """Return the "<perturbation>-<premise>" keys of the leading rows.

    Rows are selected by position rather than by index label, so the result
    is the same whether or not ``result_data`` carries a default 0..n-1
    index (for example after filtering or re-indexing).

    Args:
        result_data: DataFrame with string columns "perturbation" and "premise".
        loop_count: number of leading rows to read (an integer). Values of
            zero or less yield an empty list; values larger than the frame
            yield one key per available row.

    Returns:
        List of keys in row order.
    """
    # Clamp at zero: a negative slice bound would count from the end instead.
    leading_rows = result_data.iloc[:max(loop_count, 0)]
    return [
        perturbation + "-" + premise
        for perturbation, premise in zip(
            leading_rows["perturbation"], leading_rows["premise"]
        )
    ]
        

In [96]:
# Column order for the tables built later. 24 rows are read as one full cycle
# of perturbation/premise keys (presumably 8 perturbations x 3 premises per
# truism -- TODO confirm against the result files).
perturbation_order = get_perturbation_order(material_data, 24)

In [97]:
# Start every template's truism counter at zero before accumulation.
for template_bucket in filled_templates.values():
    template_bucket["count"] = 0

In [98]:
def fill_pertubation_data(result_data, param_data, template_dict, multiple=1):
    """Accumulate one result table into the per-template running totals.

    For every row, the truism's template id is looked up in ``param_data``
    (keyed by ``str(truism_number)``). The row's ``avg_binary_score`` and
    ``avg_ratio_score``, each scaled by ``multiple``, are then added to that
    template's "<perturbation>-<premise>" accumulator. The template's "count"
    grows by ``multiple`` the first time each truism number is seen within
    this call.

    Args:
        result_data: DataFrame with columns "truism_number", "perturbation",
            "premise", "avg_binary_score" and "avg_ratio_score".
        param_data: dict mapping stringified truism numbers to dicts that
            carry a "template" entry.
        template_dict: accumulators; every referenced template must already
            hold the needed perturbation keys and a "count" entry.
        multiple: weight applied to both scores and to the count increment.

    Returns:
        ``template_dict`` itself, updated in place. Calling this twice with
        the same data therefore adds that data twice.
    """
    counted_truisms = set()

    for record in result_data.to_dict("records"):
        combined_key = record["perturbation"] + "-" + record["premise"]
        truism = record["truism_number"]
        bucket = template_dict[param_data[str(truism)]["template"]]

        totals = bucket[combined_key]
        totals["accuracy"] += record["avg_binary_score"]*multiple
        totals["ratio_score"] += record["avg_ratio_score"]*multiple

        # Each truism contributes to its template's count only once per call.
        if truism not in counted_truisms:
            bucket["count"] += 1*multiple
            counted_truisms.add(truism)

    return template_dict

In [99]:
# Accumulate all three domains into the same buckets. The totals are added in
# place, so re-running this cell without first rebuilding the buckets and
# resetting the counts double-counts every row.
filled_templates = fill_pertubation_data(material_data, material_params, filled_templates)
filled_templates = fill_pertubation_data(physical_data, physical_params, filled_templates)
filled_templates = fill_pertubation_data(social_data, social_params, filled_templates)

In [100]:
# Summed (not yet averaged) scores accumulated for template "1".
filled_templates["1"]

{'original-original': {'accuracy': 17.0, 'ratio_score': 10.222966987374242},
 'original-asymmetric_premise': {'accuracy': 6.0,
  'ratio_score': -6.223235449648349},
 'original-asymmetric_conclusion': {'accuracy': 8.0,
  'ratio_score': -5.245776944369478},
 'negation-original': {'accuracy': 4.0, 'ratio_score': -10.424596391103714},
 'negation-asymmetric_premise': {'accuracy': 17.0,
  'ratio_score': 11.970392070741589},
 'negation-asymmetric_conclusion': {'accuracy': 17.0,
  'ratio_score': 11.92414798528836},
 'antonym-original': {'accuracy': 4.0, 'ratio_score': -8.948052846359142},
 'antonym-asymmetric_premise': {'accuracy': 16.0,
  'ratio_score': 8.97016981782059},
 'antonym-asymmetric_conclusion': {'accuracy': 17.0,
  'ratio_score': 10.276061887634144},
 'paraphrase-original': {'accuracy': 17.0, 'ratio_score': 8.839944930501959},
 'paraphrase-asymmetric_premise': {'accuracy': 4.0,
  'ratio_score': -5.615515725852106},
 'paraphrase-asymmetric_conclusion': {'accuracy': 3.0,
  'ratio_sco

In [101]:
# Convert each template's summed scores into per-truism averages, keeping one
# lookup per metric. "count" is the divisor rather than a perturbation, so it
# is filtered out of the results.
averaged_acc_templates = {}
averaged_ratio_templates = {}
for template, buckets in filled_templates.items():
    count = float(buckets["count"])
    averaged_acc_templates[template] = {
        name: totals["accuracy"] / count
        for name, totals in buckets.items()
        if name != "count"
    }
    averaged_ratio_templates[template] = {
        name: totals["ratio_score"] / count
        for name, totals in buckets.items()
        if name != "count"
    }

In [102]:
# Average accuracy per perturbation/premise key for template "1".
averaged_acc_templates["1"]

{'original-original': 0.85,
 'original-asymmetric_premise': 0.3,
 'original-asymmetric_conclusion': 0.4,
 'negation-original': 0.2,
 'negation-asymmetric_premise': 0.85,
 'negation-asymmetric_conclusion': 0.85,
 'antonym-original': 0.2,
 'antonym-asymmetric_premise': 0.8,
 'antonym-asymmetric_conclusion': 0.85,
 'paraphrase-original': 0.85,
 'paraphrase-asymmetric_premise': 0.2,
 'paraphrase-asymmetric_conclusion': 0.15,
 'paraphrase_inversion-original': 0.25,
 'paraphrase_inversion-asymmetric_premise': 0.75,
 'paraphrase_inversion-asymmetric_conclusion': 0.75,
 'negation_antonym-original': 0.85,
 'negation_antonym-asymmetric_premise': 0.2,
 'negation_antonym-asymmetric_conclusion': 0.2,
 'negation_paraphrase-original': 0.15,
 'negation_paraphrase-asymmetric_premise': 0.85,
 'negation_paraphrase-asymmetric_conclusion': 0.85,
 'negation_paraphrase_inversion-original': 0.75,
 'negation_paraphrase_inversion-asymmetric_premise': 0.15,
 'negation_paraphrase_inversion-asymmetric_conclusion':

In [103]:
# Average ratio score per perturbation/premise key for template "1".
averaged_ratio_templates["1"]

{'original-original': 0.5111483493687121,
 'original-asymmetric_premise': -0.31116177248241744,
 'original-asymmetric_conclusion': -0.2622888472184739,
 'negation-original': -0.5212298195551857,
 'negation-asymmetric_premise': 0.5985196035370794,
 'negation-asymmetric_conclusion': 0.596207399264418,
 'antonym-original': -0.44740264231795707,
 'antonym-asymmetric_premise': 0.4485084908910295,
 'antonym-asymmetric_conclusion': 0.5138030943817072,
 'paraphrase-original': 0.441997246525098,
 'paraphrase-asymmetric_premise': -0.2807757862926053,
 'paraphrase-asymmetric_conclusion': -0.2940937358550881,
 'paraphrase_inversion-original': -0.32420238259348994,
 'paraphrase_inversion-asymmetric_premise': 0.29134681177421073,
 'paraphrase_inversion-asymmetric_conclusion': 0.3220476745935124,
 'negation_antonym-original': 0.5948179041239514,
 'negation_antonym-asymmetric_premise': -0.4969780261444843,
 'negation_antonym-asymmetric_conclusion': -0.5059907144057421,
 'negation_paraphrase-original':

In [104]:
# physical_data[physical_data["truism_number"] == 1]

In [105]:
def create_tables(averaged_numbers, perturbation_order, threshold):
    """Build a numeric table and a matching X/O marker table.

    Rows follow the template keys of ``averaged_numbers`` in sorted order;
    columns follow ``perturbation_order``. A marker is "X" when the value is
    less than or equal to ``threshold`` and "O" otherwise.

    Args:
        averaged_numbers: dict mapping template ids to dicts of
            perturbation key -> number.
        perturbation_order: perturbation keys, in the desired column order.
        threshold: cut-off for the marker table.

    Returns:
        A ``(values, markers)`` tuple of DataFrames.
    """
    value_columns = {}
    marker_columns = {}
    for template in sorted(averaged_numbers.keys()):
        template_values = averaged_numbers[template]
        for perturbation in perturbation_order:
            value = template_values[perturbation]
            marker = "X" if value <= threshold else "O"
            # Both dicts always gain a column together, so they stay aligned.
            value_columns.setdefault(perturbation, []).append(value)
            marker_columns.setdefault(perturbation, []).append(marker)

    return (pd.DataFrame.from_dict(value_columns), pd.DataFrame.from_dict(marker_columns))

In [106]:
# Accuracy view: a cell is marked "X" when the template's average accuracy for
# that perturbation is <= 0.5, and "O" otherwise.
accuracy_table, visual_table = create_tables(averaged_acc_templates, perturbation_order, 0.5)


In [107]:
# One row per template (sorted by template id), one column per perturbation key.
accuracy_table

Unnamed: 0,original-original,original-asymmetric_premise,original-asymmetric_conclusion,negation-original,negation-asymmetric_premise,negation-asymmetric_conclusion,antonym-original,antonym-asymmetric_premise,antonym-asymmetric_conclusion,paraphrase-original,...,paraphrase_inversion-asymmetric_conclusion,negation_antonym-original,negation_antonym-asymmetric_premise,negation_antonym-asymmetric_conclusion,negation_paraphrase-original,negation_paraphrase-asymmetric_premise,negation_paraphrase-asymmetric_conclusion,negation_paraphrase_inversion-original,negation_paraphrase_inversion-asymmetric_premise,negation_paraphrase_inversion-asymmetric_conclusion
0,0.85,0.3,0.4,0.2,0.85,0.85,0.2,0.8,0.85,0.85,...,0.75,0.85,0.2,0.2,0.15,0.85,0.85,0.75,0.15,0.15
1,0.9,0.1,0.1,0.2,0.8,0.8,0.2,0.8,0.8,0.7,...,0.8,0.9,0.1,0.1,0.2,0.7,0.7,0.8,0.1,0.1
2,1.0,0.25,0.25,0.0,1.0,1.0,0.0,1.0,1.0,1.0,...,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0
3,0.846154,0.192308,0.192308,0.192308,0.884615,0.884615,0.307692,0.846154,0.884615,0.846154,...,0.615385,0.846154,0.115385,0.115385,0.230769,0.846154,0.846154,0.807692,0.153846,0.192308


In [108]:
# X/O markers for the accuracy table. A later cell rebinds `visual_table` to
# the ratio markers, so this display depends on cell execution order.
visual_table

Unnamed: 0,original-original,original-asymmetric_premise,original-asymmetric_conclusion,negation-original,negation-asymmetric_premise,negation-asymmetric_conclusion,antonym-original,antonym-asymmetric_premise,antonym-asymmetric_conclusion,paraphrase-original,...,paraphrase_inversion-asymmetric_conclusion,negation_antonym-original,negation_antonym-asymmetric_premise,negation_antonym-asymmetric_conclusion,negation_paraphrase-original,negation_paraphrase-asymmetric_premise,negation_paraphrase-asymmetric_conclusion,negation_paraphrase_inversion-original,negation_paraphrase_inversion-asymmetric_premise,negation_paraphrase_inversion-asymmetric_conclusion
0,O,X,X,X,O,O,X,O,O,O,...,O,O,X,X,X,O,O,O,X,X
1,O,X,X,X,O,O,X,O,O,O,...,O,O,X,X,X,O,O,O,X,X
2,O,X,X,X,O,O,X,O,O,O,...,O,O,X,X,X,O,O,O,X,X
3,O,X,X,X,O,O,X,O,O,O,...,O,O,X,X,X,O,O,O,X,X


In [109]:
# Persist the accuracy tables; the row index is left out of the files.
accuracy_table.to_csv("visual_data/accuracy_table.csv", index=False)
visual_table.to_csv("visual_data/acc_visual_table.csv", index=False)

In [110]:
# Ratio view, thresholded at 0. Note that this rebinds `visual_table`, which
# until this cell held the accuracy markers.
ratio_table, visual_table = create_tables(averaged_ratio_templates, perturbation_order, 0)


In [111]:
# Persist the ratio tables; the row index is left out of the files.
ratio_table.to_csv("visual_data/ratio_table.csv", index=False)
visual_table.to_csv("visual_data/ratio_visual_table.csv", index=False)

In [146]:
def aggregate_pertubations(raw_template_data, metric):
    """Average one metric per perturbation key across all templates.

    The metric sums of every template are added together per perturbation key
    and divided by the combined "count" of all templates. The perturbation
    keys are taken from the first template; every other template must provide
    the same keys.

    Args:
        raw_template_data: dict mapping template ids to dicts that hold a
            "count" entry plus one ``{metric: number}`` dict per perturbation.
        metric: name of the metric to aggregate, e.g. "accuracy".

    Returns:
        Dict mapping each perturbation key to its overall average.
    """
    total_count = sum(
        (stats["count"] for stats in raw_template_data.values()), 0.0
    )

    first_template = list(raw_template_data.keys())[0]
    perturbations = [
        name for name in raw_template_data[first_template] if name != "count"
    ]

    sums = {}
    for stats in raw_template_data.values():
        for name in perturbations:
            contribution = stats[name][metric]
            sums[name] = sums[name] + contribution if name in sums else contribution

    return {name: total / total_count for name, total in sums.items()}

In [147]:
# Overall accuracy per perturbation key, pooled over every template.
aggregate_pertubations(filled_templates, "accuracy")

{'original-original': 0.8516666666666667,
 'original-asymmetric_premise': 0.21833333333333332,
 'original-asymmetric_conclusion': 0.21666666666666667,
 'negation-original': 0.18333333333333332,
 'negation-asymmetric_premise': 0.8716666666666666,
 'negation-asymmetric_conclusion': 0.865,
 'antonym-original': 0.2916666666666667,
 'antonym-asymmetric_premise': 0.82,
 'antonym-asymmetric_conclusion': 0.8383333333333333,
 'paraphrase-original': 0.83,
 'paraphrase-asymmetric_premise': 0.18000000000000002,
 'paraphrase-asymmetric_conclusion': 0.18000000000000002,
 'paraphrase_inversion-original': 0.3516666666666666,
 'paraphrase_inversion-asymmetric_premise': 0.7,
 'paraphrase_inversion-asymmetric_conclusion': 0.7150000000000001,
 'negation_antonym-original': 0.8683333333333334,
 'negation_antonym-asymmetric_premise': 0.13833333333333334,
 'negation_antonym-asymmetric_conclusion': 0.14,
 'negation_paraphrase-original': 0.21333333333333332,
 'negation_paraphrase-asymmetric_premise': 0.81,
 'ne

In [74]:
def aggregate_templates(raw_template_data, metric):
    """Summarise one metric per template across all of its perturbations.

    Args:
        raw_template_data: dict mapping template ids to dicts that hold a
            "count" entry plus one ``{metric: number}`` dict per perturbation.
        metric: name of the metric to aggregate, e.g. "accuracy".

    Returns:
        Dict mapping each template id to a dict with:
          * "count" -- the metric summed over all perturbation keys (note:
            this is NOT the template's truism count);
          * "pct"   -- "count" divided by "total";
          * "total" -- number of perturbation keys times the template's
            truism count, as a float.
    """
    # Phase 1: sum the metric over every perturbation key of each template.
    metric_sums = {
        template: sum(
            values[metric] for name, values in stats.items() if name != "count"
        )
        for template, stats in raw_template_data.items()
    }

    # Phase 2: relate each sum to the number of contributing entries.
    summary = {}
    for template, metric_sum in metric_sums.items():
        stats = raw_template_data[template]
        # len(stats) - 1 leaves out the "count" entry itself.
        total = float((len(stats) - 1) * stats["count"])
        summary[template] = {
            "count": metric_sum,
            "pct": metric_sum / total,
            "total": total,
        }

    return summary

In [75]:
# Per-template accuracy summed over all perturbation keys, with its share of
# the possible total.
aggregate_templates(filled_templates, "accuracy")

{'1': {'count': 243.0, 'pct': 0.50625, 'total': 480.0},
 '2': {'count': 247.0, 'pct': 0.5145833333333333, 'total': 480.0},
 '4': {'count': 51.0, 'pct': 0.53125, 'total': 96.0},
 '5': {'count': 76.0, 'pct': 0.5277777777777778, 'total': 144.0},
 '3': {'count': 116.0, 'pct': 0.48333333333333334, 'total': 240.0}}