In [1]:
import pandas as pd
import json

In [2]:
material_data = pd.read_csv("result_data/material_perf_10.csv")

social_data = pd.read_csv("result_data/social_perf_10.csv")

physical_data = pd.read_csv("result_data/physical_perf_10.csv")

In [3]:
material_data[(material_data["avg_binary_score"]==1.0) | (material_data["avg_binary_score"]==0.0)]

Unnamed: 0,truism_number,perturbation,premise,avg_binary_score,avg_ratio_score
0,0,original,original,1.0,0.882697
1,0,original,asymmetric_premise,0.0,-0.695894
2,0,original,asymmetric_conclusion,0.0,-0.745709
3,0,negation,original,0.0,-0.931892
4,0,negation,asymmetric_premise,1.0,0.927463
...,...,...,...,...,...
466,19,negation_paraphrase,asymmetric_premise,1.0,0.617321
467,19,negation_paraphrase,asymmetric_conclusion,1.0,0.637002
468,19,negation_paraphrase_inversion,original,1.0,0.884764
469,19,negation_paraphrase_inversion,asymmetric_premise,0.0,-0.868838


In [4]:
physical_data[(physical_data["avg_binary_score"]==1.0) | (physical_data["avg_binary_score"]==0.0)]

Unnamed: 0,truism_number,perturbation,premise,avg_binary_score,avg_ratio_score
0,0,original,original,1.0,0.761914
1,0,original,asymmetric_premise,0.0,-0.925548
2,0,original,asymmetric_conclusion,0.0,-0.917618
3,0,negation,original,0.0,-0.725308
4,0,negation,asymmetric_premise,1.0,0.626366
...,...,...,...,...,...
472,19,negation_paraphrase,asymmetric_premise,0.0,-0.793112
473,19,negation_paraphrase,asymmetric_conclusion,0.0,-0.669244
474,19,negation_paraphrase_inversion,original,0.0,-0.780922
475,19,negation_paraphrase_inversion,asymmetric_premise,1.0,0.883004


In [5]:
social_data[(social_data["avg_binary_score"]==1.0) | (social_data["avg_binary_score"]==0.0)]

Unnamed: 0,truism_number,perturbation,premise,avg_binary_score,avg_ratio_score
0,0,original,original,1.0,0.622523
1,0,original,asymmetric_premise,0.0,-0.389334
2,0,original,asymmetric_conclusion,0.0,-0.459831
3,0,negation,original,0.0,-0.219677
4,0,negation,asymmetric_premise,1.0,0.237032
...,...,...,...,...,...
472,19,negation_paraphrase,asymmetric_premise,1.0,0.001274
473,19,negation_paraphrase,asymmetric_conclusion,0.0,-0.007141
474,19,negation_paraphrase_inversion,original,1.0,0.931740
475,19,negation_paraphrase_inversion,asymmetric_premise,0.0,-0.924330


In [6]:
with open("truism_data/physical_data.json", "r") as f:
    physical_params = json.load(f)

with open("truism_data/material_data.json", "r") as f:
    material_params = json.load(f)

with open("truism_data/social_data.json", "r") as f:
    social_params = json.load(f)

In [7]:
physical_params["0"]

{'initial_comparison': 'A is smaller than B',
 'negation_switch': {'0': ['can', 'can not'],
  '1': ['is', 'is not'],
  '2': ['is', 'is not']},
 'antonym_switch': ['fit into', 'contain'],
 'original_comparison': 'can fit into more boxes than',
 'paraphrase': 'is more easily put into a container than',
 'inverted_paraphrase': 'is less able to hold objects than',
 'premise_switch': {'0': ['more', 'less'],
  '1': ['more', 'less'],
  '2': ['less', 'more']},
 'template': '4',
 'is_more': True}

In [8]:
def get_templates(param_array):
    templates = {}
    for param_dict in param_array:
        for key in param_dict:
            templates[param_dict[key]["template"]] = {}
    return templates

In [9]:
templates = get_templates([physical_params, material_params, social_params])

In [10]:
def get_perturbations(result_data, template_dict):
#     print(result_data)
    for i, row in result_data.iterrows():
#         print(row)
        p_key = row["perturbation"] + "-" + row["premise"]
#         print(p_key)
        for key in template_dict:
            template_dict[key][p_key] = {"accuracy" : 0, "ratio_score" : 0}
    return template_dict

In [11]:
filled_templates = get_perturbations(material_data, templates)

In [12]:
def get_perturbation_order(result_data, loop_count):
    order = []
    for i, row in result_data.iterrows():
        p_key = row["perturbation"] + "-" + row["premise"]
        order.append(p_key)
        if i >= loop_count-1:
            break
    
    return order
        

In [13]:
perturbation_order = get_perturbation_order(material_data, 24)

In [14]:
for key in filled_templates:
    filled_templates[key]["count"] = 0

In [15]:
def fill_pertubation_data(result_data, param_data, template_dict, multiple=1):
    completed_truisms = {}
    
    for i, row in result_data.iterrows():
        p_key = row["perturbation"] + "-" + row["premise"]
        template_key = param_data[str(row["truism_number"])]["template"]
        
        template_dict[template_key][p_key]["accuracy"] += row["avg_binary_score"]*multiple
        template_dict[template_key][p_key]["ratio_score"] += row["avg_ratio_score"]*multiple
        
        if row["truism_number"] not in completed_truisms:
            template_dict[template_key]["count"] += 1*multiple
            completed_truisms[row["truism_number"]] = 1

    return template_dict

In [16]:
filled_templates = fill_pertubation_data(material_data, material_params, filled_templates)
filled_templates = fill_pertubation_data(physical_data, physical_params, filled_templates)
filled_templates = fill_pertubation_data(social_data, social_params, filled_templates)

In [17]:
filled_templates["1"]

{'original-original': {'accuracy': 12.0, 'ratio_score': 2.2050088888176433},
 'original-asymmetric_premise': {'accuracy': 10.0,
  'ratio_score': -0.4593210461665538},
 'original-asymmetric_conclusion': {'accuracy': 11.0,
  'ratio_score': -0.8332751293156686},
 'negation-original': {'accuracy': 10.0, 'ratio_score': -0.5616735949242873},
 'negation-asymmetric_premise': {'accuracy': 10.0,
  'ratio_score': 0.27041934324272143},
 'negation-asymmetric_conclusion': {'accuracy': 10.0,
  'ratio_score': 0.3144640408676658},
 'antonym-original': {'accuracy': 9.0, 'ratio_score': -2.265454904353624},
 'antonym-asymmetric_premise': {'accuracy': 9.0,
  'ratio_score': 0.3292708746485742},
 'antonym-asymmetric_conclusion': {'accuracy': 9.0,
  'ratio_score': 0.41091662486559155},
 'paraphrase-original': {'accuracy': 11.0, 'ratio_score': 1.931505029112658},
 'paraphrase-asymmetric_premise': {'accuracy': 10.0,
  'ratio_score': 0.5749679035622391},
 'paraphrase-asymmetric_conclusion': {'accuracy': 9.0,
  '

In [18]:
averaged_acc_templates = {}
averaged_ratio_templates = {}
for template in filled_templates:
    averaged_acc_templates[template] = {}
    averaged_ratio_templates[template] = {}
    count = float(filled_templates[template]["count"])
    for perturbation in filled_templates[template]:
        if perturbation != "count":
            acc = filled_templates[template][perturbation]["accuracy"]
            score = filled_templates[template][perturbation]["ratio_score"]

            averaged_acc_templates[template][perturbation] = acc / count
            averaged_ratio_templates[template][perturbation] = score/count

In [19]:
averaged_acc_templates["1"]

{'original-original': 0.6,
 'original-asymmetric_premise': 0.5,
 'original-asymmetric_conclusion': 0.55,
 'negation-original': 0.5,
 'negation-asymmetric_premise': 0.5,
 'negation-asymmetric_conclusion': 0.5,
 'antonym-original': 0.45,
 'antonym-asymmetric_premise': 0.45,
 'antonym-asymmetric_conclusion': 0.45,
 'paraphrase-original': 0.55,
 'paraphrase-asymmetric_premise': 0.5,
 'paraphrase-asymmetric_conclusion': 0.45,
 'paraphrase_inversion-original': 0.4,
 'paraphrase_inversion-asymmetric_premise': 0.55,
 'paraphrase_inversion-asymmetric_conclusion': 0.55,
 'negation_antonym-original': 0.5,
 'negation_antonym-asymmetric_premise': 0.5,
 'negation_antonym-asymmetric_conclusion': 0.5,
 'negation_paraphrase-original': 0.45,
 'negation_paraphrase-asymmetric_premise': 0.45,
 'negation_paraphrase-asymmetric_conclusion': 0.45,
 'negation_paraphrase_inversion-original': 0.55,
 'negation_paraphrase_inversion-asymmetric_premise': 0.4,
 'negation_paraphrase_inversion-asymmetric_conclusion': 0.

In [20]:
averaged_ratio_templates["1"]

{'original-original': 0.11025044444088217,
 'original-asymmetric_premise': -0.02296605230832769,
 'original-asymmetric_conclusion': -0.04166375646578343,
 'negation-original': -0.02808367974621436,
 'negation-asymmetric_premise': 0.013520967162136072,
 'negation-asymmetric_conclusion': 0.01572320204338329,
 'antonym-original': -0.1132727452176812,
 'antonym-asymmetric_premise': 0.01646354373242871,
 'antonym-asymmetric_conclusion': 0.020545831243279578,
 'paraphrase-original': 0.09657525145563291,
 'paraphrase-asymmetric_premise': 0.02874839517811196,
 'paraphrase-asymmetric_conclusion': -0.002577780670304197,
 'paraphrase_inversion-original': -0.07715610935566955,
 'paraphrase_inversion-asymmetric_premise': 0.008188250605994523,
 'paraphrase_inversion-asymmetric_conclusion': 0.04618504161949474,
 'negation_antonym-original': 0.017769055474487667,
 'negation_antonym-asymmetric_premise': -0.02802506707461963,
 'negation_antonym-asymmetric_conclusion': -0.03404924240466309,
 'negation_pa

In [21]:
# physical_data[physical_data["truism_number"] == 1]

In [38]:
def create_tables(averaged_numbers, perturbation_order, threshold):
    output = {}
    output_2 = {}
    sorted_keys = list(averaged_numbers.keys())
    sorted_keys.sort()
    for template in sorted_keys:
        for perturbation in perturbation_order:
            if perturbation in output:
                output[perturbation].append(averaged_numbers[template][perturbation])
                if averaged_numbers[template][perturbation] <= threshold:
                    output_2[perturbation].append("X")
                else:
                    output_2[perturbation].append("O")
            else:
                output[perturbation] = []
                output_2[perturbation] = []
                output[perturbation].append(averaged_numbers[template][perturbation])
                if averaged_numbers[template][perturbation] < threshold:
                    output_2[perturbation].append("X")
                elif averaged_numbers[template][perturbation] == threshold:
                    output_2[perturbation].append("---")
                else:
                    output_2[perturbation].append("O")
    
    return (pd.DataFrame.from_dict(output), pd.DataFrame.from_dict(output_2))

In [39]:
accuracy_table, visual_table = create_tables(averaged_acc_templates, perturbation_order, 0.5)


In [40]:
accuracy_table

Unnamed: 0,original-original,original-asymmetric_premise,original-asymmetric_conclusion,negation-original,negation-asymmetric_premise,negation-asymmetric_conclusion,antonym-original,antonym-asymmetric_premise,antonym-asymmetric_conclusion,paraphrase-original,...,paraphrase_inversion-asymmetric_conclusion,negation_antonym-original,negation_antonym-asymmetric_premise,negation_antonym-asymmetric_conclusion,negation_paraphrase-original,negation_paraphrase-asymmetric_premise,negation_paraphrase-asymmetric_conclusion,negation_paraphrase_inversion-original,negation_paraphrase_inversion-asymmetric_premise,negation_paraphrase_inversion-asymmetric_conclusion
0,0.6,0.5,0.55,0.5,0.5,0.5,0.45,0.45,0.45,0.55,...,0.55,0.5,0.5,0.5,0.45,0.45,0.45,0.55,0.4,0.4
1,0.5,0.5,0.5,0.6,0.4,0.4,0.6,0.4,0.4,0.5,...,0.5,0.5,0.5,0.5,0.5,0.4,0.3,0.5,0.4,0.4
2,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.75,0.5,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
3,0.615385,0.538462,0.5,0.5,0.538462,0.538462,0.5,0.461538,0.538462,0.692308,...,0.538462,0.5,0.461538,0.5,0.576923,0.461538,0.461538,0.615385,0.423077,0.423077


In [41]:
visual_table

Unnamed: 0,original-original,original-asymmetric_premise,original-asymmetric_conclusion,negation-original,negation-asymmetric_premise,negation-asymmetric_conclusion,antonym-original,antonym-asymmetric_premise,antonym-asymmetric_conclusion,paraphrase-original,...,paraphrase_inversion-asymmetric_conclusion,negation_antonym-original,negation_antonym-asymmetric_premise,negation_antonym-asymmetric_conclusion,negation_paraphrase-original,negation_paraphrase-asymmetric_premise,negation_paraphrase-asymmetric_conclusion,negation_paraphrase_inversion-original,negation_paraphrase_inversion-asymmetric_premise,negation_paraphrase_inversion-asymmetric_conclusion
0,O,---,O,---,---,---,X,X,X,O,...,O,---,---,---,X,X,X,O,X,X
1,X,X,X,O,X,X,O,X,X,X,...,X,X,X,X,X,X,X,X,X,X
2,X,X,X,X,X,X,X,X,O,X,...,X,X,X,X,X,X,X,X,X,X
3,O,O,X,X,O,O,X,X,O,O,...,O,X,X,X,O,X,X,O,X,X


In [42]:
pd.DataFrame.to_csv(accuracy_table, "visual_data/accuracy_table.csv", index=False)
pd.DataFrame.to_csv(visual_table, "visual_data/acc_visual_table.csv", index=False)

In [43]:
ratio_table, visual_table = create_tables(averaged_ratio_templates, perturbation_order, 0)


In [44]:
pd.DataFrame.to_csv(ratio_table, "visual_data/ratio_table.csv", index=False)
pd.DataFrame.to_csv(visual_table, "visual_data/ratio_visual_table.csv", index=False)

In [45]:
def aggregate_pertubations(raw_template_data, metric):
    total_count = 0.0
    for template in raw_template_data:
        total_count += raw_template_data[template]["count"]
    
    one_key = list(raw_template_data.keys())[0]
    
    output = {}
    for perturbation in raw_template_data[one_key]:
        if perturbation != "count":
            for template in raw_template_data:
                if perturbation in output:
                    output[perturbation] += raw_template_data[template][perturbation][metric]
                else:
                    output[perturbation] = raw_template_data[template][perturbation][metric]
    
    for key in output:
        output[key] = output[key] / total_count
    
    return output

In [46]:
aggregate_pertubations(filled_templates, "accuracy")

{'original-original': 0.5833333333333334,
 'original-asymmetric_premise': 0.5166666666666667,
 'original-asymmetric_conclusion': 0.5166666666666667,
 'negation-original': 0.5166666666666667,
 'negation-asymmetric_premise': 0.5,
 'negation-asymmetric_conclusion': 0.5,
 'antonym-original': 0.5,
 'antonym-asymmetric_premise': 0.45,
 'antonym-asymmetric_conclusion': 0.5,
 'paraphrase-original': 0.6,
 'paraphrase-asymmetric_premise': 0.45,
 'paraphrase-asymmetric_conclusion': 0.45,
 'paraphrase_inversion-original': 0.4666666666666667,
 'paraphrase_inversion-asymmetric_premise': 0.55,
 'paraphrase_inversion-asymmetric_conclusion': 0.5333333333333333,
 'negation_antonym-original': 0.5,
 'negation_antonym-asymmetric_premise': 0.48333333333333334,
 'negation_antonym-asymmetric_conclusion': 0.5,
 'negation_paraphrase-original': 0.5166666666666667,
 'negation_paraphrase-asymmetric_premise': 0.45,
 'negation_paraphrase-asymmetric_conclusion': 0.43333333333333335,
 'negation_paraphrase_inversion-or

In [47]:
def aggregate_templates(raw_template_data, metric):   
    output = {}
    for template in raw_template_data:
        output[template] = {"count" : 0, "pct" : -1, "total" : -1}
        for perturbation in raw_template_data[template]:
            if perturbation != "count":
#                 if template in output:
                output[template]["count"] += raw_template_data[template][perturbation][metric]
#                 else:
#                     output[template] = raw_template_data[template][perturbation][metric]
    
    for key in output:
        output[key]["total"] = float((len(raw_template_data[key].keys()) - 1) * raw_template_data[key]["count"])
        output[key]["pct"] = output[key]["count"] / output[key]["total"]
    
    return output

In [75]:
aggregate_templates(filled_templates, "accuracy")

{'1': {'count': 243.0, 'pct': 0.50625, 'total': 480.0},
 '2': {'count': 247.0, 'pct': 0.5145833333333333, 'total': 480.0},
 '4': {'count': 51.0, 'pct': 0.53125, 'total': 96.0},
 '5': {'count': 76.0, 'pct': 0.5277777777777778, 'total': 144.0},
 '3': {'count': 116.0, 'pct': 0.48333333333333334, 'total': 240.0}}