In [1]:
import os
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle5 as pickle
import seaborn as sns

import plot

In [None]:
# Label for the heuristic baseline; later cells use it both as a dataframe
# column name and as the legend entry in the explanation-size plot.
heuristic = 'Path score heuristic, 0.95'
# Colour lookup shared by all figures (indexed below by 'generic',
# 'linklogic' and 'Heuristic95').
palette = plot.get_categorical_palette()

In [None]:
# Project root. Defaults to the original author's checkout; set the
# AKBC2021_BASE_DIR environment variable to run the notebook from another
# location without editing this cell.
base_dir = os.environ.get('AKBC2021_BASE_DIR', '/Users/rhodos/Desktop/Projects/akbc2021/')

# The trailing '' makes os.path.join end each directory with a separator,
# which later cells rely on when they build file names as f'{figure_dir}...'.
figure_dir = os.path.join(base_dir, 'figures', 'figures_for_paper', '')
results_dir = os.path.join(
    base_dir, 'explanations', 'linklogic', 'for_paper', 'Exp4', 'results', ''
)

# Threshold applied to the per-feature path score when counting heuristic
# explanations. NOTE(review): the -log(1 - p) transform presumably puts the
# 0.95 probability threshold on the same scale as 'path_score' -- confirm.
kge_thresh = 0.95
log_thresh = -np.log(1 - kge_thresh)

In [None]:
# Load the pickled experiment results, one file per query-triple category.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
result_files = {
    "Nonsense": '4a_100.pickle',
    "False Fact": '4b_100.pickle',
    "True Fact": '4d_100.pickle',
}

data = {}
for category, filename in result_files.items():
    with open(f'{results_dir}/{filename}', 'rb') as f:
        data[category] = pickle.load(f)

In [None]:
# Walk every result of every category and collect one row of metrics per
# query triple; the parallel lists are assembled into a dataframe at the end.
triples, expt, kge_score, fidelity = [], [], [], []
no_of_features = []
num_explanations_linklogic, num_explanations_heuristic = [], []
selected_heuristic_paths = defaultdict(list)

for category in ('Nonsense', 'False Fact', 'True Fact'):
    selected_heuristic_paths[category] = []
    for result in data[category]:
        expt.append(category)
        triples.append(result["query_triple"])
        kge_score.append(float(result["query_triple_kge_score"]))
        fidelity.append(result["linklogic_metrics"]["test_acc"])

        # linklogic explanation size: features with a non-zero coefficient.
        n_nonzero = sum(1 for feat in result["linklogic_features"] if feat["coef"] != 0.0)
        num_explanations_linklogic.append(n_nonzero)

        # Heuristic explanation size: features whose path score exceeds the threshold.
        n_above_thresh = sum(
            1
            for feat in result['linklogic_features']
            if feat['kge_score']['path_score'] > log_thresh
        )
        num_explanations_heuristic.append(n_above_thresh)

metric_columns = {
    "Query Triple Category": expt,
    "Triple": triples,
    "KGE Score for Query Triple": kge_score,
    "linklogic Explanation Fidelity": fidelity,
    "linklogic": num_explanations_linklogic,
    heuristic: num_explanations_heuristic,
}
df = pd.DataFrame(metric_columns)

In [None]:
# Panels A/B: side-by-side boxplots of the KGE score and the linklogic
# fidelity, grouped by query-triple category.
id_vars = ['Triple', 'Query Triple Category']
score_columns = ['KGE Score for Query Triple', 'linklogic Explanation Fidelity']
mdata = df[id_vars + score_columns].melt(id_vars=id_vars)

# One facet per melted metric; the two metrics get independent y-axes.
g = sns.FacetGrid(mdata, col="variable", sharey=False)
g.map_dataframe(
    sns.boxplot,
    x="Query Triple Category",
    y="value",
    boxprops={'alpha': .5},
    color=palette['generic'],
)
g.set_xticklabels(size=10)
g.set_titles('')

# Replace the generic "value" y-label on each facet with the metric name.
for ax, ylabel in zip(g.axes[0], ('KGE Score', 'linklogic Fidelity')):
    ax.set_ylabel(ylabel)

g.savefig(f'{figure_dir}Parsimony_A_B.{plot.figtype}', bbox_inches='tight')

In [None]:
# Panel C: explanation size per query-triple category, linklogic vs. the
# path-score heuristic.
fig, ax = plt.subplots(figsize=(5, 4))
size_long = df[id_vars + ["linklogic", heuristic]].melt(id_vars=id_vars)

box_colors = {
    'linklogic': palette['linklogic'],
    heuristic: palette['Heuristic95'],
}
sns.boxplot(
    data=size_long,
    x="Query Triple Category",
    y="value",
    hue="variable",
    palette=box_colors,
    boxprops={'alpha': .9},
    ax=ax,
)
ax.set(xlabel=None, ylabel='Explanation Size')
ax.legend(title='')
fig.savefig(f'{figure_dir}Parsimony_C.{plot.figtype}', bbox_inches='tight')

In [None]:
# TODO:
# Make panel B linklogic blue
# Adjust ylim in panel B to max 30