In [1]:
import json
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle5 as pickle
import seaborn as sns
from tqdm import tqdm

import eval
import utils as ut
import plot

pd.options.mode.chained_assignment = None # turns off pandas SettingWithCopyWarning

### Run parameters

In [2]:
# Switches and constants read by the cells below.

# When True, the metrics loop draws one scatter plot per query triple.
plot_individual_results = False

# Passed as `kthresh` to eval.extract_feature_df_from_results and used to build
# the name of the heuristic score column.
kthresh = 10

# Column names shared between the evaluation helpers and the plotting helpers.
category_column = 'category'
path_column = 'path category'
score_column = f'top{kthresh} path score'

# Candidate K values for NDCG@K; when limit_k is True the metrics loop narrows
# them per query via eval.filter_k_range.
limit_k = True
k_range = [k for k in range(1, 11)]

# Output directory for figures and exported tables (note the trailing slash).
figure_dir = '../../figures/submission_june_21/'

### Load benchmark and results

In [3]:
# Load the benchmark: a JSON collection with one entry per query triple.
# The encoding is given explicitly because JSON is UTF-8 by specification,
# whereas open() otherwise falls back to the platform's default encoding.
with open('../../data/commonsense_benchmark/v5/commonsense_benchmark_all.json', 'r', encoding='utf-8') as f:
    benchmark = json.load(f)

print(f'Loaded benchmark with {len(benchmark)} triples')

Loaded benchmark with 6260 triples


In [4]:
# Load the pickled explanation results, one file per experimental condition.
# Each condition maps to the two tokens that make up its file name:
# "<first token>_child_<second token>.pickle" inside results_dir.
results_dir = '../../explanations/linklogic/full_parents_benchmark/'
experiments = {
    'sibling false, child true': ['fb13', 'True'],
    'sibling false, child false': ['fb13', 'False'],
    'sibling true, child true': ['fb14', 'True'],
    'sibling true, child false': ['fb14', 'False'],
}

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only point results_dir at files produced by a trusted pipeline.
results = {}
for experiment, (sibling, child) in experiments.items():
    print(f'Loading results for experiment {experiment}')
    with open(f'{results_dir}/{sibling}_child_{child}.pickle', 'rb') as f:
        results[experiment] = pickle.load(f)

Loading results for experiment sibling false, child true
Loading results for experiment sibling false, child false
Loading results for experiment sibling true, child true
Loading results for experiment sibling true, child false


### Compute Metrics

In [5]:
# Score every benchmark triple under every experiment and collect one metrics
# record per (triple, experiment) pair that has at least one true candidate.

if plot_individual_results:
    # Imported lazily so plotly is only required when per-query plots are
    # requested. Without this import the px.scatter call below raises NameError.
    import plotly.express as px

metrics = []          # one dict per evaluated (query triple, experiment) pair
feature_df_list = []  # per-query feature tables, concatenated later for the path reports

for bmk in tqdm(benchmark, total=len(benchmark)):

    # This notebook only handles the 'parents' benchmark category.
    assert bmk['category'] == 'parents'
    #assert len(bmk['entity_names']['siblings']) == 1

    triple = bmk['query_triple']
    s_triple = ut.stringify_path(triple)

    bmk_df = eval.extract_bmk_paths_as_df(bmk, ctg_column=category_column)

    for experiment in experiments:

        res = eval.get_results_for_query_triple(results[experiment], triple)
        if not res:
            # Nothing to evaluate for this triple under this experiment.
            continue

        feature_df = eval.extract_feature_df_from_results(res['linklogic_features'], bmk_df=bmk_df, names=bmk['entity_names'], kthresh=kthresh,
                                                          experiment=experiment, query_triple=s_triple)
        num_true_candidates = feature_df['label'].sum()
        if num_true_candidates == 0:
            # Skip queries without any positively labelled candidate.
            continue

        feature_df_list.append(feature_df)

        # Each column is wrapped in a one-element list before being handed to
        # eval.ndcg_score_range (presumably a single-query batch - TODO confirm).
        y_true = [feature_df['label']]
        y_true_weighted = [feature_df['bmk confidence']]
        rankings = {
            'heuristic': [feature_df['baseline path score']],
            'linklogic': [feature_df['coefficient']],
            'random': [feature_df['random']],
        }

        if limit_k:
            ks = eval.filter_k_range(k_range, feature_df['coefficient'])
        else:
            ks = k_range

        # 'ndcg' is scored against the 'label' column, 'wndcg' against the
        # 'bmk confidence' column; key order matches the original column order.
        record = {'experiment': experiment}
        for prefix, relevance in (('ndcg', y_true), ('wndcg', y_true_weighted)):
            for ranker, scores in rankings.items():
                record[f'{prefix}: {ranker}'] = eval.ndcg_score_range(relevance, scores, ks)

        record.update({
            'num true': num_true_candidates,
            'query triple': s_triple,
            'kge score': float(res['query_triple_kge_score']),
            'fidelity': res['linklogic_metrics']['test_acc'],
        })
        metrics.append(record)

        if plot_individual_results:
            fig = px.scatter(feature_df, x='baseline path score', y='coefficient', color='label',
                             hover_data=["path"], title=f'features: {s_triple}')
            fig.show()

100%|██████████| 6260/6260 [10:35<00:00,  9.84it/s]


In [None]:
# Collect the per-query metric records into one table: one row per
# (query triple, experiment) pair that passed the filters in the metrics loop.
M = pd.DataFrame(metrics)
M.shape

In [None]:

# Persist the metrics table next to the figures.
# `figure_dir` comes from the run-parameters cell; it is deliberately not
# redefined here so the output location is configured in a single place.
os.makedirs(figure_dir, exist_ok=True)
M.to_csv(os.path.join(figure_dir, 'metrics_parents_benchmark_all.tsv'), sep='\t', index=False)

In [None]:
# Display label for the baseline method; used as a column name and legend entry in the NDCG plots below.
heuristic = 'Path score heuristic'

In [None]:
import matplotlib.pyplot as plt

In [None]:
#import seaborn as sns
#sns.scatterplot(data=M, x='ndcg: linklogic', y='wndcg: linklogic')

### Summary metrics: NDCG

In [None]:
def plot_performance_metric(M, experiments=('sibling false, child false',), metric='wndcg', filename=None,
                            max_plot_k=7):
    """Draw NDCG@K curves comparing linklogic with the path-score heuristic.

    Relies on the notebook-level names `k_range`, `heuristic`, `plot`, `plt`
    and `sns`.

    Parameters
    ----------
    M : pandas.DataFrame
        Metrics table with the columns 'experiment', 'query triple' and, for the
        chosen `metric`, '<metric>: random', '<metric>: heuristic' and
        '<metric>: linklogic', each cell holding one score per K.
    experiments : collection of str
        Only rows whose 'experiment' value is in this collection are plotted.
    metric : str
        Column prefix selecting the metric family (e.g. 'ndcg' or 'wndcg').
    filename : str or None
        If given, the figure is also saved to this path.
    max_plot_k : int
        Largest K shown on the x axis (default 7, the previously hard-coded value).

    Raises
    ------
    ValueError
        If no row of `M` matches `experiments` (or no K survives the cut-off),
        instead of the opaque KeyError that melting an empty frame would raise.
    """
    shown_ks = {str(k) for k in range(1, max_plot_k + 1)}

    rows = []
    for _, row in M.iterrows():
        # Filter first so rows of other experiments cost nothing.
        if row['experiment'] not in experiments:
            continue

        # The number of available scores bounds the K values for this row.
        n_scores = len(row[f'{metric}: random'])
        for k in k_range[:n_scores]:
            if str(k) not in shown_ks:
                continue
            rows.append({
                'k': str(k),
                'query triple': row['query triple'],
                'experiment': row['experiment'],
                heuristic: row[f'{metric}: heuristic'][k-1],
                'linklogic': row[f'{metric}: linklogic'][k-1]
            })

    if not rows:
        raise ValueError(f'No rows to plot for experiments={list(experiments)!r} and metric={metric!r}')

    # Long format: one row per (k, query triple, experiment, method).
    m = pd.DataFrame(rows).melt(id_vars=['k', 'query triple', 'experiment'])

    pal = plot.get_categorical_palette()
    palette = {'linklogic': pal['linklogic'], heuristic: pal['Heuristic95']}
    plt.figure(figsize=(5, 4))
    ax = sns.lineplot(data=m, x='k', y='value', hue='variable', hue_order=list(palette), palette=palette)
    ax.set(ylabel='NDCG@K')
    ax.legend(title='')

    if filename is not None:
        ax.get_figure().savefig(filename, bbox_inches='tight')

In [None]:
# Weighted NDCG@K for the 'sibling false, child false' condition; saved as NDCG.<figtype> in figure_dir.
# os.path.join keeps the path valid whether or not figure_dir ends with a slash.
plot_performance_metric(M, experiments=['sibling false, child false'], metric='wndcg',
                        filename=os.path.join(figure_dir, f'NDCG.{plot.figtype}'))

In [None]:
# Same weighted NDCG@K plot, pooled over every experiment.
# This cell used to repeat the body of plot_performance_metric line for line;
# calling the function keeps the two plots from drifting apart.
# The figure is only displayed, not saved.
plot_performance_metric(M, experiments=list(experiments), metric='wndcg')

In [None]:
# Weighted NDCG@K, one panel per experiment, saved as NDCG_all.<figtype>.
M = pd.DataFrame(metrics)
metric = 'wndcg'

rows = []
for _, row in M.iterrows():
    # The number of available scores bounds the K values for this row.
    n_scores = len(row[f'{metric}: random'])
    for k in k_range[:n_scores]:
        rows.append({
            'k': str(k),
            'query triple': row['query triple'],
            'experiment': row['experiment'],
            heuristic: row[f'{metric}: heuristic'][k-1],
            'linklogic': row[f'{metric}: linklogic'][k-1]
        })

# Long format with presentation-ready column names (chained instead of inplace
# so re-running the cell always starts from a fresh frame).
m = (
    pd.DataFrame(rows)
    .melt(id_vars=['k', 'query triple', 'experiment'])
    .rename(columns={'value': 'NDCG@K', 'variable': 'Method'})
)

m = m[m['k'].isin(['1', '2', '3', '4', '5', '6', '7'])]

pal = plot.get_categorical_palette()
palette = {'linklogic': pal['linklogic'], heuristic: pal['Heuristic95']}

sns.set(font_scale=1.2)
# The palette was previously built but never passed, so this figure used
# seaborn's default colours instead of the ones used by the single-panel plot.
g = sns.relplot(data=m, x="k", y="NDCG@K", hue="Method", hue_order=list(palette), palette=palette,
                col="experiment", kind='line')

g.savefig(os.path.join(figure_dir, f'NDCG_all.{plot.figtype}'), bbox_inches='tight')

# Path Reports

In [None]:
# Stack the per-query feature tables collected in the metrics loop into one frame.
# Row indices are kept as they are in the individual tables, so they may repeat.
fdata = pd.concat(feature_df_list, axis=0)

In [None]:
fdata.shape

In [None]:
# Path reports: each figure compares two experimental conditions.
xcol = 'coefficient'  # NOTE(review): not used in this cell; kept in case a later cell reads it
min_count = 150

# One fixed colour per experimental condition.
palette = {
    'sibling false, child false': 'tab:red',
    'sibling false, child true': 'tab:olive',
    'sibling true, child false': 'mediumorchid',
    'sibling true, child true': 'tab:cyan',
}

# (pair of experiments shown together, output file name inside figure_dir)
path_reports = [
    (['sibling false, child true', 'sibling false, child false'], 'Exp1_2_NoSibling_RemoveChild.png'),
    (['sibling true, child true', 'sibling true, child false'], 'Exp3_WithSibling_RemoveChild.png'),
    (['sibling false, child false', 'sibling true, child false'], 'Exp2_3_NoChild_AddSibling.png'),
    (['sibling false, child true', 'sibling true, child true'], 'Exp1_3_WithChild_AddSibling.png'),
]

for report_experiments, report_name in path_reports:
    # os.path.join keeps the path valid whether or not figure_dir ends with a slash.
    plot.incidents_per_path_category(fdata, experiments=report_experiments, min_count=min_count,
                                     palette=palette, score_column=score_column, path_column=path_column,
                                     filename=os.path.join(figure_dir, report_name))

In [None]:
# Export the combined feature table behind the path reports.
# The directory is created if needed and the path is joined rather than
# concatenated, which avoids the doubled slash produced by the trailing
# slash in figure_dir.
os.makedirs(figure_dir, exist_ok=True)
fdata.to_csv(os.path.join(figure_dir, 'all_feature_data_for_parents_path_reports.tsv'), sep='\t', index=False)

In [None]:
#P = M[M.experiment == 'sibling false, child false']
#P.to_csv('sibling_false_child_false_query_triple_metrics.tsv',sep='\t', index=None)
# rows = []
# for i, row in P.iterrows():
#     row_metrics = row['ndcg: linklogic']
#     if len(row_metrics) >= 5:        
#          rows.append({
#              'query_triple': row['query triple'],
#              'ndcg': row_metrics[4],
#              'fidelity': row['fidelity']
#          })
# triple_metrics = pd.DataFrame(rows)
# triple_metrics.to_csv('triple_metrics.tsv', sep='\t')