# Experimental Evaluation: 
## Goal Recognition in FOND using Conjunctive Goals

In [145]:
BASE_DIR = '../fond-recognition-benchmarks'

In [146]:
import os
import sys
import pandas as pd
import glob
import json
import numpy as np
import itertools

In [147]:
def compute_posterior(index, prob, priors):
    num = prob[index] * priors[index]
    denom = np.sum(prob*priors)
    return num / denom

In [148]:
RESULTS_DIR = os.path.join(BASE_DIR, 'results/')

In [149]:
domains_evaluation = ['blocksworld', 'logistics', 'tidyup', 'tireworld', 'triangle-tireworld', 'zenotravel']
type_goal = 'conjunctive'
json_files = []
for domain in domains_evaluation:
    files = glob.glob(os.path.join(RESULTS_DIR + '/' + domain + '/' + type_goal, '*.json'))
    for f in files:
        json_files.append(f)

In [150]:
print("Results found:", len(json_files))

Results found: 468


# Results Collection

In [151]:
print(json_files[0])
print(os.path.basename(json_files[0]))
filename, ext = os.path.basename(json_files[0]).split('.')

../fond-recognition-benchmarks/results//blocksworld/pltl_since/blocksworld_pltl1_p02_hyp-1_30_1.json
blocksworld_pltl1_p02_hyp-1_30_1.json


In [152]:
tokens = filename.split('_')

In [153]:
dataset = {
    'domain' : [],\
    'problem' : [],\
    'observability' : [],\
    'goals' : [],\
    'judge_point' : [],\
    'obs_len' : [],\
    'true_goal': [],\
    'time': [],\
    'posterior': []}

sum_obs_dict = dict()
problems_obs_dict = dict()

sum_goals = 0

for pathname in json_files:
    filename, ext = os.path.basename(pathname).split('.')
    tokens = filename.split('_')
    approach = tokens[0]
    model_type = tokens[-1]
    data = {}
    with open(pathname) as instream:
        buffer = instream.read()
        data = json.loads(buffer)
    #print(approach, model_type, data['domain'], data['problem'], data['observability'])
    num_goals = len(data["G"])
    true_goal = data["G"].index(data['G*'])
    likelihoods = data['P(Obs | G)']
    sum_goals += num_goals
    
    if data['observability'] in sum_obs_dict:
        problems_obs_dict[data['observability']] += 1
        sum_obs_dict[data['observability']] += len(data['Obs'])
    else:
        problems_obs_dict[data['observability']] = 1
        sum_obs_dict[data['observability']] = len(data['Obs'])
    
    for k, prob_O_G in enumerate(likelihoods):
        post_probs = [compute_posterior(j, prob_O_G, np.ones(num_goals)/num_goals) \
                     for j in range(len(data['G']))]
        dataset['domain'] += [data['domain']]
        dataset['problem'] += [data['problem']]
        dataset['goals'] += [num_goals]
        dataset['judge_point'] += [k]
        dataset['obs_len'] += [len(data['Obs'])]
        dataset['true_goal'] += [true_goal]
        dataset['observability'] += [data['observability']]
        dataset['time'] += [data['time']]
        dataset['posterior'] += [np.array(post_probs)]

avg_obs = 0.0
sum_obs = 0.0
avg_obs_observability = dict()
for k in sum_obs_dict.keys():
    avg = sum_obs_dict[k] / problems_obs_dict[k]
    avg_obs_observability[k] = avg
    sum_obs += avg
    
avg_obs = sum_obs / len(sum_obs_dict)

avg_goals = sum_goals/len(json_files)

In [154]:
dataset = pd.DataFrame(dataset)

In [155]:
def compute_accuracy(dataset, top_k):
    tp_count = 0
    for index, row in dataset.iterrows():
        top_k_goals = np.argpartition(row['posterior'], -top_k)[-top_k:]
        if row['true_goal'] in top_k_goals:
            tp_count += 1
    return tp_count / len(dataset)

In [156]:
def compute_recognition_time(dataset):
    sum_time = 0
    for index, row in dataset.iterrows():
        sum_time += row['time']

    return (sum_time / len(dataset))

In [157]:
def compute_error_rates(dataset):
    tpr = 0.0
    fpr = 0.0
    fnr = 0.0
    f1 = 0.0

    for index, row in dataset.iterrows():
        tp_count = 0
        tn_count = 0
        fp_count = 0
        fn_count = 0

        top_goal = np.argmax(row['posterior'])
        max_goals = [ k for k, p in enumerate(row['posterior']) if p == row['posterior'][top_goal]]
        
        if row['true_goal'] in max_goals:
            tp_count = 1
        fn_count = 1 - tp_count
        fp_count = len(max_goals) - tp_count
        tn_count = row['goals'] - fp_count
        
        f1 += (2 * tp_count) / ((2 * tp_count) + fp_count + fn_count)
    
        tpr += tp_count/(tp_count + fn_count)
        fpr += fp_count/(fp_count + tn_count)
        fnr += fn_count/(fn_count + tp_count)
    return tpr/len(dataset), fpr/len(dataset), fnr/len(dataset), f1/len(dataset)

## Online Goal Recognition

In [158]:
approaches = ['approach']
observabilities = ['10', '25', '30', '50', '70', '75', '100']

summary = {'approach': [], 'observability': [], 'top1': [], 'top2': [], 'top3': [], 'tpr' : [], 'fpr':[], 'fnr': [], 'f1': []}

for approach, observability in itertools.product(approaches, observabilities):
    df = dataset.loc[(dataset['observability'] == observability)]
    if len(df) == 0: 
        continue
    top_1_acc = compute_accuracy(df, 1)
    top_2_acc = compute_accuracy(df, 2)
    top_3_acc = compute_accuracy(df, 3)
    tpr, fpr, fnr, f1 = compute_error_rates(df)
    summary['approach'] += [approach]
    summary['observability'] += [observability]
    summary['top1'] += [top_1_acc]
    summary['top2'] += [top_2_acc]
    summary['top3'] += [top_3_acc]
    summary['tpr'] += [tpr]
    summary['fpr'] += [fpr]
    summary['fnr'] += [fnr]
    summary['f1'] += [f1]

In [159]:
summary = pd.DataFrame(summary)

In [160]:
summary[['observability', 'top1', 'top2', 'top3', 'tpr', 'fpr', 'fnr', 'f1']]

Unnamed: 0,observability,top1,top2,top3,tpr,fpr,fnr,f1
0,10,0.448276,0.701149,0.827586,0.672414,0.218391,0.327586,0.545019
1,30,0.392405,0.677215,0.820675,0.603376,0.235935,0.396624,0.487623
2,50,0.406504,0.661247,0.804878,0.598916,0.234192,0.401084,0.490289
3,70,0.428434,0.682037,0.823247,0.62536,0.222703,0.37464,0.518988
4,100,0.425577,0.687631,0.821803,0.620545,0.220999,0.379455,0.514745


## Offline Goal Recognition

In [161]:
offline = {'observability': [], 'avg_obs': [], 'time': [], 'top1': [], 'top2': [], 'top3': [], 'tpr' : [], 'fpr':[], 'fnr': [], 'f1': []}

for approach, observability in itertools.product(approaches, observabilities):
    df = dataset.loc[(dataset['observability'] == observability)\
                    & (dataset['judge_point'] == dataset['obs_len'] - 1)]
    if len(df) == 0: continue
    top_1_acc = compute_accuracy(df, 1)
    top_2_acc = compute_accuracy(df, 2)
    top_3_acc = compute_accuracy(df, 3)
    tpr, fpr, fnr, f1 = compute_error_rates(df)
    avg_time = compute_recognition_time(df)
    offline['observability'] += [observability]
    offline['avg_obs'] += [avg_obs_observability[observability]]
    offline['time'] += [avg_time]
    offline['top1'] += [top_1_acc]
    offline['top2'] += [top_2_acc]
    offline['top3'] += [top_3_acc]
    offline['tpr'] += [tpr]
    offline['fpr'] += [fpr]
    offline['fnr'] += [fnr]
    offline['f1'] += [f1]

In [162]:
offline = pd.DataFrame(offline)
offline[['observability', 'avg_obs', 'time', 'top1', 'top2', 'top3', 'tpr', 'fpr', 'fnr', 'f1']]

Unnamed: 0,observability,avg_obs,time,top1,top2,top3,tpr,fpr,fnr,f1
0,10,1.611111,173.565723,0.564815,0.796296,0.907407,0.759259,0.185185,0.240741,0.643519
1,30,4.388889,173.373043,0.694444,0.888889,0.944444,0.87037,0.12037,0.12963,0.782716
2,50,6.833333,172.961623,0.759259,0.861111,0.935185,0.851852,0.098765,0.148148,0.795062
3,70,9.638889,171.042235,0.842593,0.944444,0.972222,0.972222,0.070988,0.027778,0.911111
4,100,13.25,169.38087,0.916667,0.972222,1.0,0.944444,0.020833,0.055556,0.935185


In [163]:
print('Average observations: %s' % avg_obs)
print('Average goals: %s' % avg_goals)

Average observations: 7.144444444444444
Average goals: 3.9166666666666665


In [164]:
columns = ['observability', 'avg_obs', 'time', 'tpr', 'fpr', 'fnr']
summary4tex = offline[columns]
with open('summary-conjuctive.tex','w') as output :
    summary4tex.to_latex(buf=output,index=False,float_format=lambda x : '{:.2f}'.format(x))

### First Observation

In [165]:
first_obs = {'observability': [], 'top1': [], 'top2': [], 'top3': [], 'tpr' : [], 'fpr':[], 'fnr': [], 'f1': []}

for approach, observability in itertools.product(approaches, observabilities):
    df = dataset.loc[(dataset['observability'] == observability)\
                    & (dataset['judge_point'] == 0)]
    if len(df) == 0: continue
    #print(len(df))
    top_1_acc = compute_accuracy(df, 1)
    top_2_acc = compute_accuracy(df, 2)
    top_3_acc = compute_accuracy(df, 3)
    tpr, fpr, fnr, f1 = compute_error_rates(df)
    first_obs['observability'] += [observability]
    first_obs['top1'] += [top_1_acc]
    first_obs['top2'] += [top_2_acc]
    first_obs['top3'] += [top_3_acc]
    first_obs['tpr'] += [tpr]
    first_obs['fpr'] += [fpr]
    first_obs['fnr'] += [fnr]
    first_obs['f1'] += [f1]

In [166]:
first_obs = pd.DataFrame(first_obs)
first_obs[['observability', 'top1', 'top2', 'top3', 'tpr', 'fpr', 'fnr', 'f1']]

Unnamed: 0,observability,top1,top2,top3,tpr,fpr,fnr,f1
0,10,0.518519,0.787037,0.87037,0.759259,0.206019,0.240741,0.616049
1,30,0.351852,0.62037,0.787037,0.555556,0.267747,0.444444,0.427778
2,50,0.37963,0.648148,0.777778,0.564815,0.257716,0.435185,0.439506
3,70,0.398148,0.657407,0.768519,0.574074,0.246142,0.425926,0.464506
4,100,0.388889,0.666667,0.777778,0.555556,0.238426,0.444444,0.453704
