# Experimental Evaluation: 
## Goal Recognition in FOND using Goals Formalized in PLTLf (Once)

In [106]:
BASE_DIR = '../fond-recognition-benchmarks'

In [107]:
import os
import sys
import pandas as pd
import glob
import json
import numpy as np
import itertools

In [108]:
def compute_posterior(index, prob, priors):
    num = prob[index] * priors[index]
    denom = np.sum(prob*priors)
    return num / denom

In [109]:
RESULTS_DIR = os.path.join(BASE_DIR, 'results/')

In [110]:
domains_evaluation = ['blocksworld', 'logistics', 'tidyup', 'tireworld', 'triangle-tireworld', 'zenotravel']
type_goal = 'pltl_once'
json_files = []
for domain in domains_evaluation:
    files = glob.glob(os.path.join(RESULTS_DIR + '/' + domain + '/' + type_goal, '*.json'))
    for f in files:
        json_files.append(f)

In [111]:
print("Results found:", len(json_files))

Results found: 468


# Results Collection

In [112]:
print(json_files[0])
print(os.path.basename(json_files[0]))
filename, ext = os.path.basename(json_files[0]).split('.')

../fond-recognition-benchmarks/results//blocksworld/pltl_once/blocksworld_pltl0_p02_hyp-2_full.json
blocksworld_pltl0_p02_hyp-2_full.json


In [113]:
tokens = filename.split('_')

In [114]:
dataset = {
    'domain' : [],\
    'problem' : [],\
    'observability' : [],\
    'goals' : [],\
    'judge_point' : [],\
    'obs_len' : [],\
    'true_goal': [],\
    'time': [],\
    'posterior': []}

sum_obs_dict = dict()
problems_obs_dict = dict()

sum_goals = 0

for pathname in json_files:
    filename, ext = os.path.basename(pathname).split('.')
    tokens = filename.split('_')
    approach = tokens[0]
    model_type = tokens[-1]
    data = {}
    with open(pathname) as instream:
        buffer = instream.read()
        data = json.loads(buffer)
    #print(approach, model_type, data['domain'], data['problem'], data['observability'])
    num_goals = len(data["G"])
    true_goal = data["G"].index(data['G*'])
    likelihoods = data['P(Obs | G)']
    sum_goals += num_goals
    
    if data['observability'] in sum_obs_dict:
        problems_obs_dict[data['observability']] += 1
        sum_obs_dict[data['observability']] += len(data['Obs'])
    else:
        problems_obs_dict[data['observability']] = 1
        sum_obs_dict[data['observability']] = len(data['Obs'])
    
    for k, prob_O_G in enumerate(likelihoods):
        post_probs = [compute_posterior(j, prob_O_G, np.ones(num_goals)/num_goals) \
                     for j in range(len(data['G']))]
        dataset['domain'] += [data['domain']]
        dataset['problem'] += [data['problem']]
        dataset['goals'] += [num_goals]
        dataset['judge_point'] += [k]
        dataset['obs_len'] += [len(data['Obs'])]
        dataset['true_goal'] += [true_goal]
        dataset['observability'] += [data['observability']]
        dataset['time'] += [data['time']]
        dataset['posterior'] += [np.array(post_probs)]

avg_obs = 0.0
sum_obs = 0.0
avg_obs_observability = dict()
for k in sum_obs_dict.keys():
    avg = sum_obs_dict[k] / problems_obs_dict[k]
    avg_obs_observability[k] = avg
    sum_obs += avg
    
avg_obs = sum_obs / len(sum_obs_dict)

avg_goals = sum_goals/len(json_files)

In [115]:
dataset = pd.DataFrame(dataset)

In [116]:
def compute_accuracy(dataset, top_k):
    tp_count = 0
    for index, row in dataset.iterrows():
        top_k_goals = np.argpartition(row['posterior'], -top_k)[-top_k:]
        if row['true_goal'] in top_k_goals:
            tp_count += 1
    return tp_count / len(dataset)

In [117]:
def compute_recognition_time(dataset):
    sum_time = 0
    for index, row in dataset.iterrows():
        sum_time += row['time']

    return (sum_time / len(dataset))

In [118]:
def compute_error_rates(dataset):
    tpr = 0.0
    fpr = 0.0
    fnr = 0.0

    for index, row in dataset.iterrows():
        tp_count = 0
        tn_count = 0
        fp_count = 0
        fn_count = 0

        top_goal = np.argmax(row['posterior'])
        max_goals = [ k for k, p in enumerate(row['posterior']) if p == row['posterior'][top_goal]]
        
        if row['true_goal'] in max_goals:
            tp_count = 1
        fn_count = 1 - tp_count
        fp_count = len(max_goals) - tp_count
        tn_count = row['goals'] - fp_count
    
        tpr += tp_count/(tp_count + fn_count)
        fpr += fp_count/(fp_count + tn_count)
        fnr += fn_count/(fn_count + tp_count)
    return tpr/len(dataset), fpr/len(dataset), fnr/len(dataset)

## Online Goal Recognition

In [119]:
approaches = ['approach']
observabilities = ['10', '25', '30', '50', '70', '75', '100']

summary = {'approach': [], 'observability': [], 'top1': [], 'top2': [], 'top3': [], 'tpr' : [], 'fpr':[], 'fnr': []}

for approach, observability in itertools.product(approaches, observabilities):
    df = dataset.loc[(dataset['observability'] == observability)]
    if len(df) == 0: 
        continue
    top_1_acc = compute_accuracy(df, 1)
    top_2_acc = compute_accuracy(df, 2)
    top_3_acc = compute_accuracy(df, 3)
    tpr, fpr, fnr = compute_error_rates(df)
    summary['approach'] += [approach]
    summary['observability'] += [observability]
    summary['top1'] += [top_1_acc]
    summary['top2'] += [top_2_acc]
    summary['top3'] += [top_3_acc]
    summary['tpr'] += [tpr]
    summary['fpr'] += [fpr]
    summary['fnr'] += [fnr]

In [120]:
summary = pd.DataFrame(summary)

In [121]:
summary[['observability', 'top1', 'top2', 'top3', 'tpr', 'fpr', 'fnr']]

Unnamed: 0,observability,top1,top2,top3,tpr,fpr,fnr
0,10,0.480392,0.794118,0.901961,0.558824,0.155637,0.441176
1,30,0.517691,0.798883,0.918063,0.582868,0.144786,0.417132
2,50,0.503546,0.776596,0.901891,0.568558,0.14805,0.431442
3,70,0.490741,0.780303,0.906566,0.558923,0.151726,0.441077
4,100,0.486339,0.777778,0.903461,0.553734,0.152095,0.446266


## Offline Goal Recognition

In [122]:
offline = {'observability': [], 'avg_obs': [], 'time': [], 'top1': [], 'top2': [], 'top3': [], 'tpr' : [], 'fpr':[], 'fnr': []}

for approach, observability in itertools.product(approaches, observabilities):
    df = dataset.loc[(dataset['observability'] == observability)\
                    & (dataset['judge_point'] == dataset['obs_len'] - 1)]
    if len(df) == 0: continue
    top_1_acc = compute_accuracy(df, 1)
    top_2_acc = compute_accuracy(df, 2)
    top_3_acc = compute_accuracy(df, 3)
    tpr, fpr, fnr = compute_error_rates(df)
    avg_time = compute_recognition_time(df)
    offline['observability'] += [observability]
    offline['avg_obs'] += [avg_obs_observability[observability]]
    offline['time'] += [avg_time]
    offline['top1'] += [top_1_acc]
    offline['top2'] += [top_2_acc]
    offline['top3'] += [top_3_acc]
    offline['tpr'] += [tpr]
    offline['fpr'] += [fpr]
    offline['fnr'] += [fnr]

In [123]:
offline = pd.DataFrame(offline)
offline[['observability', 'avg_obs', 'time', 'top1', 'top2', 'top3', 'tpr', 'fpr', 'fnr']]

Unnamed: 0,observability,avg_obs,time,top1,top2,top3,tpr,fpr,fnr
0,10,1.888889,144.880088,0.62037,0.916667,0.981481,0.731481,0.113426,0.268519
1,30,4.972222,141.315068,0.75,0.925926,0.972222,0.842593,0.078704,0.157407
2,50,7.833333,141.969204,0.787037,0.935185,0.981481,0.888889,0.081019,0.111111
3,70,11.0,142.935522,0.833333,0.962963,1.0,0.953704,0.069444,0.046296
4,100,15.25,155.881186,0.833333,0.972222,1.0,0.972222,0.069444,0.027778


In [124]:
print('Average observations: %s' % avg_obs)
print('Average goals: %s' % avg_goals)

Average observations: 8.188888888888888
Average goals: 4.0


### First Observation

In [125]:
first_obs = {'observability': [], 'top1': [], 'top2': [], 'top3': [], 'tpr' : [], 'fpr':[], 'fnr': []}

for approach, observability in itertools.product(approaches, observabilities):
    df = dataset.loc[(dataset['observability'] == observability)\
                    & (dataset['judge_point'] == 0)]
    if len(df) == 0: continue
    #print(len(df))
    top_1_acc = compute_accuracy(df, 1)
    top_2_acc = compute_accuracy(df, 2)
    top_3_acc = compute_accuracy(df, 3)
    tpr, fpr, fnr = compute_error_rates(df)
    first_obs['observability'] += [observability]
    first_obs['top1'] += [top_1_acc]
    first_obs['top2'] += [top_2_acc]
    first_obs['top3'] += [top_3_acc]
    first_obs['tpr'] += [tpr]
    first_obs['fpr'] += [fpr]
    first_obs['fnr'] += [fnr]

In [126]:
first_obs = pd.DataFrame(first_obs)
first_obs[['observability', 'top1', 'top2', 'top3', 'tpr', 'fpr', 'fnr']]

Unnamed: 0,observability,top1,top2,top3,tpr,fpr,fnr
0,10,0.435185,0.777778,0.898148,0.490741,0.164352,0.509259
1,30,0.342593,0.675926,0.833333,0.398148,0.19213,0.601852
2,50,0.277778,0.611111,0.796296,0.361111,0.215278,0.638889
3,70,0.259259,0.592593,0.805556,0.351852,0.226852,0.648148
4,100,0.277778,0.611111,0.805556,0.361111,0.222222,0.638889
