# Experimental Evaluation: Goal Recognition in FOND with Temporally Extended Goals (LTLf and PLTL)

In [199]:
BASE_DIR = '../fond-recognition-benchmarks'

In [200]:
import os
import sys
import pandas as pd
import glob
import json
import numpy as np
import itertools

In [201]:
def compute_posterior(index, prob, priors ):
    num = probs[index] * priors[index]
    denom = np.sum(probs*priors)
    return num / denom

In [202]:
RESULTS_DIR = os.path.join(BASE_DIR, 'results')

In [203]:
json_files = glob.glob(os.path.join(RESULTS_DIR, '*.json'))

In [204]:
print("Results found:", len(json_files))

Results found: 16


# Results Collection

In [205]:
print(json_files[0])
print(os.path.basename(json_files[0]))
filename, ext = os.path.basename(json_files[0]).split('.')

../fond-recognition-benchmarks/results/triangle-tireworld_p01_hyp-1_10_2.json
triangle-tireworld_p01_hyp-1_10_2.json


In [206]:
tokens = filename.split('_')

In [218]:
dataset = {
    'domain' : [],\
    'problem' : [],\
    'goals' : [],\
    'judge_point' : [],\
    'obs_len' : [],\
    'true_goal': [],\
    'posterior': []}

for pathname in json_files:
    filename, ext = os.path.basename(pathname).split('.')
    tokens = filename.split('_')
    approach = tokens[0]
    model_type = tokens[-1]
    data = {}
    with open(pathname) as instream:
        buffer = instream.read()
        data = json.loads(buffer)
    #print(approach, model_type, data['domain'], data['problem'], data['observability'])
    num_goals = len(data["G"])
    true_goal = data["G"].index(data['G*'])
    likelihoods = data['P(Obs | G)']
    probs = []
    for l in likelihoods:
        probs.append(l[0])
    for k, prob_O_G in enumerate(likelihoods):
        post_probs = [compute_posterior(j, probs, np.ones(num_goals)/num_goals) \
                     for j in range(len(data['G']))]
        dataset['domain'] += [data['domain']]
        dataset['problem'] += [data['problem']]
        dataset['goals'] += [num_goals]
        dataset['judge_point'] += [k]
        dataset['obs_len'] += [len(data['Obs'])]
        dataset['true_goal'] += [true_goal]
        dataset['posterior'] += [np.array(post_probs)]

[[0.9835432294689368], [0.9934172917875748], [0.023039478743488506]]
[0.4917716147344684, 0.4967086458937873, 0.011519739371744253]
[0.4917716147344684, 0.4967086458937873, 0.011519739371744253]
[0.4917716147344684, 0.4967086458937873, 0.011519739371744253]
[[0.5925925925925926, 0.9711377793516491, 0.9835432294689368, 0.5], [0.7037037037037037, 0.980758519567766, 0.9934172917875748, 1.0], [0.7037037037037037, 0.04810370108058478, 0.023039478743488506, 0.5]]
[0.2962962962962963, 0.35185185185185186, 0.35185185185185186]
[0.2962962962962963, 0.35185185185185186, 0.35185185185185186]
[0.2962962962962963, 0.35185185185185186, 0.35185185185185186]
[[0.9741790818599768], [0.9838619261624855], [0.041958991977537496]]
[0.4870895409299884, 0.49193096308124273, 0.020979495988768748]
[0.4870895409299884, 0.49193096308124273, 0.020979495988768748]
[0.4870895409299884, 0.49193096308124273, 0.020979495988768748]
[[0.5925925925925926, 0.9741790818599768], [0.7037037037037037, 0.9838619261624855], [0.

In [208]:
dataset = pd.DataFrame(dataset)

In [209]:
def compute_accuracy(dataset, top_k):
    tp_count = 0
    for index, row in dataset.iterrows():
        top_k_goals = np.argpartition(row['posterior'], -top_k)[-top_k:]
        if row['true_goal'] in top_k_goals:
            tp_count += 1
    return tp_count / len(dataset)

In [210]:
def compute_error_rates(dataset):
    tpr = 0.0
    fpr = 0.0
    fnr = 0.0
    
    for index, row in dataset.iterrows():
        tp_count = 0
        tn_count = 0
        fp_count = 0
        fn_count = 0

        top_goal = np.argmax(row['posterior'])
        max_goals = [ k for k, p in enumerate(row['posterior']) if p == row['posterior'][top_goal]]
        
        if row['true_goal'] in max_goals:
            tp_count = 1
        fn_count = 1 - tp_count
        fp_count = len(max_goals) - tp_count
        tn_count = row['goals'] - fp_count
    
        tpr += tp_count/(tp_count + fp_count)
        fpr += fp_count/(fp_count + tn_count)
        fnr += fn_count/(fn_count + tp_count)
    return tpr/len(dataset), fpr/len(dataset), fnr/len(dataset)

## Summary of Results

In [211]:
approaches = ['approach']

summary = {'approach': [], 'top1': [], 'top2': [], 'top3': [], 'tpr' : [], 'fpr':[], 'fnr': []}

for approach in itertools.product(approaches):
    df = dataset
    if len(df) == 0: continue
    print(len(df))
    top_1_acc = compute_accuracy(df, 1)
    top_2_acc = compute_accuracy(df, 2)
    top_3_acc = compute_accuracy(df, 3)
    tpr, fpr, fnr = compute_error_rates(df)
    summary['approach'] += [approach]
    summary['top1'] += [top_1_acc]
    summary['top2'] += [top_2_acc]
    summary['top3'] += [top_3_acc]
    summary['tpr'] += [tpr]
    summary['fpr'] += [fpr]
    summary['fnr'] += [fnr]

48


In [212]:
summary = pd.DataFrame(summary)

In [213]:
summary[['top1', 'top2', 'top3', 'tpr', 'fpr', 'fnr']]

Unnamed: 0,top1,top2,top3,tpr,fpr,fnr
0,0.25,1.0,1.0,0.625,0.25,0.0


## Last Observation

In [214]:
offline = {'top1': [], 'top2': [], 'top3': [], 'tpr' : [], 'fpr':[], 'fnr': []}

for approach in itertools.product(approaches):
    df = dataset.loc[(dataset['judge_point'] == dataset['obs_len'] - 1)]
    if len(df) == 0: continue
    top_1_acc = compute_accuracy(df, 1)
    top_2_acc = compute_accuracy(df, 2)
    top_3_acc = compute_accuracy(df, 3)
    tpr, fpr, fnr = compute_error_rates(df)
    offline['top1'] += [top_1_acc]
    offline['top2'] += [top_2_acc]
    offline['top3'] += [top_3_acc]
    offline['tpr'] += [tpr]
    offline['fpr'] += [fpr]
    offline['fnr'] += [fnr]

In [215]:
offline = pd.DataFrame(offline)
offline[['top1', 'top2', 'top3', 'tpr', 'fpr', 'fnr']]

Unnamed: 0,top1,top2,top3,tpr,fpr,fnr
0,0.444444,1.0,1.0,0.722222,0.185185,0.0


### First Observation

In [216]:
first_obs = {'top1': [], 'top2': [], 'top3': [], 'tpr' : [], 'fpr':[], 'fnr': []}

for approach in itertools.product(approaches):
    df = dataset.loc[(dataset['judge_point'] == 0)]
    if len(df) == 0: continue
    #print(len(df))
    top_1_acc = compute_accuracy(df, 1)
    top_2_acc = compute_accuracy(df, 2)
    top_3_acc = compute_accuracy(df, 3)
    tpr, fpr, fnr = compute_error_rates(df)
    first_obs['top1'] += [top_1_acc]
    first_obs['top2'] += [top_2_acc]
    first_obs['top3'] += [top_3_acc]
    first_obs['tpr'] += [tpr]
    first_obs['fpr'] += [fpr]
    first_obs['fnr'] += [fnr]

In [217]:
first_obs = pd.DataFrame(first_obs)
first_obs[['top1', 'top2', 'top3', 'tpr', 'fpr', 'fnr']]

Unnamed: 0,top1,top2,top3,tpr,fpr,fnr
0,0.25,1.0,1.0,0.625,0.25,0.0
