In [1]:
import pandas as pd
import re

MISSION_MUL = 4

def parse_string(s):
    """Parse one ensemble description into its index, model names and weights.

    The text must contain a fragment shaped like
    ``ensemble_$fellowship_$<index>('<model>', ...)[<w1>, <w2>, ...]``;
    anything before or after that fragment is ignored.

    Args:
        s: The line of text to parse.

    Returns:
        A dict with the keys ``"index"`` (int), ``"models"`` (list of the
        single-quoted names found between the parentheses) and ``"weights"``
        (list of floats taken from the comma-separated values in the brackets).

    Raises:
        ValueError: If the fragment is not found, or if the index or one of
            the weights cannot be converted to a number.
    """
    found = re.search(r"ensemble_\$fellowship_\$(.*?)\((.*?)\)\[(.*?)\]", s)
    if found is None:
        raise ValueError(f"string {s} does not match pattern")

    raw_index = found.group(1)
    raw_models = found.group(2)
    raw_weights = found.group(3)

    # Model names are the single-quoted tokens inside the parentheses.
    model_names = re.findall(r"'(.*?)'", raw_models)

    # Weights are comma-separated; surrounding whitespace is not significant.
    weight_values = []
    for token in raw_weights.split(","):
        weight_values.append(float(token.strip()))

    return {
        "index": int(raw_index),
        "models": model_names,
        "weights": weight_values
    }


# Read the ensemble descriptions (one per line) and parse each line into a
# dict with "index", "models" and "weights".
with open('ensemble_info.txt', 'r') as f:
    data = f.read()

ensembles = []
for line in data.strip().split('\n'):
    ensembles.append(parse_string(line))


# Load the four result tables.
no_guard_df = pd.read_csv('no_guard.csv')
one_astar_df = pd.read_csv('one_astar.csv')
three_semi_df = pd.read_csv('three_semi_astar.csv')
three_astar_df = pd.read_csv('three_astar.csv')

# Row i of every table is paired with ensembles[i], so the number of rows kept
# is derived from the parsed ensembles instead of being hard-coded.
n_ensembles = len(ensembles)

processed = []

for df in [no_guard_df, one_astar_df, three_semi_df, three_astar_df]:
    df = df.iloc[:n_ensembles].copy()
    if len(df) != n_ensembles:
        raise ValueError(
            f"result table has {len(df)} rows but {n_ensembles} ensembles were parsed"
        )

    # Attach the parsed ensemble description to each result row.
    df.loc[:, 'models'] = [e["models"] for e in ensembles]
    df.loc[:, 'weights'] = [e["weights"] for e in ensembles]

    # Combined score: recon plus the mission score scaled by MISSION_MUL.
    df.loc[:, 'explore'] = df['recon'] + MISSION_MUL * df['mission']
    df.loc[:, 'min explore'] = df['min recon'] + MISSION_MUL * df['min mission']

    # Put 'models' and 'weights' first and drop the original 'model' column.
    cols = ['models', 'weights'] + [col for col in df.columns if col not in ['model', 'models', 'weights']]
    df = df[cols]
    processed.append(df)

no_guard_df, one_astar_df, three_semi_df, three_astar_df = processed
no_guard_df.head()

Unnamed: 0,models,weights,id,min w/o capture recon,min w/o catpure,recon,var recon,min recon,max recon,mission,...,seen,var seen,min seen,max seen,ep length,var ep length,min ep length,max ep length,explore,min explore
0,"[atlanta-8M-deadend-8M, avignon-8M, avignon-ar...","[0.2, 0.4, 0.4]",0,,,40.84,30.8744,17.0,54.0,22.48,...,154.87,561.4731,30.0,210.0,100.0,0.0,100.0,100.0,130.76,29.0
1,"[atlanta-8M-deadend-8M, avignon-8M, avignon-ar...","[0.4, 0.2, 0.4]",1,,,40.15,27.4675,25.0,54.0,21.96,...,149.46,558.0884,30.0,198.0,100.0,0.0,100.0,100.0,127.99,37.0
2,"[atlanta-8M-deadend-8M, avignon-8M, avignon-ar...","[0.4, 0.4, 0.2]",2,,,40.04,30.9384,22.0,53.0,22.32,...,151.57,501.4251,29.0,188.0,99.98,0.0396,98.0,100.0,129.32,34.0
3,"[atlanta-8M-deadend-8M, avignon-8M, avignon-ar...","[0.2, 0.2, 0.6]",3,,,40.09,26.4619,16.0,53.0,22.09,...,150.82,578.7276,30.0,197.0,100.0,0.0,100.0,100.0,128.45,28.0
4,"[atlanta-8M-deadend-8M, avignon-8M, avignon-ar...","[0.2, 0.6, 0.2]",4,,,40.46,28.2484,26.0,53.0,22.99,...,154.9,500.07,63.0,194.0,100.0,0.0,100.0,100.0,132.42,82.0


In [11]:
def top_x(df: pd.DataFrame, metric: str, x: int = 20):
    """Return the ``x`` rows of ``df`` with the largest ``metric`` values.

    Args:
        df: Frame with 'id', 'models' and 'weights' columns plus ``metric``.
        metric: Name of the column to rank by (descending).
        x: Maximum number of rows to return.

    Returns:
        A list of ``(id, models, weights, metric value)`` tuples, best first.
    """
    ranked = df.sort_values(by=metric, ascending=False)
    best = ranked.head(x)

    ids = best['id']
    model_lists = best['models'].tolist()
    weight_lists = best['weights'].tolist()
    values = best[metric]

    return list(zip(ids, model_lists, weight_lists, values))

In [12]:
# Top 80 ensembles of `no_guard_df` for each recon / mission metric.
top_mean_recon, top_min_recon, top_mean_mission, top_min_mission = (
    top_x(no_guard_df, metric, 80)
    for metric in ('recon', 'min recon', 'mission', 'min mission')
)

# Top 30 ensembles of `no_guard_df` for the combined 'explore' metrics.
top_mean_explore, top_min_explore = (
    top_x(no_guard_df, metric, 30)
    for metric in ('explore', 'min explore')
)

In [13]:
# Show the ten best entries of each 'explore' ranking, 'min explore' first.
for ranking in (top_min_explore, top_mean_explore):
    print(ranking[:10])

[(190, ['atlanta-8M-deadend-8M', 'avignon-ariane24-4M', 'caracas-ariane25-4M'], [0.2, 0.6, 0.2], 92.0), (553, ['avignon-8M', 'avignon-ariane25-normal-2M8', 'chitose-6M'], [0.4, 0.2, 0.4], 91.0), (423, ['avignon-8M', 'avignon-ariane20-8M', 'chitose-6M'], [0.2, 0.2, 0.6], 91.0), (28, ['atlanta-8M-deadend-8M', 'avignon-8M', 'avignon-ariane25-4M'], [0.2, 0.6, 0.2], 91.0), (373, ['avignon-8M', 'avignon-ariane4-8M', 'chitose-6M'], [0.4, 0.2, 0.4], 91.0), (528, ['avignon-8M', 'avignon-ariane25-4M', 'chitose-6M'], [0.2, 0.4, 0.4], 91.0), (120, ['atlanta-8M-deadend-8M', 'avignon-ariane20-8M', 'avignon-ariane24-normal-2M4'], [0.2, 0.4, 0.4], 91.0), (475, ['avignon-8M', 'avignon-ariane24-normal-2M4', 'avignon-ariane25-4M'], [0.4, 0.2, 0.4], 91.0), (434, ['avignon-8M', 'avignon-ariane24-4M', 'avignon-ariane24-normal-2M4'], [0.4, 0.4, 0.2], 90.0), (465, ['avignon-8M', 'avignon-ariane24-4M', 'chitose-6M'], [0.2, 0.2, 0.6], 90.0)]
[(512, ['avignon-8M', 'avignon-ariane25-4M', 'avignon-ariane25-normal-

In [46]:
# Map each ensemble id to the positions (within `stats`) of the rankings in
# which it appears.
idx_trackers = {}

# Rankings to cross-reference: position 0 is the top 15 by 'explore',
# position 1 is the top 15 by 'min explore'.
stats = [top_mean_explore[:15], top_min_explore[:15]]

for i, top in enumerate(stats):
    for entry in top:
        # entry[0] is the ensemble id (first field of the tuples built by top_x).
        idx_trackers.setdefault(entry[0], []).append(i)

In [47]:
# Display: ensemble id -> positions in `stats` of the rankings that include it.
idx_trackers

{512: [0],
 511: [0, 1],
 391: [0],
 476: [0],
 479: [0],
 24: [0],
 393: [0],
 349: [0],
 478: [0],
 515: [0],
 433: [0],
 475: [0, 1],
 240: [0],
 397: [0],
 557: [0],
 190: [1],
 120: [1],
 28: [1],
 553: [1],
 423: [1],
 528: [1],
 373: [1],
 465: [1],
 434: [1],
 175: [1],
 176: [1],
 530: [1],
 198: [1]}

In [48]:
# Names of the individual models; passed to `compare` as `models_used`.
models_used = [
    "atlanta-8M-deadend-8M",
    "avignon-8M",
    "avignon-ariane4-8M",
    "avignon-ariane20-8M",
    "avignon-ariane24-4M",
    "avignon-ariane24-normal-2M4",
    "avignon-ariane25-4M",
    "avignon-ariane25-normal-2M8",
    "caracas-8M",
    "caracas-ariane25-4M",
    "chitose-6M",
    "chitose-8M"
]

In [65]:
# Ensembles that contain exactly one of the two chitose models.
# Model names are case-sensitive ('chitose-6M' / 'chitose-8M', capital M), so
# the exclusion test must use the same spelling as the inclusion test.
chitose_6m = no_guard_df[no_guard_df['models'].apply(lambda x: 'chitose-6M' in x and 'chitose-8M' not in x)]
chitose_8m = no_guard_df[no_guard_df['models'].apply(lambda x: 'chitose-8M' in x and 'chitose-6M' not in x)]

In [66]:
# Display the rows selected into `chitose_6m`.
chitose_6m

Unnamed: 0,models,weights,id,min w/o capture recon,min w/o catpure,recon,var recon,min recon,max recon,mission,...,seen,var seen,min seen,max seen,ep length,var ep length,min ep length,max ep length,explore,min explore
48,"[atlanta-8M-deadend-8M, avignon-8M, chitose-6M]","[0.2, 0.4, 0.4]",48,,,44.79,27.5859,33.0,56.0,21.65,...,158.93,481.1651,92.0,208.0,100.0,0.0,100.0,100.0,131.39,89.0
49,"[atlanta-8M-deadend-8M, avignon-8M, chitose-6M]","[0.4, 0.2, 0.4]",49,,,44.59,27.5019,34.0,59.0,21.50,...,156.61,415.7179,102.0,211.0,100.0,0.0,100.0,100.0,130.59,78.0
50,"[atlanta-8M-deadend-8M, avignon-8M, chitose-6M]","[0.4, 0.4, 0.2]",50,,,43.43,26.6651,33.0,59.0,21.80,...,154.69,458.4939,84.0,191.0,100.0,0.0,100.0,100.0,130.63,73.0
51,"[atlanta-8M-deadend-8M, avignon-8M, chitose-6M]","[0.2, 0.2, 0.6]",51,,,45.73,26.8371,30.0,59.0,20.91,...,156.89,524.1779,64.0,202.0,100.0,0.0,100.0,100.0,129.37,70.0
52,"[atlanta-8M-deadend-8M, avignon-8M, chitose-6M]","[0.2, 0.6, 0.2]",52,,,42.80,23.8600,32.0,54.0,22.32,...,156.78,541.1716,99.0,208.0,100.0,0.0,100.0,100.0,132.08,84.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
583,"[avignon-8M, caracas-ariane25-4M, chitose-6M]","[0.4, 0.2, 0.4]",583,,,46.21,26.0659,31.0,60.0,21.05,...,161.06,390.8764,96.0,203.0,100.0,0.0,100.0,100.0,130.41,75.0
584,"[avignon-8M, caracas-ariane25-4M, chitose-6M]","[0.4, 0.4, 0.2]",584,,,47.06,23.5364,34.0,59.0,20.47,...,157.60,423.1400,100.0,200.0,100.0,0.0,100.0,100.0,128.94,78.0
585,"[avignon-8M, caracas-ariane25-4M, chitose-6M]","[0.2, 0.2, 0.6]",585,,,46.92,28.4936,29.0,59.0,20.34,...,159.07,480.9051,76.0,206.0,100.0,0.0,100.0,100.0,128.28,73.0
586,"[avignon-8M, caracas-ariane25-4M, chitose-6M]","[0.2, 0.6, 0.2]",586,,,48.27,30.5171,38.0,62.0,19.39,...,160.26,417.3124,95.0,195.0,100.0,0.0,100.0,100.0,125.83,66.0


### Head-to-head comparison of certain models

In [73]:
from itertools import combinations
import numpy as np

# Result columns on which `compare` awards points (higher is better).
metrics_to_judge = ['recon', 'min recon', 'mission', 'min mission', 'explore', 'min explore']

# Weight vectors iterated by `compare`: the three orderings each of
# (0.2, 0.4, 0.4) and (0.2, 0.2, 0.6).
weights_combi = [
    [0.2, 0.4, 0.4],
    [0.4, 0.2, 0.4],
    [0.4, 0.4, 0.2],
    [0.2, 0.2, 0.6],
    [0.2, 0.6, 0.2],
    [0.6, 0.2, 0.2],
]

def compare(df: pd.DataFrame, models_used: list[str], to_compare: list[str]):
    """Count head-to-head wins between the models in ``to_compare``.

    For every weight vector in ``weights_combi`` and every pair of models from
    ``models_used`` that are not in ``to_compare``, each candidate model is
    added to the pair to form a three-model ensemble. If ``df`` has a row for
    every candidate's ensemble at that weight vector, the candidate with the
    highest value of each metric in ``metrics_to_judge`` gets one point for
    that metric. Pairs for which any candidate's row is missing are skipped.

    Args:
        df: Result frame whose 'models' and 'weights' columns hold lists and
            which has one column per entry of ``metrics_to_judge``.
        models_used: All model names to draw the two "partner" models from.
        to_compare: The candidate models to score against each other.

    Returns:
        ``scores`` where ``scores[i][j]`` is the number of times
        ``to_compare[i]`` had the best value of ``metrics_to_judge[j]``.

    Notes:
        * Ties go to the candidate listed first in ``to_compare`` because
          ``np.argmax`` returns the first maximal index.
        * The ensemble's model names are sorted before the lookup and matched
          against the 'models' list exactly.
          NOTE(review): this assumes df['models'] is stored in plain string
          sort order; rows stored in another order are never matched - verify.
        * When several rows match, the first one in ``df`` is used.
    """
    partners = [model for model in models_used if model not in to_compare]
    n_metrics = len(metrics_to_judge)
    scores = [[0] * n_metrics for _ in to_compare]

    # Index the frame once instead of rescanning it for every lookup. Only
    # list-valued cells are indexed (the lookup is an exact list match), and
    # setdefault keeps the first row for each (models, weights) combination.
    first_row_at = {}
    for pos, (models, weights) in enumerate(zip(df['models'], df['weights'])):
        if isinstance(models, list) and isinstance(weights, list):
            first_row_at.setdefault((tuple(models), tuple(weights)), pos)

    for weight in weights_combi:
        weight_key = tuple(weight)
        for pair in combinations(partners, 2):
            positions = [
                first_row_at.get((tuple(sorted(pair + (model,))), weight_key))
                for model in to_compare
            ]
            # Score only when every candidate has a result for this setting.
            if any(pos is None for pos in positions):
                continue

            candidate_scores = np.zeros((len(to_compare), n_metrics))
            for i, pos in enumerate(positions):
                row = df.iloc[pos]
                for j, metric in enumerate(metrics_to_judge):
                    candidate_scores[i][j] = row[metric]

            winners = np.argmax(candidate_scores, axis=0)
            for metric_idx, winner in enumerate(winners):
                scores[winner][metric_idx] += 1

    return scores

# Head-to-head win counts for the two chitose models on `no_guard_df`.
compare(no_guard_df, models_used, ["chitose-6M", "chitose-8M"])

[[47, 65, 66, 65, 66, 59], [55, 37, 36, 37, 36, 43]]

In [71]:
# `models_used` is the list defined earlier in the notebook; it is reused here
# rather than repeated so the model names have a single source of truth.

# Groups of models to score against each other with `compare`.
comparisons = [
    ["chitose-6M", "chitose-8M"],
    ["caracas-8M", "caracas-ariane25-4M"],
    ["avignon-ariane25-4M", "avignon-ariane25-normal-2M8"],
    ["avignon-ariane24-4M", "avignon-ariane24-normal-2M4"],
    ["avignon-ariane24-4M", "avignon-ariane25-4M"],
]

for comparison in comparisons:
    res = compare(no_guard_df, models_used, comparison)

    # One line per model: its win count for each metric in `metrics_to_judge`.
    for model, model_scores in zip(comparison, res):
        print(f"{model}:\t", model_scores)

    print()

chitose-6M:	 [47, 65, 66, 65, 66, 59]
chitose-8M:	 [55, 37, 36, 37, 36, 43]

caracas-8M:	 [53, 49, 31, 44, 27, 42]
caracas-ariane25-4M:	 [49, 53, 71, 58, 75, 60]

avignon-ariane25-4M:	 [85, 69, 27, 53, 63, 58]
avignon-ariane25-normal-2M8:	 [5, 21, 63, 37, 27, 32]

avignon-ariane24-4M:	 [90, 77, 14, 51, 44, 56]
avignon-ariane24-normal-2M4:	 [0, 13, 76, 39, 46, 34]

avignon-ariane24-4M:	 [73, 62, 24, 58, 37, 54]
avignon-ariane25-4M:	 [17, 28, 66, 32, 53, 36]



### Comparison with single-model evaluation

In [7]:
# Load the table stored in 'no_guard_single.csv' and display it.
single_df = pd.read_csv('no_guard_single.csv')
single_df

Unnamed: 0,model,scout_recon_mean,scout_recon_var,scout_recon_min,scout_recon_max,scout_mission_mean,scout_mission_var,scout_mission_min,scout_mission_max,wall_collision_mean,...,custom_DEADEND_BASE_min,custom_DEADEND_BASE_max,custom_SEE_NEW_mean,custom_SEE_NEW_var,custom_SEE_NEW_min,custom_SEE_NEW_max,ep_len_mean,ep_len_var,ep_len_min,ep_len_max
0,Atlanta-8M-deadend-8M,36.07,104.7651,3.0,52.0,20.45,41.1075,3.0,32.0,0.62,...,0.0,80.0,141.02,1368.8596,12.0,184.0,100.0,0.0,100.0,100.0
1,avignon-8M,39.79,27.3859,31.0,51.0,22.97,19.5091,12.0,31.0,0.04,...,0.0,30.0,155.23,413.5371,83.0,202.0,99.95,0.2475,95.0,100.0
2,avignon-ariane4-8M,39.2,29.2,15.0,52.0,21.89,24.5379,3.0,31.0,0.7,...,0.0,72.0,151.31,612.3339,30.0,201.0,100.0,0.0,100.0,100.0
3,avignon-ariane20-8M,48.97,29.5291,30.0,62.0,20.23,17.4371,10.0,30.0,0.0,...,0.0,30.0,161.61,399.9179,96.0,204.0,100.0,0.0,100.0,100.0
4,avignon-ariane24-4M,45.14,19.5804,33.0,58.0,21.31,12.9739,8.0,29.0,0.0,...,0.0,23.0,161.52,400.3696,108.0,210.0,100.0,0.0,100.0,100.0
5,avignon-ariane24-normal-2M4,40.57,29.8251,27.0,54.0,23.35,19.5875,10.0,32.0,0.0,...,0.0,13.0,156.85,307.5875,94.0,187.0,100.0,0.0,100.0,100.0
6,avignon-ariane25-4M,44.88,21.1056,33.0,58.0,21.87,17.6131,10.0,31.0,0.0,...,0.0,24.0,162.03,413.4891,97.0,201.0,100.0,0.0,100.0,100.0
7,avignon-ariane25-normal-2M8,40.78,26.8916,19.0,53.0,23.03,16.9291,13.0,32.0,0.0,...,0.0,19.0,157.06,406.6764,104.0,190.0,100.0,0.0,100.0,100.0
8,caracas-8M,48.7,34.51,31.0,63.0,18.35,15.4475,8.0,29.0,0.0,...,0.0,47.0,151.33,504.9411,77.0,201.0,100.0,0.0,100.0,100.0
9,caracas-ariane25-4M,49.35,29.5075,33.0,61.0,19.02,20.9196,4.0,30.0,0.0,...,0.0,25.0,157.46,414.6684,108.0,205.0,100.0,0.0,100.0,100.0


In [None]:
# Combined 'explore' scores for the single-model table, using the same
# MISSION_MUL weighting as the ensemble tables so both stay comparable.
single_df['explore'] = single_df['scout_recon_mean'] + MISSION_MUL * single_df['scout_mission_mean']
single_df['min explore'] = single_df['scout_recon_min'] + MISSION_MUL * single_df['scout_mission_min']

# Five rows with the highest 'min explore'.
top_rows = single_df.sort_values(by='min explore', ascending=False).head(5)
top_rows

Unnamed: 0,model,scout_recon_mean,scout_recon_var,scout_recon_min,scout_recon_max,scout_mission_mean,scout_mission_var,scout_mission_min,scout_mission_max,wall_collision_mean,...,custom_SEE_NEW_mean,custom_SEE_NEW_var,custom_SEE_NEW_min,custom_SEE_NEW_max,ep_len_mean,ep_len_var,ep_len_min,ep_len_max,explore,min_explore
1,avignon-8M,39.79,27.3859,31.0,51.0,22.97,19.5091,12.0,31.0,0.04,...,155.23,413.5371,83.0,202.0,99.95,0.2475,95.0,100.0,131.67,79.0
6,avignon-ariane25-4M,44.88,21.1056,33.0,58.0,21.87,17.6131,10.0,31.0,0.0,...,162.03,413.4891,97.0,201.0,100.0,0.0,100.0,100.0,132.36,73.0
11,chitose-8M,46.08,21.3536,29.0,57.0,20.04,16.4584,11.0,28.0,0.0,...,156.36,417.8304,96.0,197.0,100.0,0.0,100.0,100.0,126.24,73.0
7,avignon-ariane25-normal-2M8,40.78,26.8916,19.0,53.0,23.03,16.9291,13.0,32.0,0.0,...,157.06,406.6764,104.0,190.0,100.0,0.0,100.0,100.0,132.9,71.0
3,avignon-ariane20-8M,48.97,29.5291,30.0,62.0,20.23,17.4371,10.0,30.0,0.0,...,161.61,399.9179,96.0,204.0,100.0,0.0,100.0,100.0,129.89,70.0
