## Imports

In [7]:
import os
import json
import numpy as np
import pandas as pd

from typing import Dict, List
from copy import deepcopy

## Set Parameters

In [25]:
RESULTS_PATH = os.path.join(".", "minigrid-results")
SEEDS = [100, 200, 300, 400]
EVAL_EVERY = 1000
STARTING_EVAL_STEP = 2500
END_EVAL_STEP = 100500
NUM_EVAL_EPISODES = 15

## Initialise True and Empirical Transition Probs

In [21]:
TRUE_BLUE_PROBS = [1/3, 1/3, 1/3, 0] # to: green, middle goal, purple, wrong
TRUE_GREEN_PROBS = [1/2, 1/2, 0] # to: top goal, purple, wrong
TRUE_LEFT_PURPLE_PROBS = [1/2, 1/2, 0] # to: middle goal, blue, wrong
TRUE_RIGHT_PURPLE_PROBS = [1/2, 1/2, 0] # to: middle goal, bottom goal, wrong

initial_empirical_probs = {
    "blue": {
        "green": 0.0,
        "middle": 0.0,
        "purple": 0.0,
        "wrong": 1.0,
    },
    "green": {
        "top": 0.0,
        "purple": 0.0,
        "wrong": 1.0,
    },
    "left_purple": {
        "middle": 0.0,
        "blue": 0.0,
        "wrong": 1.0,
    },
    "right_purple": {
        "middle": 0.0,
        "bottom": 0.0,
        "wrong": 1.0,
    },
}

running_empirical_estimates: List[Dict[str, Dict[str, float]]] = []

## Process One Eval Step

In [15]:
def get_empirical_estimates(seed: int, prev_estimate: Dict[str, Dict[str, float]], eval_step: int) -> Dict[str, Dict[str, float]]:
    """Get the empirical transition estimates for the current eval step.

    Args:
        seed (int): experiment seed to load data from
        prev_estimate (Dict[str, Dict[str, float]]): empirical estimates for prev eval step
        eval_step (int): current eval step

    Returns:
        Dict[str, Dict[str, float]]: empirical estimates for current eval step
    """
    
    step_dir = os.path.join(RESULTS_PATH, f"minigrid-stochastic-results-seed-{seed}", f"minigrid-stochastic-results-seed-{seed}", f"step_{eval_step}")
    transition_estimate = deepcopy(initial_empirical_probs)
    transition_counts = deepcopy(initial_empirical_probs)
    for eval_episode in range(NUM_EVAL_EPISODES):
        with open(os.path.join(step_dir, f"eval_episode_{eval_episode}.json"), "r") as f:
            episode_data = json.load(f)
            transition_data = episode_data["transition_probabilities"]
            # Update blue transition counts and estimates
            if transition_data["blue_green_prob"] != -1:
                transition_estimate["blue"]["green"] += transition_data["blue_green_prob"]
                transition_counts["blue"]["green"] += 1
                transition_estimate["blue"]["middle"] += transition_data["blue_middle_prob"]
                transition_counts["blue"]["middle"] += 1
                transition_estimate["blue"]["purple"] += transition_data["blue_purple_prob"]
                transition_counts["blue"]["purple"] += 1
                transition_estimate["blue"]["wrong"] += transition_data["blue_wrong_prob"]
                transition_counts["blue"]["wrong"] += 1
            # Update green transition counts and estimates
            if transition_data["green_leftgoal_prob"] != -1:
                transition_estimate["green"]["top"] += transition_data["green_leftgoal_prob"]
                transition_counts["green"]["top"] += 1
                transition_estimate["green"]["purple"] += transition_data["green_purple_prob"]
                transition_counts["green"]["purple"] += 1
                transition_estimate["green"]["wrong"] += transition_data["green_wrong_prob"]
                transition_counts["green"]["wrong"] += 1
            # Update left purple transition counts and estimates
            if transition_data["purple_left_blue_prob"] != -1:
                transition_estimate["left_purple"]["middle"] += transition_data["purple_left_middle_prob"]
                transition_counts["left_purple"]["middle"] += 1
                transition_estimate["left_purple"]["blue"] += transition_data["purple_left_blue_prob"]
                transition_counts["left_purple"]["blue"] += 1
                transition_estimate["left_purple"]["wrong"] += transition_data["purple_left_wrong_prob"]
                transition_counts["left_purple"]["wrong"] += 1
            # Update right purple transition counts and estimates
            if transition_data["purple_right_middle_prob"] != -1:
                transition_estimate["right_purple"]["middle"] += transition_data["purple_right_middle_prob"]
                transition_counts["right_purple"]["middle"] += 1
                transition_estimate["right_purple"]["bottom"] += transition_data["purple_right_bottom_prob"]
                transition_counts["right_purple"]["bottom"] += 1
                transition_estimate["right_purple"]["wrong"] += transition_data["purple_right_wrong_prob"]
                transition_counts["right_purple"]["wrong"] += 1
    # Take the average of the estimates
    for key in transition_estimate.keys():
        for sub_key in transition_estimate[key].keys():
            if transition_counts[key][sub_key] > 0:
                transition_estimate[key][sub_key] /= transition_counts[key][sub_key]
            else:
                transition_estimate[key][sub_key] = prev_estimate[key][sub_key]
    return transition_estimate

test = get_empirical_estimates(100, initial_empirical_probs, 3500)
print(test)
del test

{'blue': {'green': 0.3128285681044945, 'middle': 0.07608784267767486, 'purple': 0.18724011026018658, 'wrong': 0.423843478957644}, 'green': {'top': 0.0, 'purple': 0.0, 'wrong': 0.0}, 'left_purple': {'middle': 0.0, 'blue': 0.0, 'wrong': 0.0}, 'right_purple': {'middle': 0.0, 'bottom': 0.0, 'wrong': 0.0}}


## Get probabilities over time

In [19]:
def get_empirical_estimates_for_seed(seed: int) -> List[Dict[str, Dict[str, float]]]:
    """Get the empirical transition estimates for all eval steps for a given seed.

    Args:
        seed (int): experiment seed to load data from

    Returns:
        List[Dict[str, Dict[str, float]]]: empirical estimates for all eval steps
    """
    
    empirical_estimates = []
    prev_estimate = deepcopy(initial_empirical_probs)
    for eval_step in range(STARTING_EVAL_STEP, END_EVAL_STEP+1, EVAL_EVERY):
        estimate = get_empirical_estimates(seed, prev_estimate, eval_step)
        empirical_estimates.append(estimate)
        prev_estimate = estimate
    return empirical_estimates

test = get_empirical_estimates_for_seed(400)
print(test)
print(len(test))
del test

[{'blue': {'green': 0.0, 'middle': 0.0, 'purple': 0.0, 'wrong': 0.0}, 'green': {'top': 0.0, 'purple': 0.0, 'wrong': 0.0}, 'left_purple': {'middle': 0.0, 'blue': 0.0, 'wrong': 0.0}, 'right_purple': {'middle': 0.0, 'bottom': 0.0, 'wrong': 0.0}}, {'blue': {'green': 0.23071738971738973, 'middle': 0.148021593021593, 'purple': 0.49128550028550033, 'wrong': 0.12997551697551696}, 'green': {'top': 0.047619047619047616, 'purple': 0.14285714285714285, 'wrong': 0.8095238095238094}, 'left_purple': {'middle': 1.0, 'blue': 0.0, 'wrong': 0.0}, 'right_purple': {'middle': 0.6428571428571429, 'bottom': 0.0, 'wrong': 0.35714285714285715}}, {'blue': {'green': 0.3873873705467909, 'middle': 0.18116442012094183, 'purple': 0.42286796241868707, 'wrong': 0.008580246913580248}, 'green': {'top': 0.5986714975845411, 'purple': 0.08522544283413848, 'wrong': 0.31610305958132046}, 'left_purple': {'middle': 0.7, 'blue': 0.0, 'wrong': 0.3}, 'right_purple': {'middle': 0.48409090909090907, 'bottom': 0.4431818181818181, 'wr

## Calculate TVD for each teleporter

In [22]:
def calculate_tvd_for_empirical_estimate(empirical_estimate: Dict[str, Dict[str, float]]) -> Dict[str, float]:
    """Calculate the total variation distance for a given empirical estimate.

    Args:
        empirical_estimate (Dict[str, Dict[str, float]]): empirical estimate to calculate TVD for

    Returns:
        Dict[str, float]: total variation distance for each state
    """
    
    tvd = {}
    # Calculate the total variation distance for each state
    tvd["blue"] = 0.5 * np.sum(np.abs(np.array(list(empirical_estimate["blue"].values())) - np.array(TRUE_BLUE_PROBS)))
    tvd["green"] = 0.5 * np.sum(np.abs(np.array(list(empirical_estimate["green"].values())) - np.array(TRUE_GREEN_PROBS)))
    tvd["left_purple"] = 0.5 * np.sum(np.abs(np.array(list(empirical_estimate["left_purple"].values())) - np.array(TRUE_LEFT_PURPLE_PROBS)))
    tvd["right_purple"] = 0.5 * np.sum(np.abs(np.array(list(empirical_estimate["right_purple"].values())) - np.array(TRUE_RIGHT_PURPLE_PROBS)))
    return tvd

test = calculate_tvd_for_empirical_estimate(initial_empirical_probs)
print(test)
del test

test = calculate_tvd_for_empirical_estimate(get_empirical_estimates(100, initial_empirical_probs, 3500))
print(test)
del test

{'blue': 1.0, 'green': 1.0, 'left_purple': 1.0, 'right_purple': 1.0}
{'blue': 0.4418483702402176, 'green': 1.0, 'left_purple': 1.0, 'right_purple': 1.0}


## Calculate TVD at each Eval Epoch

In [None]:
def get_tvd_for_seed(seed: int) -> List[Dict[str, float]]:
    """Get the total variation distance for all eval steps for a given seed.

    Args:
        seed (int): experiment seed to load data from

    Returns:
        List[Dict[str, float]]: total variation distance for all eval steps
    """
    
    empirical_estimates = get_empirical_estimates_for_seed(seed)
    tvd = []
    for estimate in empirical_estimates:
        tvd.append(calculate_tvd_for_empirical_estimate(estimate))
    return tvd

test = get_tvd_for_seed(100)
print(test)
print(len(test))
del test

[{'blue': 1.0, 'green': 1.0, 'left_purple': 1.0, 'right_purple': 1.0}, {'blue': 0.4418483702402176, 'green': 1.0, 'left_purple': 1.0, 'right_purple': 1.0}, {'blue': 0.5463968509131049, 'green': 0.5833333333333333, 'left_purple': 1.0, 'right_purple': 1.0}, {'blue': 0.40476190476190477, 'green': 0.75, 'left_purple': 1.0, 'right_purple': 1.0}, {'blue': 0.5, 'green': 0.75, 'left_purple': 1.0, 'right_purple': 1.0}, {'blue': 0.6668055555555557, 'green': 0.16666666666666666, 'left_purple': 0.5454545454545454, 'right_purple': 1.0}, {'blue': 0.37635163987393594, 'green': 0.12589285714285714, 'left_purple': 0.33632848461021836, 'right_purple': 0.38460978835978843}, {'blue': 0.4409380341880342, 'green': 0.5384615384615384, 'left_purple': 0.4620745745745745, 'right_purple': 0.525}, {'blue': 0.3597100032583904, 'green': 0.065988960466563, 'left_purple': 0.4260460305277378, 'right_purple': 0.4454545454545455}, {'blue': 0.1685456557989765, 'green': 0.08470733329054703, 'left_purple': 0.39751411420433

## Compute results for each seed, add to DF and save to CSV

In [32]:
results_df = pd.DataFrame(columns=["seed", "eval_step", "blue_tvd", "green_tvd", "left_purple_tvd", "right_purple_tvd", "mean_tvd", "mean_tvd_excluding_left_purple"])

for seed in SEEDS:
    empirical_estimates = get_empirical_estimates_for_seed(seed)
    tvd = get_tvd_for_seed(seed)
    for eval_step, estimate in enumerate(empirical_estimates):
        tvd_values = tvd[eval_step]
        mean_tvd = np.mean(list(tvd_values.values()))
        mean_tvd_excluding_left_purple = np.mean([tvd_values["blue"], tvd_values["green"], tvd_values["right_purple"]])
        results_df = pd.concat([results_df, pd.DataFrame([[seed, STARTING_EVAL_STEP + eval_step * EVAL_EVERY, tvd_values["blue"], tvd_values["green"], tvd_values["left_purple"], tvd_values["right_purple"], mean_tvd, mean_tvd_excluding_left_purple]], columns=results_df.columns)], ignore_index=True)

results_df.to_csv("tvd_results.csv", index=False)

# Calculate mean and std across seeds for each eval step
mean_results_df = results_df.groupby("eval_step").agg(["mean", "std"])
print(mean_results_df)
mean_results_df.to_csv("tvd_mean_results.csv", index=True)
        

  results_df = pd.concat([results_df, pd.DataFrame([[seed, STARTING_EVAL_STEP + eval_step * EVAL_EVERY, tvd_values["blue"], tvd_values["green"], tvd_values["left_purple"], tvd_values["right_purple"], mean_tvd, mean_tvd_excluding_left_purple]], columns=results_df.columns)], ignore_index=True)


            seed              blue_tvd           green_tvd            \
            mean         std      mean       std      mean       std   
eval_step                                                              
2500       250.0  129.099445  1.000000  0.000000  1.000000  0.000000   
3500       250.0  129.099445  0.407408  0.080475  0.851190  0.197418   
4500       250.0  129.099445  0.378712  0.149197  0.591463  0.120295   
5500       250.0  129.099445  0.449859  0.156372  0.721187  0.152780   
6500       250.0  129.099445  0.577783  0.103325  0.643628  0.127818   
...          ...         ...       ...       ...       ...       ...   
96500      250.0  129.099445  0.143544  0.051069  0.175603  0.101538   
97500      250.0  129.099445  0.150515  0.009886  0.140022  0.036194   
98500      250.0  129.099445  0.120139  0.058878  0.144397  0.047634   
99500      250.0  129.099445  0.150139  0.032936  0.141747  0.075975   
100500     250.0  129.099445  0.140694  0.015857  0.140128  0.03