In [1]:
import numpy as np
from typing import Optional, List, Tuple
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import torch


from tqdm.notebook import trange, tqdm
from omegaconf import OmegaConf, DictConfig

# Set precision to 3 decimal places
np.set_printoptions(precision=3, suppress=True)

import pickle

from discrete_maze.maze import Maze
from discrete_maze.search_algorithm import GreedyAlgorithm, SearchAlgorithm
from discrete_maze.maze_dataset import MazeDataset
from discrete_maze.resnet import ResNet

In [2]:
# Build the experiment configuration as an OmegaConf object.
cfg = OmegaConf.create(dict(
    maze=dict(
        # A {min, max} pair gives a range. To pin a parameter to a constant value,
        # pass a single number instead, e.g.:
        #   width=4, height=4, cell_occupancy_prob=0, max_steps=5
        width=dict(min=4, max=50),
        height=dict(min=4, max=50),
        cell_occupancy_prob=dict(min=0, max=0.3),
        # "ShortestPath": max steps = shortest path between source and target * 2.
        # Alternative "L1SourceTarget": max steps = L1 distance between source and target * 2.
        max_steps="ShortestPath",
    ),
    model=dict(
        num_resBlocks=4,
        num_filters=64,
        # Number of current and previous states to consider; 1 means current state only.
        history_length=8,
    ),
    learn=dict(
        # --- Relevant for Expert Iteration only ---
        # num_learn_iters=100,
        # num_self_play_iters=500,
        num_parallel_games=100,
        # use_curriculum=True,
        # curriculum_success_threshold=0.95,
        # --- Relevant for Behavior Cloning only ---
        train_dataset="maze_4to50_0to0p3_500000",
        val_dataset="maze_4to50_0to0p3_50000",
        val_batch_size=128,
        # patience=5,
        # --- Relevant for both Expert Iteration and Behavior Cloning ---
        num_train_epochs=15,
        train_batch_size=64,
        lr=0.001,
        weight_decay=0.0001,
        save_every=5,
        use_wandb=True,
        # load_checkpoint="maze_4to10_rtg_model_99",
    ),
))

In [3]:
import pandas as pd

def summarize_results(results, name):
    """Print a short evaluation summary for one model.

    Args:
        results: Sequence of two-item records ``(termination_case, value)``,
            one per game. ``termination_case`` is compared against the members
            of ``SearchAlgorithm.TerminationCase``; ``value`` is presumably the
            per-game path_length / optimal_path_length ratio (per the printed
            label) -- TODO confirm against ``play_game`` / ``play_game_batch``.
        name: Label printed in the header line.

    Returns:
        None. The summary is written to stdout.
    """
    df = pd.DataFrame(results, columns=['termination_case', 'value'])
    # Share of games per termination case, in percent.
    percentages = df['termination_case'].value_counts(normalize=True) * 100

    # Average the path ratio over successful runs only. Selecting the
    # TARGET_REACHED rows explicitly keeps the metric independent of whatever
    # `value` holds for timed-out or collided games.
    reached_target = df['termination_case'] == SearchAlgorithm.TerminationCase.TARGET_REACHED
    average_optimal_path_ratio = df.loc[reached_target, 'value'].mean()

    print(f"Results for {name}:")
    print(f"% Target Reached: {percentages.get(SearchAlgorithm.TerminationCase.TARGET_REACHED, 0):.2f}%")
    print(f"% Timed Out: {percentages.get(SearchAlgorithm.TerminationCase.TIMEOUT, 0):.2f}%")
    print(f"% Collided: {percentages.get(SearchAlgorithm.TerminationCase.COLLISION, 0):.2f}%")
    print(f"Average path_length/optimal_path_length: {average_optimal_path_ratio:.4f}\n")

In [4]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# bc_model = ResNet(cfg.model, device)
# # model.load_state_dict(torch.load(f"checkpoints/{cfg.name}_best_model.pt"))
# # bc_model.load_state_dict(torch.load(f"../checkpoints/maze_4to50_0to0p3_500000_bc_model_{14}.pt", weights_only=True))
# bc_model.load_state_dict(torch.load(f"../checkpoints/maze_4to50_0to0p3_50000_bc_model_{10}.pt", weights_only=True))
# exIt_model = ResNet(cfg.model, device)
# exIt_model.load_state_dict(torch.load(f"../checkpoints/maze_4to100_hist8_model_{99}.pt", weights_only=True))

# total_runs = 100
# bc_results = []
# exIt_results = []
# for i in trange(total_runs):
#     maze = Maze(10, 10, 0, "ShortestPath")
#     # maze.visualize_path()
#     bc_alg = GreedyAlgorithm(search_cfg=None, model=bc_model)
#     exIt_alg = GreedyAlgorithm(search_cfg=None, model=exIt_model)
#     bc_results.append(bc_alg.play_game(maze, verbose=False, visualize=False))
#     exIt_results.append(exIt_alg.play_game(maze, verbose=False, visualize=False))
# summarize_results(bc_results, 'Behavior Cloning')
# summarize_results(exIt_results, 'Expert Iteration')


In [13]:
# Compare several trained checkpoints by letting each play the same batches of
# generated mazes with GreedyAlgorithm, then print a per-model summary per round.
# NOTE(review): no RNG seed is set in this cell, so the printed numbers will
# differ from run to run.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Display name -> checkpoint path.
models_to_test = {
    "Behavior Cloning 500k": f"../checkpoints/maze_4to50_0to0p3_500000_bc_model_{14}.pt",
    "Behavior Cloning 50k": f"../checkpoints/maze_4to50_0to0p3_50000_bc_model_{10}.pt",
    "Expert Iteration n_sim 50": f"../checkpoints/maze_4to100_hist8_model_{99}.pt",
    "Expert Iteration n_sim 300": f"../checkpoints/maze_4to100_hist8_numsim300_model_{99}.pt",
}
models = {}
for name, path in models_to_test.items():
    model = ResNet(cfg.model, device)
    # map_location remaps the stored tensors onto `device`, so a checkpoint
    # saved on a GPU still loads when this notebook falls back to CPU.
    model.load_state_dict(torch.load(path, map_location=device, weights_only=True))
    # Put the network in evaluation mode before it is used for inference.
    model.eval()
    models[name] = model

n_rounds = 3
n_batches = 10
# Games per round: each batch holds cfg.learn.num_parallel_games mazes.
total_runs = n_batches * cfg.learn.num_parallel_games
for n_round in range(n_rounds):
    results = {model_name: [] for model_name in models.keys()}
    algs = {model_name: GreedyAlgorithm(search_cfg=None, model=model) for model_name, model in models.items()}

    for _ in trange(n_batches):
        maze_params = Maze.generate_maze_params(cfg.learn.num_parallel_games, maze_cfg=cfg.maze)
        mazes = [Maze(*params) for params in maze_params]
        # The same list of maze objects is handed to every model in turn.
        for model_name, alg in algs.items():
            results[model_name].extend(alg.play_game_batch(mazes, verbose=False, visualize=False))
    print(f"round: {n_round} total_runs: {total_runs}")
    for model_name, model_results in results.items():
        summarize_results(model_results, model_name)

  0%|          | 0/10 [00:00<?, ?it/s]

round: 0 total_runs: 1000
Results for Behavior Cloning 500k:
% Target Reached: 89.60%
% Timed Out: 2.20%
% Collided: 8.20%
Average path_length/optimal_path_length: 1.0716

Results for Behavior Cloning 50k:
% Target Reached: 78.40%
% Timed Out: 15.00%
% Collided: 6.60%
Average path_length/optimal_path_length: 1.1321

Results for Expert Iteration n_sim 50:
% Target Reached: 78.80%
% Timed Out: 20.70%
% Collided: 0.50%
Average path_length/optimal_path_length: 1.1797

Results for Expert Iteration n_sim 300:
% Target Reached: 78.60%
% Timed Out: 20.80%
% Collided: 0.60%
Average path_length/optimal_path_length: 1.7248



  0%|          | 0/10 [00:00<?, ?it/s]

round: 1 total_runs: 1000
Results for Behavior Cloning 500k:
% Target Reached: 92.00%
% Timed Out: 3.20%
% Collided: 4.80%
Average path_length/optimal_path_length: 1.0737

Results for Behavior Cloning 50k:
% Target Reached: 81.60%
% Timed Out: 12.70%
% Collided: 5.70%
Average path_length/optimal_path_length: 1.1313

Results for Expert Iteration n_sim 50:
% Target Reached: 82.50%
% Timed Out: 17.10%
% Collided: 0.40%
Average path_length/optimal_path_length: 1.1606

Results for Expert Iteration n_sim 300:
% Target Reached: 79.30%
% Timed Out: 20.30%
% Collided: 0.40%
Average path_length/optimal_path_length: 1.7228



  0%|          | 0/10 [00:00<?, ?it/s]

round: 2 total_runs: 1000
Results for Behavior Cloning 500k:
% Target Reached: 92.10%
% Timed Out: 2.40%
% Collided: 5.50%
Average path_length/optimal_path_length: 1.0678

Results for Behavior Cloning 50k:
% Target Reached: 82.20%
% Timed Out: 13.30%
% Collided: 4.50%
Average path_length/optimal_path_length: 1.1456

Results for Expert Iteration n_sim 50:
% Target Reached: 79.70%
% Timed Out: 19.50%
% Collided: 0.80%
Average path_length/optimal_path_length: 1.1782

Results for Expert Iteration n_sim 300:
% Target Reached: 77.90%
% Timed Out: 21.20%
% Collided: 0.90%
Average path_length/optimal_path_length: 1.7114

