In [1]:
import sys

# Add the directory two levels up to the import search path — presumably the
# repository root, so the local `academia` package can be imported (TODO confirm).
# NOTE: the path is relative, so it depends on the kernel's working directory.
sys.path.append('../..')

# Loading data

In [2]:
import os

import numpy as np

from academia.curriculum import LearningStats

pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
# Root folders holding the saved runs of each agent type; the loaders below scan
# them for 'curriculum*' and 'nocurriculum*' run sub-directories.
DQN_OUTPUT_DIR = './outputs/DQNAgent/'
PPO_OUTPUT_DIR = './outputs/PPOAgent/'
# Number of tasks in one curriculum run; the curriculum loader reads the files
# '1.stats.json' .. '<CURRICULUM_N_TASKS>.stats.json' from every run directory.
CURRICULUM_N_TASKS = 3

In [4]:
def load_curr_stats(output_dir: str, n_tasks: int | None = None) -> list[dict[str, LearningStats]]:
    """Load the per-task learning statistics of every curriculum run.

    Every entry of ``output_dir`` whose name starts with ``'curriculum'`` is
    treated as one run directory containing the files ``1.stats.json`` up to
    ``<n_tasks>.stats.json``.

    Args:
        output_dir: Directory that contains the run sub-directories.
        n_tasks: Number of tasks per curriculum run. When omitted, the
            notebook-level ``CURRICULUM_N_TASKS`` is used (read at call time).

    Returns:
        One dict per run, ordered by run-directory name, mapping
        ``'Curriculum task <i>'`` to the ``LearningStats`` loaded for task ``i``.
    """
    if n_tasks is None:
        n_tasks = CURRICULUM_N_TASKS
    result: list[dict[str, LearningStats]] = []
    # os.listdir() yields entries in arbitrary order; sorting makes the order of
    # the returned runs reproducible between machines and kernel restarts
    for dir_name in sorted(os.listdir(output_dir)):
        # load only curriculum results
        if not dir_name.startswith('curriculum'):
            continue
        dir_path = os.path.join(output_dir, dir_name)
        curr_stats: dict[str, LearningStats] = {}
        # load each task's stats object (task numbering is 1-based)
        for i in range(1, n_tasks + 1):
            stats_path = os.path.join(dir_path, f'{i}.stats.json')
            curr_stats[f'Curriculum task {i}'] = LearningStats.load(stats_path)
        result.append(curr_stats)
    return result

def load_nocurr_stats(output_dir: str) -> list[dict[str, LearningStats]]:
    """Load the learning statistics of every run trained without a curriculum.

    Every entry of ``output_dir`` whose name starts with ``'nocurriculum'`` is
    treated as one run directory containing a single ``stats.stats.json`` file.

    Args:
        output_dir: Directory that contains the run sub-directories.

    Returns:
        One single-entry dict per run, ordered by run-directory name, mapping
        ``'No Curriculum task'`` to the ``LearningStats`` loaded for that run.
    """
    result: list[dict[str, LearningStats]] = []
    # os.listdir() yields entries in arbitrary order; sorting makes the order of
    # the returned runs reproducible between machines and kernel restarts
    for dir_name in sorted(os.listdir(output_dir)):
        # load only no-curriculum results
        if not dir_name.startswith('nocurriculum'):
            continue
        stats_path = os.path.join(output_dir, dir_name, 'stats.stats.json')
        task_stats = {'No Curriculum task': LearningStats.load(stats_path)}
        result.append(task_stats)
    return result

In [5]:
# Curriculum runs of both agents (DQN first, then PPO).
dqn_stats_curr, ppo_stats_curr = (
    load_curr_stats(agent_dir) for agent_dir in (DQN_OUTPUT_DIR, PPO_OUTPUT_DIR)
)

# Runs trained without a curriculum, in the same agent order.
dqn_stats_nocurr, ppo_stats_nocurr = (
    load_nocurr_stats(agent_dir) for agent_dir in (DQN_OUTPUT_DIR, PPO_OUTPUT_DIR)
)

# Visualisations

In [6]:
from academia.tools import visualizations as vis

## PPO

In [7]:
# PPO: plot the no-curriculum runs next to the curriculum runs and save the figure as SVG.
ppo_plot_path = './outputs/plots/ppo_doorkey_curr_vs_nocurr.svg'
# Make sure the target folder exists so the cell also runs on a fresh checkout.
# NOTE(review): plot_trajectories may already create it — confirm; the call is harmless either way.
os.makedirs(os.path.dirname(ppo_plot_path), exist_ok=True)
vis.plot_trajectories(
    [ppo_stats_nocurr, ppo_stats_curr],
    time_domain='steps',
    task_trace_start='mean',
    show_std=True,
    show_stop_time=True,
    save_format='svg',
    save_path=ppo_plot_path,
)

## DQN

### All runs visualized

In [8]:
# Every DQN run (no curriculum vs curriculum), drawn with show_std enabled; not saved to disk.
vis.plot_trajectories(
    [dqn_stats_nocurr, dqn_stats_curr],
    show_std=True,
    show_stop_time=True,
    task_trace_start='mean',
    time_domain='steps',
)

In [9]:
# Same DQN comparison, but with show_run_traces enabled instead of show_std; not saved to disk.
vis.plot_trajectories(
    [dqn_stats_nocurr, dqn_stats_curr],
    show_run_traces=True,
    show_stop_time=True,
    task_trace_start='mean',
    time_domain='steps',
)

### Successful vs unsuccessful runs

In [10]:
# A run counts as successful when its last recorded agent evaluation reaches this value.
SUCCESS_THRESHOLD = 0.9


def _split_by_final_evaluation(
    runs: list[dict[str, LearningStats]],
    task_name: str,
    threshold: float = SUCCESS_THRESHOLD,
) -> tuple[list[dict[str, LearningStats]], list[dict[str, LearningStats]]]:
    """Partition runs by the last agent evaluation recorded for one task.

    Args:
        runs: Loaded runs, each mapping a task name to its ``LearningStats``.
        task_name: Key of the task whose final evaluation decides the outcome.
        threshold: Minimum final evaluation for a run to count as successful.

    Returns:
        ``(successful, unsuccessful)`` lists, each preserving the order of ``runs``.

    Note:
        ``agent_evaluations[-1]`` is read for every run, so a run without any
        recorded evaluation makes the indexing fail.
    """
    successful, unsuccessful = [], []
    for run in runs:
        final_evaluation = run[task_name].agent_evaluations[-1]
        if final_evaluation >= threshold:
            successful.append(run)
        # explicit second comparison: a NaN evaluation lands in neither list,
        # exactly as with two independent ">=" / "<" filters
        elif final_evaluation < threshold:
            unsuccessful.append(run)
    return successful, unsuccessful


# Curriculum runs are judged on the last task of the curriculum.
dqn_stats_curr_good, dqn_stats_curr_bad = _split_by_final_evaluation(
    dqn_stats_curr, f'Curriculum task {CURRICULUM_N_TASKS}'
)

dqn_stats_nocurr_good, dqn_stats_nocurr_bad = _split_by_final_evaluation(
    dqn_stats_nocurr, 'No Curriculum task'
)

In [11]:
# Text table with the number of successful / unsuccessful DQN runs per training setup.
table_border = '+---------------+-----------------+-------------------+'
run_counts = {
    'Curriculum': (len(dqn_stats_curr_good), len(dqn_stats_curr_bad)),
    'No Curriculum': (len(dqn_stats_nocurr_good), len(dqn_stats_nocurr_bad)),
}

print(table_border)
print('|               | Successful runs | Unsuccessful runs |')
print(table_border)
for setup_label, (n_successful, n_unsuccessful) in run_counts.items():
    # "<" left-aligns the values inside the fixed column widths
    print(f'| {setup_label:<13} | {n_successful:<15} | {n_unsuccessful:<17} |')
print(table_border)

+---------------+-----------------+-------------------+
|               | Successful runs | Unsuccessful runs |
+---------------+-----------------+-------------------+
| Curriculum    | 11              | 9                 |
| No Curriculum | 13              | 7                 |
+---------------+-----------------+-------------------+


### Successful runs visualized

In [12]:
# Successful DQN runs only: no curriculum vs curriculum, saved as SVG.
dqn_good_plot_path = './outputs/plots/dqn_good_doorkey_curr_vs_nocurr.svg'
# Make sure the target folder exists so the cell also runs on a fresh checkout.
# NOTE(review): plot_trajectories may already create it — confirm; the call is harmless either way.
os.makedirs(os.path.dirname(dqn_good_plot_path), exist_ok=True)
vis.plot_trajectories(
    [dqn_stats_nocurr_good, dqn_stats_curr_good],
    time_domain='steps',
    task_trace_start='mean',
    show_std=True,
    show_stop_time=True,
    save_format='svg',
    save_path=dqn_good_plot_path,
)

### Unsuccessful runs visualized

In [13]:
# Unsuccessful DQN runs only: no curriculum vs curriculum, saved as SVG.
dqn_bad_plot_path = './outputs/plots/dqn_bad_doorkey_curr_vs_nocurr.svg'
# Make sure the target folder exists so the cell also runs on a fresh checkout.
# NOTE(review): plot_trajectories may already create it — confirm; the call is harmless either way.
os.makedirs(os.path.dirname(dqn_bad_plot_path), exist_ok=True)
vis.plot_trajectories(
    [dqn_stats_nocurr_bad, dqn_stats_curr_bad],
    time_domain='steps',
    task_trace_start='mean',
    show_std=True,
    show_stop_time=True,
    save_format='svg',
    save_path=dqn_bad_plot_path,
)