# Multi-Lifetime Metrics Evaluation

In [None]:
# Import necessary modules
import json
from pathlib import Path

import l2metrics
import matplotlib.pyplot as plt
import pandas as pd
import scipy
import seaborn as sns
from tqdm.notebook import tqdm

# Apply seaborn's "dark" axes style and "paper" plotting context to the figures
# created later in this notebook.
sns.set_style("dark")
sns.set_context("paper")

# IPython magic selecting the ipympl Matplotlib backend.
# NOTE(review): this presumably requires the `ipympl` package to be installed
# in the notebook environment - confirm.
%matplotlib ipympl

In [None]:
# Configure metrics report
# These settings are passed as keyword arguments of the same name to
# l2metrics.AgentMetricsReport when the LL logs are processed.
perf_measure: str = 'performance'
transfer_method: str = 'contrast'
do_smoothing: bool = False

In [None]:
# Top-level directory for this evaluation.
# Point this at a single agent configuration directory inside an evaluation
# directory (e.g., m9_eval/agent_config-0/); the STE and LL log folders are
# looked up relative to it.
log_dir: Path = Path("example_eval/m9_eval/agent_config-0")

In [None]:
# Check for STE logs
# NOTE(review): the doubled 'ste_logs' component presumably mirrors the layout
# produced by extracting the STE archive in place - confirm.
ste_log_dir = log_dir / 'ste_logs' / 'ste_logs'

if not ste_log_dir.exists():
    # STE log path not found - possibly because the compressed archive has not
    # been extracted in the same location yet. Report the path that was checked
    # so the problem can be diagnosed from the message alone.
    raise FileNotFoundError(
        f"STE logs not found in expected location: {ste_log_dir}")

# Store all the STE data found in the directory (one sub-directory at a time;
# plain files are skipped)
print('Storing STE data...')
for ste_dir in ste_log_dir.iterdir():
    if ste_dir.is_dir():
        l2metrics.util.save_ste_data(str(ste_dir))
print('Done storing STE data!\n')

In [None]:
# Check for LL logs
ll_log_dir = log_dir / 'll_logs'

if not ll_log_dir.exists():
    # Report the path that was checked so the problem can be diagnosed from
    # the message alone.
    raise FileNotFoundError(
        f"LL logs not found in expected location: {ll_log_dir}")

print('Computing metrics from LL logs...')

# Collect one lifetime-metrics frame per scenario and concatenate them once at
# the end. DataFrame.append was deprecated in pandas 1.4 and removed in 2.0,
# and appending inside the loop re-copies the accumulated frame every time.
scenario_frames = []

# Compute and store the LL metrics for all scenarios found in the directory.
# Layout walked here: ll_logs/<group dir>/<scenario dir>/
for path in tqdm(list(ll_log_dir.iterdir()), desc='Overall'):
    if not path.is_dir():
        continue

    for sub_path in tqdm(list(path.iterdir()), desc=path.name):
        if not sub_path.is_dir():
            continue

        scenario_dir = str(sub_path)

        # Initialize metrics report
        report = l2metrics.AgentMetricsReport(
            log_dir=scenario_dir, perf_measure=perf_measure,
            transfer_method=transfer_method, do_smoothing=do_smoothing)

        # Calculate metrics in order of their addition to the metrics list
        report.calculate()

        # Work on a re-indexed copy so the report's own frame is not mutated
        scenario_df = report.lifetime_metrics_df.reset_index(drop=True)

        # Read the scenario description; the file is closed before the
        # dataframe is touched
        with open(sub_path / 'scenario_info.json', 'r') as json_file:
            scenario_info = json.load(json_file)

        # Tag the last row of this scenario's metrics with its complexity and
        # difficulty when the scenario info provides them. An empty frame is
        # left untouched instead of tagging an unrelated row.
        if not scenario_df.empty:
            last_row = scenario_df.index[-1]
            for key in ('complexity', 'difficulty'):
                if key in scenario_info:
                    scenario_df.at[last_row, key] = scenario_info[key]

        scenario_frames.append(scenario_df)

# pd.concat raises on an empty list, so fall back to an empty dataframe when
# no scenario directories were found
if scenario_frames:
    ll_metrics_df = pd.concat(scenario_frames, ignore_index=True)
else:
    ll_metrics_df = pd.DataFrame()

In [None]:
# Order the rows by scenario complexity first, then by difficulty
group_keys = ['complexity', 'difficulty']
ll_metrics_df = ll_metrics_df.sort_values(by=group_keys)

# Mean and standard deviation of each metric per (complexity, difficulty)
# group; kept as the cell's final expression so the notebook renders the table
ll_metrics_df.groupby(by=group_keys).agg(['mean', 'std'])

In [None]:
# Median and interquartile range of each metric per (complexity, difficulty)
# group; kept as the cell's final expression so the notebook renders the table.
# The stats submodule is imported explicitly because a bare `import scipy`
# does not guarantee that `scipy.stats` is loaded on older SciPy releases.
from scipy.stats import iqr

ll_metrics_df.groupby(by=['complexity', 'difficulty']).agg(['median', iqr])

In [None]:
# Plot aggregated data: one grouped violin plot per metric column
metric_columns = ll_metrics_df.drop(columns=['complexity', 'difficulty']).columns

# Three subplots per row. At least three rows are kept so that up to nine
# metrics give the same 3x3 layout as before; more metrics add rows (and
# figure height) instead of overflowing the grid and raising.
n_cols = 3
n_rows = max(3, (len(metric_columns) + n_cols - 1) // n_cols)
fig = plt.figure(figsize=(12, 8 * n_rows / 3))

for index, metric in enumerate(metric_columns, start=1):
    # Create subplot for current metric
    ax = fig.add_subplot(n_rows, n_cols, index)

    # Create grouped violin plot, drawn explicitly on this subplot rather than
    # on whatever the current axes happens to be
    sns.violinplot(x='complexity', y=metric, hue='difficulty',
                   data=ll_metrics_df, palette='muted', ax=ax)

    # Resize legend font (there may be no legend if nothing was drawn for hue)
    legend = ax.get_legend()
    if legend is not None:
        plt.setp(legend.get_title(), fontsize='8')
        plt.setp(legend.get_texts(), fontsize='6')

fig.subplots_adjust(wspace=0.35, hspace=0.35)