# Multi-Lifetime Metrics Evaluation

In [None]:
# Import necessary modules
import json
import os

import l2metrics
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tqdm.notebook import tqdm

# Apply seaborn's "dark" style and "paper" context to the plots drawn in this notebook
sns.set_style("dark")
sns.set_context("paper")

# IPython magic (not plain Python): select the ipympl matplotlib backend.
# NOTE(review): presumably requires the ipympl package in the kernel environment - confirm.
%matplotlib ipympl

In [None]:
# Configure metrics report.
# These three values are forwarded unchanged, as keyword arguments of the same
# name, to every l2metrics.AgentMetricsReport constructed later in this notebook.
perf_measure: str = "performance"
transfer_method: str = "both"
do_smoothing: bool = False

In [None]:
# Specify top-level directory for evaluation.
# The path should be that of an agent configuration directory within an evaluation
# directory (e.g., m9_eval/agent_config-0/). Later cells look underneath it for the
# "ste_logs/ste_logs/" and "ll_logs/" subdirectories.
log_dir: str = "example_eval/m9_eval/agent_config-0"

In [None]:
# Check for STE logs
ste_log_dir = log_dir + "/ste_logs/ste_logs/"

if not os.path.isdir(ste_log_dir):
    # STE log path not found - possibly because the compressed archive has not
    # been extracted in the same location yet. Report the path that was checked
    # so the user can see where the logs were expected.
    raise Exception(
        f"STE logs not found in expected location! Looked in: {ste_log_dir}")

# Store all the STE data found in the directory (one call per directory entry)
for ste_task in os.listdir(ste_log_dir):
    l2metrics.util.save_ste_data(os.path.join(ste_log_dir, ste_task))

In [None]:
# Check for LL logs
ll_log_dir = log_dir + "/ll_logs/"

if not os.path.isdir(ll_log_dir):
    # Report the path that was checked so the user can see where logs were expected
    raise Exception(
        f"LL logs not found in expected location! Looked in: {ll_log_dir}")

# Lifetime-metric frames, one per scenario. They are concatenated once at the
# end: DataFrame.append was removed in pandas 2.0, and growing a frame inside
# a loop copies it on every iteration.
scenario_frames = []

# Compute and store the LL metrics for all scenarios found in the directory.
# Expected layout: <ll_log_dir>/<item>/<scenario>/scenario_info.json
for item in tqdm(os.listdir(ll_log_dir), desc='Overall'):
    if not os.path.isdir(ll_log_dir + item):
        continue

    for scenario in tqdm(os.listdir(ll_log_dir + item), desc='Scenario'):
        # Trailing slash kept: the same string is handed to l2metrics and is
        # used as a prefix for the scenario info file below.
        scenario_dir = ll_log_dir + item + '/' + scenario + '/'

        # Skip stray files next to the scenario directories (same check as
        # the outer loop); they cannot contain scenario logs.
        if not os.path.isdir(scenario_dir):
            continue

        # Initialize metrics report
        report = l2metrics.AgentMetricsReport(
            log_dir=scenario_dir, perf_measure=perf_measure,
            transfer_method=transfer_method, do_smoothing=do_smoothing)

        # Calculate metrics in order of their addition to the metrics list
        report.calculate()

        # Read the optional scenario complexity and difficulty labels
        with open(scenario_dir + 'scenario_info.json', 'r') as json_file:
            scenario_info = json.load(json_file)

        labels = {
            key: scenario_info[key]
            for key in ('complexity', 'difficulty')
            if key in scenario_info
        }

        # Attach the labels to this scenario's own rows. assign() returns a
        # new frame, so the report's dataframe is left untouched, and the
        # labels can never land on a row belonging to a different scenario.
        # NOTE(review): assumes the label values are scalars (e.g. strings).
        scenario_frames.append(report.lifetime_metrics_df.assign(**labels))

if not scenario_frames:
    # Fail with a clear message instead of an opaque KeyError from the sort
    raise Exception(f"No LL scenario logs found under: {ll_log_dir}")

ll_metrics_df = pd.concat(scenario_frames, ignore_index=True)

# Sort data by complexity and difficulty
ll_metrics_df = ll_metrics_df.sort_values(by=['complexity', 'difficulty'])

In [None]:
# Plot aggregated data: one grouped violin plot per metric column, with
# complexity on the x-axis and difficulty as the hue.
fig = plt.figure(figsize=(12, 8))

# Every column except the two grouping labels is treated as a metric
metric_columns = ll_metrics_df.drop(columns=['complexity', 'difficulty']).columns

# Keep the original 3x3 layout as a minimum, but add rows when there are more
# than nine metrics so add_subplot never receives an out-of-range index.
n_cols = 3
n_rows = max(3, (len(metric_columns) + n_cols - 1) // n_cols)

for index, metric in enumerate(metric_columns, start=1):
    # Create subplot for current metric
    ax = fig.add_subplot(n_rows, n_cols, index)

    # Create grouped violin plot, drawn explicitly on this subplot rather
    # than relying on matplotlib's implicit "current axes"
    sns.violinplot(x='complexity', y=metric, hue='difficulty',
                   data=ll_metrics_df, palette='muted', ax=ax)

    # Resize legend font (get_legend() returns None when no legend was drawn)
    legend = ax.get_legend()
    if legend is not None:
        plt.setp(legend.get_title(), fontsize='8')
        plt.setp(legend.get_texts(), fontsize='6')

fig.subplots_adjust(wspace=0.35, hspace=0.35)