# Multi-Lifetime Metrics Evaluation

In [None]:
# (c) 2019 The Johns Hopkins University Applied Physics Laboratory LLC (JHU/APL).
# All Rights Reserved. This material may be only be used, modified, or reproduced
# by or for the U.S. Government pursuant to the license rights granted under the
# clauses at DFARS 252.227-7013/7014 or FAR 52.227-14. For any other permission,
# please contact the Office of Technology Transfer at JHU/APL.

# NO WARRANTY, NO LIABILITY. THIS MATERIAL IS PROVIDED “AS IS.” JHU/APL MAKES NO
# REPRESENTATION OR WARRANTY WITH RESPECT TO THE PERFORMANCE OF THE MATERIALS,
# INCLUDING THEIR SAFETY, EFFECTIVENESS, OR COMMERCIAL VIABILITY, AND DISCLAIMS
# ALL WARRANTIES IN THE MATERIAL, WHETHER EXPRESS OR IMPLIED, INCLUDING (BUT NOT
# LIMITED TO) ANY AND ALL IMPLIED WARRANTIES OF PERFORMANCE, MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT OF INTELLECTUAL PROPERTY
# OR OTHER THIRD PARTY RIGHTS. ANY USER OF THE MATERIAL ASSUMES THE ENTIRE RISK
# AND LIABILITY FOR USING THE MATERIAL. IN NO EVENT SHALL JHU/APL BE LIABLE TO ANY
# USER OF THE MATERIAL FOR ANY ACTUAL, INDIRECT, CONSEQUENTIAL, SPECIAL OR OTHER
# DAMAGES ARISING FROM THE USE OF, OR INABILITY TO USE, THE MATERIAL, INCLUDING,
# BUT NOT LIMITED TO, ANY DAMAGES FOR LOST PROFITS.

In [None]:
# Import necessary modules
import json
from pathlib import Path

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import scipy
import seaborn as sns
from evaluation.evaluate import (compute_eval_metrics,
                                 load_computational_costs,
                                 load_performance_thresholds,
                                 load_task_similarities, plot_summary,
                                 save_ste_data, unzip_logs)

# Apply seaborn's "dark" style and "paper" context to figures created in this notebook
sns.set_style("dark")
sns.set_context("paper")

# Render DataFrame floats with a thousands separator and two decimal places
pd.options.display.float_format = '{:,.2f}'.format
# IPython magic: select the ipympl matplotlib backend for this notebook session
%matplotlib ipympl

In [None]:
# Specify top-level directory for evaluation
# The path should be that of an evaluation directory (e.g., example_eval/m9_eval/).
# The unzip and metric-computation cells below read this path as `eval_dir`,
# so it must be bound under that name to avoid a NameError.
eval_dir = Path("example_eval/m9_eval")
# Keep the previous name bound as an alias for any code still referring to `log_dir`.
log_dir = eval_dir

In [None]:
# Configure metrics report

# Values forwarded as keyword arguments to compute_eval_metrics()
ste_dir = "agent_config-0"
perf_measure = "performance"
transfer_method = "both"
do_smoothing = True
do_normalize = True
remove_outliers = True
save_ste = True
save_plots = True

# Switches that gate individual notebook cells (unzip, plot, save)
do_unzip = False
do_plot = True
do_save = True

# Destination for results: `output` is the file name written inside `output_dir`
output_dir = Path("sg_results")
output = "ll_metrics.tsv"

# Create output directory if it doesn't exist
output_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Load computational cost data
# comp_cost_df = load_computational_costs(eval_dir)
# comp_cost_df

In [None]:
# Load performance threshold data
# perf_thresh_df = load_performance_thresholds(eval_dir)
# perf_thresh_df

In [None]:
# Load task similarity data
# task_similarity_df = load_task_similarities(eval_dir)
# task_similarity_df

In [None]:
# Unzip logs
# Runs only when `do_unzip` is enabled in the configuration cell; `eval_dir`
# must already be bound by an earlier cell before this one is executed.
if do_unzip:
    unzip_logs(eval_dir)

In [None]:
# Compute metrics for lifelong learning logs
# Switch matplotlib to the Agg backend before the computation
# (presumably so figures saved during the run do not open windows -- confirm).
matplotlib.use('Agg')

# Every setting below comes from the configuration cell; the result is the
# DataFrame that the summary, plot, and save cells operate on.
ll_metrics_df = compute_eval_metrics(
    eval_dir=eval_dir,
    ste_dir=ste_dir,
    output_dir=output_dir,
    perf_measure=perf_measure,
    transfer_method=transfer_method,
    do_smoothing=do_smoothing,
    do_normalize=do_normalize,
    remove_outliers=remove_outliers,
    save_plots=save_plots,
    do_save_ste=save_ste,
)

In [None]:
# Show mean and standard deviation of data
# The 'min' and 'max' columns are dropped first; the remaining columns are
# aggregated per (complexity, difficulty) group.
(
    ll_metrics_df
    .drop(columns=['min', 'max'])
    .groupby(by=['complexity', 'difficulty'])
    .agg(['mean', 'std'])
)

In [None]:
# Show median and IQR of data
# A bare `import scipy` does not guarantee that the `scipy.stats` submodule is
# loaded, so import it explicitly here rather than relying on another package
# having imported it as a side effect.
import scipy.stats

# The 'min' and 'max' columns are dropped first; the remaining columns are
# aggregated per (complexity, difficulty) group.
(
    ll_metrics_df
    .drop(columns=['min', 'max'])
    .groupby(by=['complexity', 'difficulty'])
    .agg(['median', scipy.stats.iqr])
)

In [None]:
# Plot aggregated data
# Runs only when `do_plot` is enabled in the configuration cell.
if do_plot:
    # Switch to the TkAgg backend for display.
    # NOTE(review): this requires a Tk-capable environment -- confirm it is
    # the intended backend when running under the notebook.
    matplotlib.use('TkAgg')
    # `plot_summary` is the helper imported from evaluation.evaluate; the
    # previous call to a bare `plot` referenced an undefined name.
    plot_summary(ll_metrics_df)

In [None]:
# Save the lifelong learning metrics DataFrame
# Runs only when `do_save` is enabled; the file is written as tab-separated
# text to `output_dir / output`.
if do_save:
    # newline='\n' turns off newline translation for the text handle
    with open(output_dir / output, 'w', newline='\n') as metrics_file:
        (
            ll_metrics_df
            .set_index(['sg_name', 'agent_config', 'run_id'])
            .sort_values(['agent_config', 'run_id'])
            .to_csv(metrics_file, sep='\t')
        )