# Multi-Lifetime Metrics Evaluation

In [None]:
# (c) 2019 The Johns Hopkins University Applied Physics Laboratory LLC (JHU/APL).
# All Rights Reserved. This material may be only be used, modified, or reproduced
# by or for the U.S. Government pursuant to the license rights granted under the
# clauses at DFARS 252.227-7013/7014 or FAR 52.227-14. For any other permission,
# please contact the Office of Technology Transfer at JHU/APL.

# NO WARRANTY, NO LIABILITY. THIS MATERIAL IS PROVIDED “AS IS.” JHU/APL MAKES NO
# REPRESENTATION OR WARRANTY WITH RESPECT TO THE PERFORMANCE OF THE MATERIALS,
# INCLUDING THEIR SAFETY, EFFECTIVENESS, OR COMMERCIAL VIABILITY, AND DISCLAIMS
# ALL WARRANTIES IN THE MATERIAL, WHETHER EXPRESS OR IMPLIED, INCLUDING (BUT NOT
# LIMITED TO) ANY AND ALL IMPLIED WARRANTIES OF PERFORMANCE, MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT OF INTELLECTUAL PROPERTY
# OR OTHER THIRD PARTY RIGHTS. ANY USER OF THE MATERIAL ASSUMES THE ENTIRE RISK
# AND LIABILITY FOR USING THE MATERIAL. IN NO EVENT SHALL JHU/APL BE LIABLE TO ANY
# USER OF THE MATERIAL FOR ANY ACTUAL, INDIRECT, CONSEQUENTIAL, SPECIAL OR OTHER
# DAMAGES ARISING FROM THE USE OF, OR INABILITY TO USE, THE MATERIAL, INCLUDING,
# BUT NOT LIMITED TO, ANY DAMAGES FOR LOST PROFITS.

In [None]:
# Import necessary modules
import json
from pathlib import Path

import matplotlib
import pandas as pd
import scipy
import seaborn as sns

from evaluation.evaluate import (compute_eval_metrics,
                                 load_computational_costs,
                                 load_performance_thresholds,
                                 load_task_similarities,
                                 unzip_logs)

# Seaborn look-and-feel: dark axes style with the "paper" scaling context
sns.set_style("dark")
sns.set_context("paper")

# Render floats in displayed DataFrames with thousands separators and two decimals
pd.set_option('display.float_format', '{:,.2f}'.format)

# Select the non-interactive Agg backend for matplotlib
matplotlib.use('Agg')

In [None]:
# Configure metrics report: every entry is forwarded to compute_eval_metrics
kwargs = {
    # Input locations
    'eval_dir': Path('../../example_eval/m12_eval/'),
    'ste_dir': 'agent_config',
    # Metric computation options
    'ste_averaging_method': 'metrics',
    'perf_measure': 'performance',
    'aggregation_method': 'mean',
    'maintenance_method': 'both',
    'transfer_method': 'both',
    # Data pre-processing options
    'normalization_method': 'task',
    'smoothing_method': 'flat',
    'window_length': None,
    'clamp_outliers': True,
    'data_range_file': '',
    # Plotting options
    'show_raw_data': True,
    'show_eval_lines': True,
    # Storage / output options
    'do_store_ste': True,
    'do_plot': True,
    'do_save_plots': True,
    'do_save': True,
    'do_save_settings': True,
    'output_dir': Path('results/example_results'),
}

# Base file name for the saved results, and whether to extract zipped logs first
output = 'example_metrics'
do_unzip = False

# Create output directory if it doesn't exist
kwargs['output_dir'].mkdir(parents=True, exist_ok=True)

In [None]:
# Unzip logs (only when requested via do_unzip).
# The evaluation directory is stored in the kwargs configuration; there is no
# standalone `eval_dir` variable in this notebook.
if do_unzip:
    unzip_logs(kwargs['eval_dir'])

In [None]:
# Compute metrics for lifelong learning logs.
# Every configured report setting is forwarded as a keyword argument. The result is
# unpacked into three objects: the metrics table, the metrics dictionaries, and
# the log data (the two *_df objects are later used as pandas DataFrames).
ll_metrics_df, ll_metrics_dicts, log_data_df = compute_eval_metrics(**kwargs)

In [None]:
# Show mean and standard deviation of each metric, grouped by
# scenario type, complexity, and difficulty (bookkeeping columns are dropped first)
ll_metrics_df.drop(
    columns=['min', 'max', 'num_lx', 'num_ex']
).groupby(
    ['scenario_type', 'complexity', 'difficulty']
).agg(
    ['mean', 'std']
)

In [None]:
# Show median and IQR of each metric, grouped by scenario type, complexity,
# and difficulty (bookkeeping columns are dropped first).
# Import the stats submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.stats` is loaded on SciPy versions without lazy submodule loading.
import scipy.stats

ll_metrics_df.drop(columns=['min', 'max', 'num_lx', 'num_ex']).groupby(
    by=['scenario_type', 'complexity', 'difficulty']).agg(['median', scipy.stats.iqr])

In [None]:
# Save the lifelong learning metrics (only when saving is enabled).
# Each artifact is written only if it has content.

# Metrics table -> tab-separated file, indexed and ordered by agent config and run
if kwargs['do_save'] and not ll_metrics_df.empty:
    with open(kwargs['output_dir'] / f'{output}.tsv', 'w', newline='\n') as metrics_file:
        ll_metrics_df.set_index(
            ['sg_name', 'agent_config', 'run_id']
        ).sort_values(
            ['agent_config', 'run_id']
        ).to_csv(metrics_file, sep='\t')

# Metrics dictionaries -> JSON file
if kwargs['do_save'] and ll_metrics_dicts:
    with open(kwargs['output_dir'] / f'{output}.json', 'w', newline='\n') as metrics_file:
        json.dump(ll_metrics_dicts, metrics_file)

# Log data -> Feather file (index is reset before writing)
if kwargs['do_save'] and not log_data_df.empty:
    log_data_df.reset_index(drop=True).to_feather(
        kwargs['output_dir'] / f'{output}_data.feather')