[![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RobertTLange/mle-logging/blob/main/examples/02_advanced.ipynb)

In [1]:
# Reload imported modules automatically so edits to source files are picked up
%load_ext autoreload
%autoreload 2
# Render inline figures at retina (high-DPI) resolution
%config InlineBackend.figure_format = 'retina'

from mle_logging import MLELogger

# Logging of Same Configuration with Different Random Seeds

If you provide a .json file path and a seed_id, the log will be created in a sub-directory.

Furthermore, the .json file will be copied for reproducibility.

Multiple simultaneous runs (different seeds) can now log to the same directory. Everything else remains the same.

In [2]:
def make_seed_logger(seed_id):
    """Build an MLELogger for `config_1.json` that logs into `multi_seed_dir/`.

    All tracking settings are identical across calls; only `seed_id` (a str
    identifying the random seed) differs. Fresh lists are created on every
    call so the two loggers never share argument objects.
    """
    return MLELogger(
        time_to_track=["num_updates", "num_epochs"],
        what_to_track=["train_loss", "test_loss"],
        experiment_dir="multi_seed_dir/",
        config_fname="config_1.json",  # path to the .json config
        seed_id=seed_id,               # seed identifier (str)
    )


# One logger per random seed, both pointed at the same experiment directory
log_seed_1 = make_seed_logger("seed_1")
log_seed_2 = make_seed_logger("seed_2")

In [3]:
# A single time-series entry: the clock values and the metrics measured at that point
time_tic = dict(num_updates=10, num_epochs=1)
stats_tic = dict(train_loss=0.1234, test_loss=0.1235)

# Feed the same entry to both seed loggers and save each log to .hdf5
for seed_logger in (log_seed_1, log_seed_2):
    seed_logger.update(time_tic, stats_tic, save=True)

In [4]:
import os
from mle_logging.merge import merge_hdf5_files

# Run directory of the `config_1` experiment and the folder holding its per-seed logs.
# NOTE(review): "21-08-04" looks like the date stamp of the original run —
# adjust these literals when re-running the notebook on another day.
exp_dir = "multi_seed_dir/21-08-04_config_1/"
log_dir = os.path.join(exp_dir, "logs/")

# Per-seed .hdf5 files to combine, and the file that receives the merged result
per_seed_logs = [os.path.join(log_dir, fname)
                 for fname in ("21-08-04_config_1_seed_1.hdf5",
                               "21-08-04_config_1_seed_2.hdf5")]
aggregated_log = os.path.join(log_dir, "seed_aggregated.hdf5")

# delete_files=True: presumably removes the per-seed files once merged — confirm
merge_hdf5_files(new_filename=aggregated_log,
                 log_paths=per_seed_logs,
                 delete_files=True)

In [5]:
from mle_logging import load_log

# Load the merged log from the run directory, keeping each seed separate
log = load_log(exp_dir)
# Display the identifiers of the individual runs found in the log
log.eval_ids

['seed_1', 'seed_2']

In [6]:
# Reload the same run directory, this time aggregating over the seeds
log = load_log(exp_dir, aggregate_seeds=True)
# Display the run identifiers of the aggregated log
log.eval_ids

Only single configuration/evaluation loaded.


In [7]:
# List which summary entries the seed-aggregated log stores for `train_loss`
log.stats.train_loss.keys()

odict_keys(['mean', 'std', 'p50', 'p10', 'p25', 'p75', 'p90'])

## Logging of Different Configurations

In [8]:
def make_config_logger(config_fname):
    """Build an MLELogger for `config_fname` that logs into `multi_config_dir/`.

    All loggers share the same tracking settings and the seed id "seed_0";
    only the .json configuration path differs. Fresh lists are created on
    every call so the loggers never share argument objects.
    """
    return MLELogger(
        time_to_track=["num_updates", "num_epochs"],
        what_to_track=["train_loss", "test_loss"],
        experiment_dir="multi_config_dir/",
        config_fname=config_fname,  # path to the .json config
        seed_id="seed_0",           # seed identifier (str)
    )


# One logger per .json configuration, both pointed at the same experiment directory
log_config_1 = make_config_logger("config_1.json")
log_config_2 = make_config_logger("config_2.json")

In [9]:
# Record the previously defined entry in both configuration logs and save each to .hdf5
for config_logger in (log_config_1, log_config_2):
    config_logger.update(time_tic, stats_tic, save=True)

In [10]:
from mle_logging import merge_meta_log

In [11]:
# Collapse each configuration's per-seed log into one `<config>.hdf5` file.
# Each merged file is written into the logs/ folder of its OWN run directory,
# so `config_2.hdf5` lands next to the config_2 seed log it was built from
# rather than inside the config_1 run directory.
# NOTE(review): "21-08-04" looks like the date stamp of the original run —
# adjust these literals when re-running the notebook on another day.
merge_hdf5_files(new_filename = "multi_config_dir/21-08-04_config_1/logs/config_1.hdf5",
                 log_paths = ["multi_config_dir/21-08-04_config_1/logs/21-08-04_config_1_seed_0.hdf5"],
                 delete_files=True)
merge_hdf5_files(new_filename = "multi_config_dir/21-08-04_config_2/logs/config_2.hdf5",
                 log_paths = ["multi_config_dir/21-08-04_config_2/logs/21-08-04_config_2_seed_0.hdf5"],
                 delete_files=True)

# Combine the per-configuration logs under the experiment directory into one meta log
meta_log = merge_meta_log(experiment_dir = "multi_config_dir/",
                          all_run_ids = ["config_1", "config_2"])

In [12]:
# Display the run identifiers contained in the merged meta log
meta_log.eval_ids

['config_1', 'config_2']

In [13]:
# Inspect the tracked statistics stored for the `config_1` run
meta_log.config_1.stats

DotMap(test_loss=DotMap(mean=masked_array(data=[0.12349999696016312],
             mask=[False],
       fill_value=1e+20), std=masked_array(data=[0.0],
             mask=[False],
       fill_value=1e+20), p50=array([0.1235]), p10=array([0.1235]), p25=array([0.1235]), p75=array([0.1235]), p90=array([0.1235])), train_loss=DotMap(mean=masked_array(data=[0.1234000027179718],
             mask=[False],
       fill_value=1e+20), std=masked_array(data=[0.0],
             mask=[False],
       fill_value=1e+20), p50=array([0.1234]), p10=array([0.1234]), p25=array([0.1234]), p75=array([0.1234]), p90=array([0.1234])))