## Introduction

This script demonstrates my personal simulation output manager, which I call Model Output Manager, or MOM for short.

In [86]:
import shutil
import os
import numpy as np
import pandas as pd
import pickle as pkl

import model_output_manager as mom


# Short module-level alias for NumPy's general eigendecomposition routine.
eig = np.linalg.eig

## Saving model output

In [87]:
# %% Parameters for model
seed, scale, name = 10, 5, "Testing"
param_matrix = np.eye(3) * scale

# Scalar settings are the ones that can be logged as columns of a table.
table_params = {"run_name": name, "seed": seed, "scale": scale}

# The full parameter set is a separate dict that additionally carries the
# array-valued parameter, which can't be stored in a table cell.
params = dict(table_params, param_matrix=param_matrix)

# %% Using these parameters we create model output
def run_model(seed, scale, size=1000):
    """Return the largest real part among the eigenvalues of a random matrix.

    A ``size`` x ``size`` matrix of standard-normal draws is multiplied by
    ``scale`` and its eigenvalues are computed.

    Args:
        seed: Seed handed to ``np.random.seed``. Note that this reseeds
            NumPy's global random state.
        scale: Factor multiplying every entry of the random matrix.
        size: Side length of the square matrix. Defaults to 1000, the value
            that used to be hard-coded.

    Returns:
        A dict ``{"lam1": value}`` where ``value`` is the greatest real part
        found among the eigenvalues.
    """
    np.random.seed(seed)
    d = scale * np.random.randn(size, size)
    # Only the eigenvalues are used, so skip the eigenvector computation.
    lams = np.linalg.eigvals(d)
    # A descending complex sort orders by real part first, so its leading
    # entry's real part is simply the maximum real part; no sort is needed.
    return {"lam1": np.max(np.real(lams))}
output = run_model(seed=seed, scale=scale)

In [88]:
# The goal now is to store the model output together with the parameters that
# produced it, in an organized way. model_output_manager takes care of that.
path = "test_output/run_log.csv"  # Path to the table. Note that the table's name is going to be "run_log.csv"
shutil.rmtree("test_output", ignore_errors=True)  # Start clean: drop output left over from earlier runs

# Look up runs already logged with these parameters; reuse the first match,
# otherwise have the manager create a directory for this run.
dirs, ids, output_exists = mom.get_dirs_and_ids_for_run(table_params, path)
if len(dirs) > 0:
    run_id, run_dir = ids[0], dirs[0]
else:
    run_id, run_dir = mom.make_dir_for_run(table_params, path)

In [89]:
# The run log now exists on disk. Reading it back (using the first CSV column
# as the row index) shows a row that corresponds to table_params.
table = pd.read_csv(path, index_col=0)
print(table)

  run_name  seed  scale  run_number
0  Testing    10      5           0


In [90]:
# Alongside the table, a folder for the run has been created, called 'run_0':
print(os.listdir('test_output'))
# Nothing has been written into that folder yet, so its listing is empty:
print(os.listdir('test_output/run_0'))

['run_log.csv', 'run_0']
[]


In [91]:
# run_dir is where this run's files belong, so pickle the model output there:
with open(run_dir/'output.pkl', 'wb') as fid:
    pkl.dump(output, fid)
# Listing the run folder confirms the file was written.
print(os.listdir(run_dir))

['output.pkl']


In [92]:
# A second run with different parameters (a new seed). A new row will be added to the table.
seed, scale, name = 5, 5, "Testing"
param_matrix = np.eye(3) * scale
table_params = {"run_name": name, "seed": seed, "scale": scale}
params = dict(table_params, param_matrix=param_matrix)
output = run_model(seed, scale)

# Reuse the first matching run if one is already logged, else register a new one.
dirs, ids, output_exists = mom.get_dirs_and_ids_for_run(table_params, path)
if len(dirs) > 0:
    run_id, run_dir = ids[0], dirs[0]
else:
    run_id, run_dir = mom.make_dir_for_run(table_params, path)

with open(run_dir / 'output.pkl', 'wb') as fid:
    pkl.dump(output, fid)
print(pd.read_csv(path, index_col=0))

  run_name  run_number  scale  seed
0  Testing           0      5    10
1  Testing           0      5     5


In [93]:
# Next, a run whose model has one more parameter than the earlier runs. The
# table gains a column for it, and rows logged before the column existed get
# a missing-value placeholder there.
seed, scale, name = 5, 5, "Testing"
dummy = 8  # New parameter
param_matrix = np.eye(3) * scale
table_params = {"run_name": name, "seed": seed, "scale": scale, "dummy": dummy}
params = dict(table_params, param_matrix=param_matrix)
output = run_model(seed, scale)

# Reuse the first matching run if one is already logged, else register a new one.
dirs, ids, output_exists = mom.get_dirs_and_ids_for_run(table_params, path)
if len(dirs) > 0:
    run_id, run_dir = ids[0], dirs[0]
else:
    run_id, run_dir = mom.make_dir_for_run(table_params, path)

with open(run_dir / 'output.pkl', 'wb') as fid:
    pkl.dump(output, fid)
print(pd.read_csv(path, index_col=0))


  dummy run_name  run_number  scale  seed
0    na  Testing           0      5    10
1    na  Testing           0      5     5
2     8  Testing           0      5     5


In [94]:
# Finally, a run logged with fewer parameters than the table has columns
# (no seed, no dummy). It is handled gracefully.
scale, name = 5, "Testing"
param_matrix = np.eye(3) * scale
table_params = {"run_name": name, "scale": scale}
params = dict(table_params, param_matrix=param_matrix)
# The model still needs a seed; it just isn't recorded in the table this time.
output = run_model(seed=0, scale=scale)

# Reuse the first matching run if one is already logged, else register a new one.
dirs, ids, output_exists = mom.get_dirs_and_ids_for_run(table_params, path)
if len(dirs) > 0:
    run_id, run_dir = ids[0], dirs[0]
else:
    run_id, run_dir = mom.make_dir_for_run(table_params, path)

with open(run_dir / 'output.pkl', 'wb') as fid:
    pkl.dump(output, fid)
print(pd.read_csv(path, index_col=0))


  dummy run_name  run_number  scale seed
0    na  Testing           0      5   10
1    na  Testing           0      5    5
2     8  Testing           0      5    5
3    na  Testing           0      5   na


## Loading model using parameters

In [96]:
# Parameter values identifying the run we want to load back.
load_params = {'run_name': 'Testing', 'seed': 5, 'scale': 5, 'dummy': 8}
dirs, ids, output_exists = mom.get_dirs_and_ids_for_run(load_params, path)

In [97]:
# dirs holds the directories that match load_params.
print(dirs)
# Take the first match as the run to load from.
run_dir = dirs[0]

[PosixPath('test_output/run_2')]


In [98]:
# Now load the pickled output back from the matched run directory.
# NOTE(review): unpickling can execute arbitrary code; only load files that
# this workflow wrote itself.
with open(run_dir/'output.pkl', 'rb') as fid:
    output_loaded = pkl.load(fid)

In [99]:
# Display the dict that was read back from output.pkl.
output_loaded

{'lam1': 160.46450778794286}