## Introduction

This script demonstrates my own personal simulation output manager, which I call Model Output Manager or MOM for short.

In [14]:
import shutil
import os
from pathlib import Path
import numpy as np
import pandas as pd
import pickle as pkl

import model_output_manager as mom


eig = np.linalg.eig

## Saving model output

In [15]:
# %% Parameters for model
seed = 10
scale = 5
name = "Testing"
param_matrix = scale * np.eye(3)
# Store in a dict
table_params = dict(run_name=name, seed=seed, scale=scale)  # Parameters for keeping in a table
params = table_params.copy()
params['param_matrix'] = param_matrix  # Extra model parameters that can't be stored in a table

# %% Using these parameters we create model output
def run_model(seed, scale):
    np.random.seed(seed)
    d = scale * np.random.randn(1000, 1000)
    lams, ev = eig(d)
    lams = np.sort(lams)[::-1]
    output = {"lam1": np.real(lams[0])}
    return output
output = run_model(seed, scale)

In [16]:
# Now we want to save the model output and the parameters used to generate it in an organized way.
# That's where model_output_manager.save_model comes in.
shutil.rmtree("test_output", ignore_errors=True)  # Clear away any output that was there before
path = Path("test_output")  # Path to the table. Note that the table's name is going to be "run_log.csv"
# run_exists = mom.run_exists(table_params, path)
# if not run_exists:
run_id = mom.get_run_entry(table_params, path)
run_dir = Path(f"test_output/run_{run_id}/")
run_dir.mkdir(exist_ok=True)


In [17]:
# Here we see that a table has been created with a row that corresponds to table_params 
table_path = path/'run_table.csv'
table = pd.read_csv(table_path, index_col=0)
print(table)

      run_name  seed  scale
index                      
0      Testing    10      5


In [18]:
# A corresponding folder has also been created, called 'run_0':
print(os.listdir('test_output'))
# But this folder is currently empty:
print(os.listdir('test_output/run_0'))

['run_table.csv', 'run_0']
[]


In [19]:
# We can now use run_dir to save our output:
with open(run_dir/'out.pkl', 'wb') as fid:
    pkl.dump(output, fid)
print(os.listdir(run_dir))

['out.pkl']


In [20]:
## Now we create a new run with different parameters. A new row will be added to the table
seed = 5
scale = 5
name = "Testing"
param_matrix = scale * np.eye(3)
table_params = dict(run_name=name, seed=seed, scale=scale)
params = table_params.copy()
params['param_matrix'] = param_matrix
output = run_model(seed, scale)
# run_exists = mom.run_exists(table_params, path)
run_id = mom.get_run_entry(table_params, path)
run_dir = Path(f"test_output/run_{run_id}/")
run_dir.mkdir(exist_ok=True)
with open(run_dir/'out.pkl', 'wb') as fid:
    pkl.dump(output, fid)
print(pd.read_csv(table_path, index_col=0))

  run_name  seed  scale
0  Testing    10      5
1  Testing     5      5


In [21]:
# Now let's see what happens when we add another parameter to our model. Default behavior is to prompt the user to
# pick a variable to assign to the new parameter for previous runs.
scale = 5
name = "Testing"
dummy = 8  # New parameter
param_matrix = scale * np.eye(3)
#run_id = mom.get_run_entry(table_params, path)
run_dir = Path(f"test_output/run_{run_id}/")
run_dir.mkdir(exist_ok=True)
with open(run_dir/'out.pkl', 'wb') as fid:
    pkl.dump(output, fid)
print(pd.read_csv(table_path, index_col=0))
# In the input prompt, choose any value. Default is 'na' if no value is chosen.

  run_name  seed  scale
0  Testing    10      5
1  Testing     5      5


In [22]:
# Now let's see what happens when we try to save model output with less parameters. It throws an error and asks
# the user to specify these parameters.
scale = 5
name = "Testing"
param_matrix = scale * np.eye(3)
table_params = dict(run_name=name, scale=scale)
params = table_params.copy()
params['param_matrix'] = param_matrix
output = run_model(seed=0, scale=scale)
run_id = mom.get_run_entry(table_params, path)
run_dir = Path(f"test_output/run_{run_id}/")
run_dir.mkdir(exist_ok=True)
with open(run_dir/'out.pkl', 'wb') as fid:
    pkl.dump(output, fid)
print(pd.read_csv(path, index_col=0))


The following keys are in the run table but not in param_dict.
Please specify these keys in param_dict:
{'seed'}


ValueError: Missing parameter keys.

## Loading model using parameters

In [23]:
if mom.run_exists(table_params, path):
    run_id = mom.get_run_entry(table_params, path)
    run_dir = Path(f"test_output/run_{run_id}/")
    with open(run_dir/'out.pkl', 'rb') as fid:
        output = pkl.load(fid)
print(output)

{'lam1': 158.73434901080273}


# Memoizing a function

In [33]:
shutil.rmtree("test_output", ignore_errors=True)  # Clear away any output that was there before

memory = mom.Memory(path)

@memory.cache(verbose=1)
def run_model(seed, scale):
    np.random.seed(seed)
    d = scale * np.random.randn(1000, 1000)
    lams, ev = eig(d)
    lams = np.sort(lams)[::-1]
    return np.real(lams[0])

In [34]:
%time run_model(0, .2)

Calling run_model with arguments:
seed     0.0
scale    0.2
CPU times: user 8.42 s, sys: 1.33 s, total: 9.75 s
Wall time: 1.59 s


6.349373960432065

In [35]:
%time run_model(0, .2) # Faster this time

Calling run_model with arguments:
seed     0.0
scale    0.2
CPU times: user 10.4 ms, sys: 7.9 ms, total: 18.3 ms
Wall time: 5.72 ms


6.349373960432065