## Introduction

This script demonstrates my own personal simulation output manager, which I call Model Output Manager or MOM for short.

In [None]:
import model_output_manager as mom
from collections import OrderedDict
import numpy as np
import shutil
import pandas as pd
eig = np.linalg.eig

## Saving model output

In [33]:
# %% Parameters for model
seed = 10
scale = 5
name = "Testing"
param_matrix = scale * np.eye(3)
# Store in an OrderedDict
table_params = OrderedDict(run_name=name, seed=seed, scale=scale)  # Parameters for keeping in a table
params = table_params.copy()
params['param_matrix'] = param_matrix  # Extra model parameters that can't be stored in a table

# %% Using these parameters we create model output
def run_model(seed, scale):
    np.random.seed(seed)
    d = scale * np.random.randn(1000, 1000)
    lams, ev = eig(d)
    lams = np.sort(lams)[::-1]
    output = {"lam1": np.real(lams[0])}
    return output
output = run_model(seed, scale)

In [34]:
# Now we want to save the model output and the parameters used to generate it in an organized way.
# That's where model_output_manager.save_model comes in.
shutil.rmtree("test_output", ignore_errors=True)  # Clear away any output that was there before
path = "test_output/run_log.csv"  # Path to the table. Note that the table's name is going to be "run_log.csv"
mom.save_model(table_params=table_params, table_path=path, model_output=output, params=params, 
                                 run_name=name, overwrite_existing=False)


Attempting to write data to '/Users/matt/code_projects/demonstrate_memoization/test_output/Testing_0/model_data.h5'

Done. Data written.


(0, 'test_output/Testing_0/model_data.h5')

In [35]:
# Here we see that a table has been created with a row that corresponds to table_params 
table = pd.read_csv(path, index_col=0)
print(table)

  run_name  seed  scale  run_number
0  Testing    10      5           0


In [36]:
# The parameter 'run_number' is incremented for every run that is duplicated. So if we save the output again,
# we'll get a new row with run_number incremented (unless 'overwrite_existing' is set to True, in which case
# a new row will not be created).
mom.save_model(table_params=table_params, table_path=path, model_output=output, params=params, 
                                 run_name=name, overwrite_existing=False)
print(pd.read_csv(path, index_col=0))


Attempting to write data to '/Users/matt/code_projects/demonstrate_memoization/test_output/Testing_1/model_data.h5'

Done. Data written.
  run_name  seed  scale  run_number
0  Testing    10      5           0
1  Testing    10      5           1


In [37]:
## Now we create a new run with different parameters. A new row will be added to the table
seed = 5
scale = 5
name = "Testing"
param_matrix = scale * np.eye(3)
table_params = OrderedDict(run_name=name, seed=seed, scale=scale)
params = table_params.copy()
params['param_matrix'] = param_matrix
output = run_model(seed, scale)
mom.save_model(table_params=table_params, table_path=path, model_output=output, params=params, 
                                 run_name=name, overwrite_existing=True)
print(pd.read_csv(path, index_col=0))


Attempting to write data to '/Users/matt/code_projects/demonstrate_memoization/test_output/Testing_2/model_data.h5'

Done. Data written.
  run_name  seed  scale  run_number
0  Testing    10      5           0
1  Testing    10      5           1
2  Testing     5      5           0


In [38]:
# Now let's see what happens when we add another parameter to our model. It works as you'd expect.
seed = 5
scale = 5
name = "Testing"
dummy = 8  # New parameter
param_matrix = scale * np.eye(3)
table_params = OrderedDict(run_name=name, seed=seed, scale=scale, dummy=dummy)
params = table_params.copy()
params['param_matrix'] = param_matrix
output = run_model(seed, scale)
mom.save_model(table_params=table_params, table_path=path, model_output=output, params=params, 
                                 run_name=name, overwrite_existing=True)
print(pd.read_csv(path, index_col=0))



Attempting to write data to '/Users/matt/code_projects/demonstrate_memoization/test_output/Testing_3/model_data.h5'

Done. Data written.
  run_name  seed  scale  dummy  run_number
0  Testing    10      5    NaN           0
1  Testing    10      5    NaN           1
2  Testing     5      5    NaN           0
3  Testing     5      5    8.0           0


In [39]:
# Now let's see what happens when we try to save model output with less parameters. It is handled gracefully.
scale = 5
name = "Testing"
param_matrix = scale * np.eye(3)
table_params = OrderedDict(run_name=name, scale=scale)
params = table_params.copy()
params['param_matrix'] = param_matrix
output = run_model(seed=0, scale=scale)
mom.save_model(table_params=table_params, table_path=path, model_output=output, params=params, 
                                 run_name=name, overwrite_existing=True)
print(pd.read_csv(path, index_col=0))



Attempting to write data to '/Users/matt/code_projects/demonstrate_memoization/test_output/Testing_4/model_data.h5'

Done. Data written.
  run_name  seed  scale  dummy  run_number
0  Testing  10.0      5    NaN           0
1  Testing  10.0      5    NaN           1
2  Testing   5.0      5    NaN           0
3  Testing   5.0      5    8.0           0
4  Testing   NaN      5    NaN           0


## Loading model using parameters

In [40]:
# from matplotlib import pyplot as plt
# plt.imshow(param_matrix)
# plt.show()

In [41]:
load_params = dict(run_name='Testing', seed=5.0, scale=5, dummy=8.0, run_number=0)
output, params, run_id, run_dir = mom.load_data(load_params, run_name=name, table_path=path)

In [42]:
output

{'lam1': 160.46450778794338}

In [43]:
params

{'dummy': 8,
 'output_dir': 'test_output/Testing_3',
 'param_matrix': array([[ 5.,  0.,  0.],
        [ 0.,  5.,  0.],
        [ 0.,  0.,  5.]]),
 'run_id': 3,
 'run_name': 'Testing',
 'scale': 5,
 'seed': 5,
 'table_path': 'test_output/run_log.csv'}

## Demonstrate Memoization

In [44]:
# Now let's see what happens when we try to save model output with less parameters. It is handled gracefully.
scale = 5
seed = 2
name = "Testing"
table_params = OrderedDict(run_name=name, scale=scale, seed=seed)
def run_simulation(arg_dict):
    run_exists = model_output_manager.run_with_params_exists(arg_dict, table_path=path)
    if run_exists:
#         load_params = dict(run_name='Testing', seed=5.0, scale=5, dummy=8.0, run_number=0)
        print("Loading data")
        output, params, run_id, run_dir = mom.load_data(arg_dict, run_name=name, table_path=path)
        return output
    else:
        print("Rerunning")
        param_matrix = scale * np.eye(1000)
        params = arg_dict.copy()
        params['param_matrix'] = param_matrix
        output = run_model(seed=0, scale=scale)
        mom.save_model(table_params=arg_dict, table_path=path, model_output=output, params=params, 
                                         run_name=name, overwrite_existing=True)
        return output
    

In [45]:
# shutil.rmtree("test_output", ignore_errors=True)
run_simulation(table_params)

Rerunning

Attempting to write data to '/Users/matt/code_projects/demonstrate_memoization/test_output/Testing_5/model_data.h5'

Done. Data written.


{'lam1': 158.73434901080122}

In [46]:
run_simulation(table_params)
print(pd.read_csv(path, index_col=0))

Loading data
  run_name  seed  scale  dummy  run_number
0  Testing  10.0      5    NaN           0
1  Testing  10.0      5    NaN           1
2  Testing   5.0      5    NaN           0
3  Testing   5.0      5    8.0           0
4  Testing   NaN      5    NaN           0
5  Testing   2.0      5    NaN           0


### Instead of saving and loading via mom.save_model and mom.load_model, you can also just use mom.dir_for_run to create an empty directory that you can then put stuff in via pickle.dump or whatever, i.e. you can let mom handle the output directory management but do the saving and loading from this directory manually.