# Sacred python package

Sacred is a tool to help you configure, organize, log and reproduce experiments. It is designed to do all the tedious overhead work that you need to do around your actual experiment in order to:

* keep track of all the parameters of your experiment
* easily run your experiment for different settings
* save configurations for individual runs in a database
* reproduce your results

## Installation
`python -m pip install sacred`

NOTE: not available in conda

## Example experiment

In [8]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error

# Load the example data set from disk.
X = np.load('data/example_data.npy')
    
# PCA constructed without arguments, i.e. with the library defaults.
pca = PCA()
    
fit_pca = pca.fit(X)
    
# Project the data with the fitted model ...
enc = fit_pca.transform(X)
    
# ... and map the projection back to the original feature space.
dec = fit_pca.inverse_transform(enc)
        
# Reconstruction error between the input and its round trip through the PCA.
print(mean_squared_error(X, dec))

3.27137e-14


## Simplest sacred experiment:

In [6]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error

from sacred import Experiment
ex = Experiment('Presentation ex 1', interactive=True)

@ex.main
def train():
    """Fit a PCA on the example data and return its reconstruction MSE."""
    samples = np.load('data/example_data.npy')

    model = PCA().fit(samples)

    # Round trip: encode the data, then decode it again.
    encoded = model.transform(samples)
    reconstructed = model.inverse_transform(encoded)

    return mean_squared_error(samples, reconstructed)

ex.run()
    
    

INFO - Presentation ex 1 - Running command 'train'
INFO - Presentation ex 1 - Started
INFO - Presentation ex 1 - Result: 3.2713703301172695e-14
INFO - Presentation ex 1 - Completed after 0:00:01


<sacred.run.Run at 0x7f978d098dd8>

NOTE: since this presentation is a notebook, the interpreter is in interactive mode; the extra `interactive=True` parameter in the experiment definition is not advised in a normal experiment.
Furthermore, instead of running the experiment in the last line, one can also change @ex.main to @ex.automain, making that last call unnecessary; however, this is not possible in interactive mode.

## Adding file observer
While the example above seems to only add code and generate some extra stats about the experiment, sacred becomes very useful when we add an observer.

In [10]:
import numpy as np
import os
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error

from sacred import Experiment
ex = Experiment('Presentation ex 1', interactive=True)

# Attach a file observer whose base directory is runs/<ex.path>.
from sacred.observers import FileStorageObserver
ex.observers.append(FileStorageObserver.create(basedir=os.path.join('runs', ex.path)))

@ex.main
def train():
    """Fit a PCA on the example data and return its reconstruction MSE."""
    samples = np.load('data/example_data.npy')

    model = PCA().fit(samples)

    # Round trip: encode the data, then decode it again.
    encoded = model.transform(samples)
    reconstructed = model.inverse_transform(encoded)

    return mean_squared_error(samples, reconstructed)

ex.run()
    
    

INFO - Presentation ex 1 - Running command 'train'
INFO - Presentation ex 1 - Started run with ID "1"
INFO - Presentation ex 1 - Result: 3.2713703301172695e-14
INFO - Presentation ex 1 - Completed after 0:00:01


<sacred.run.Run at 0x7f978c650dd8>

In [20]:
# Directory of the run with ID "1" (the ID reported in the log output above).
path = "runs/Presentation ex 1/{}".format(1)
os.listdir(path)

['run.json', 'config.json', 'cout.txt']

In [22]:
# Show the run description that was written into the run directory.
with open (path + '/run.json', 'r') as f:
    print(f.read())

{
  "artifacts": [],
  "command": "train",
  "experiment": {
    "base_dir": "/home/sako",
    "dependencies": [
      "IPython==5.3.0",
      "numpy==1.13.1",
      "platform==1.0.8",
      "sacred==0.7.0",
      "sklearn==0.19.0"
    ],
    "mainfile": null,
    "name": "Presentation ex 1",
    "repositories": [],
    "sources": []
  },
  "heartbeat": "2017-09-10T13:01:06.246606",
  "host": {
    "ENV": {},
    "cpu": "Intel(R) Xeon(R) CPU E5-2630L v4 @ 1.80GHz",
    "hostname": "hostname",
    "os": [
      "Linux",
      "Linux-version"
    ],
    "python_version": "3.6.1"
  },
  "meta": {
    "command": "train",
    "options": {
      "--beat_interval": null,
      "--capture": null,
      "--comment": null,
      "--debug": false,
      "--enforce_clean": false,
      "--file_storage": null,
      "--force": false,
      "--help": false,
      "--loglevel": null,
      "--mongo_db": null,
      "--name": null,
      "--pdb": false,
      "--print_config": false,
      "--priority

In [23]:
# Show the configuration that was stored for this run.
with open (path + '/config.json', 'r') as f:
    print(f.read())

{
  "seed": 193138917
}


In [24]:
# Show the stored console output file of this run.
with open (path + '/cout.txt', 'r') as f:
    print(f.read())




NOTE: Source is normally also saved in a separate folder, however this is not possible in interactive mode

## Defining more sacred functions

In [28]:
import numpy as np
import os
from sklearn.decomposition import KernelPCA
from sklearn.metrics import mean_squared_error

from sacred import Experiment
ex = Experiment('Presentation ex 2', interactive=True)

from sacred.observers import FileStorageObserver
ex.observers.append(FileStorageObserver.create(basedir=os.path.join('runs', ex.path)))

@ex.config
def config():
    # Path of the .npy file that data() loads.
    data_path = 'data/example_data.npy'
    
    # Model handed to train(); train() calls inverse_transform() on it, hence fit_inverse_transform=True.
    pca = KernelPCA(fit_inverse_transform=True)

@ex.capture
def data(data_path):
    """Load and return the data stored at ``data_path``."""
    print('Reading data')
    return np.load(data_path)

@ex.main
def train(pca):
    """Fit ``pca`` on the loaded data and return the reconstruction MSE."""
    samples = data()

    print('Fitting PCA')
    model = pca.fit(samples)

    # Round trip: encode the data, then decode it again.
    reconstructed = model.inverse_transform(model.transform(samples))

    return mean_squared_error(samples, reconstructed)

ex.run()
    
    

INFO - Presentation ex 2 - Running command 'train'
INFO - Presentation ex 2 - Started run with ID "1"


Reading data
Fitting PCA


INFO - Presentation ex 2 - Result: 0.08322899043560028
INFO - Presentation ex 2 - Completed after 0:00:01


<sacred.run.Run at 0x7f978c331f98>

In [33]:
# Directory of the run with ID "1" (the ID reported in the log output above).
path = "runs/Presentation ex 2/{}".format(1)
os.listdir(path)

['run.json', 'config.json', 'cout.txt']

In [31]:
# Show the stored configuration, which now also contains the entries from config().
with open (path + '/config.json', 'r') as f:
    print(f.read())

{
  "data_path": "data/example_data.npy",
  "pca": {
    "py/object": "sklearn.decomposition.kernel_pca.KernelPCA",
    "py/state": {
      "_centerer": {
        "py/object": "sklearn.preprocessing.data.KernelCenterer",
        "py/state": {
          "_sklearn_version": "0.19.0"
        }
      },
      "_sklearn_version": "0.19.0",
      "alpha": 1.0,
      "coef0": 1,
      "copy_X": true,
      "degree": 3,
      "eigen_solver": "auto",
      "fit_inverse_transform": true,
      "gamma": null,
      "kernel": "linear",
      "kernel_params": null,
      "max_iter": null,
      "n_components": null,
      "n_jobs": 1,
      "random_state": null,
      "remove_zero_eig": false,
      "tol": 0
    }
  },
  "seed": 925025430
}


In [34]:
# Due to the interactive mode, capturing of the output does not work here.
# Normally this file would contain everything printed to the console, such as the print statements in the code above.
with open (path + '/cout.txt', 'r') as f:
    print(f.read())




## Adding more output files
You will probably want to save some more files with statistics and outputs. There are two good ways of doing this, both having their (dis-)advantages.

In [49]:
import numpy as np
import pandas as pd
import os
from sklearn.decomposition import KernelPCA
from sklearn.metrics import mean_squared_error

# parameters
# Module-level switch read by train(): when True the fitted model is added as an artifact.
save_model = True

from sacred import Experiment
ex = Experiment('Presentation ex 3', interactive=True)

# Keep the file observer first in the list: train() reads its run directory through ex.observers[0].
from sacred.observers import FileStorageObserver
ex.observers.append(FileStorageObserver.create(basedir=os.path.join('runs', ex.path)))

@ex.config
def config():
    # Path of the .npy file that data() loads.
    data_path = 'data/example_data.npy'
    
    # Model handed to train(); train() calls inverse_transform() on it, hence fit_inverse_transform=True.
    pca = KernelPCA(fit_inverse_transform=True)

@ex.capture
def data(data_path):
    """Load and return the data stored at ``data_path``."""
    print('Reading data')
    return np.load(data_path)

def pickle_object(obj):
    """Pickle ``obj`` into a new temporary file and return the file's path.

    The file is created with ``tempfile.mkstemp`` instead of the deprecated
    ``tempfile.mktemp``: ``mktemp`` only returns a name, so another process
    could create that path before it is opened here. ``mkstemp`` creates the
    file atomically and hands back an open descriptor.

    The file is not deleted by this function; the caller owns the returned
    path and is responsible for removing it when it is no longer needed.
    """
    import os
    import pickle
    import tempfile

    fd, fp = tempfile.mkstemp()
    # Wrap the already-open descriptor so it is closed when the block exits.
    with os.fdopen(fd, 'wb') as f:
        pickle.dump(obj, f)
    return fp

@ex.main
def train(pca):
    """Fit ``pca``, store the model and a score file, and return the MSE.

    Two ways of saving extra output are shown: adding a file as a sacred
    artifact, and writing directly into the first observer's run directory.
    """
    samples = data()

    print('Fitting PCA')
    model = pca.fit(samples)
    model_name = pca.__class__.__name__

    # Option one: hand the pickled model to sacred as an artifact.
    if save_model:
        ex.add_artifact(pickle_object(model), 'fitted_{}.pkl'.format(model_name))

    encoded = model.transform(samples)
    reconstructed = model.inverse_transform(encoded)
    mse = mean_squared_error(samples, reconstructed)

    # Option two: write into the run directory of the first observer manually.
    score_rows = [[model_name, encoded.shape, mse]]
    scores_file = os.path.join(ex.observers[0].dir, 'scores.csv')
    pd.DataFrame(score_rows).to_csv(scores_file, header=False, index=False)

    return mse
    

ex.run()
    

INFO - Presentation ex 3 - Running command 'train'
INFO - Presentation ex 3 - Started run with ID "1"


Reading data
Fitting PCA


INFO - Presentation ex 3 - Result: 0.08322899043560028
INFO - Presentation ex 3 - Completed after 0:00:01


<sacred.run.Run at 0x7f978c3312e8>

In [50]:
# Directory of the run with ID "1"; it now also holds the artifact and the manually written scores file.
path = "runs/Presentation ex 3/{}".format(1)
os.listdir(path)

['run.json', 'fitted_KernelPCA.pkl', 'config.json', 'scores.csv', 'cout.txt']

## Adding MongoDB observer
Adding a MongoDB observer adds a whole lot of advantages to your experiment. If you have access to a MongoDB server, or you have your own server and want to go through the hassle of installing and setting up a safe MongoDB, that would be your best option. If neither is the case, you can create a free account at MongoDB Atlas for a 512 MB online MongoDB server:

https://www.mongodb.com/cloud/atlas/

I will not go into detail about how to set this up, and from now I assume you have a secure server at hand. 

Advantages of using mongoDB:
* Extra external place of secure storage
* Query your results
* Experiments receive a unique ID (especially useful when developing on multiple devices)

Building a debug mode is very much advised to not clog up your DB with failed experiments

NOTE: pymongo will need to be installed to use this observer

In [52]:
import numpy as np
import pandas as pd
import os
from sklearn.decomposition import KernelPCA
from sklearn.metrics import mean_squared_error

# parameters
# DEBUG = True skips the MongoDB observer below, so only the file observer records the run.
DEBUG = False
# Module-level switch read by train(): when True the fitted model is added as an artifact.
save_model = True

from sacred import Experiment
ex = Experiment('Presentation ex 4', interactive=True)

# Keep the file observer first in the list: train() reads its run directory through ex.observers[0].
from sacred.observers import FileStorageObserver
ex.observers.append(FileStorageObserver.create(basedir=os.path.join('runs', ex.path)))
if not DEBUG:
    from sacred.observers import MongoObserver
    # The URL below is a placeholder (user, pass, server names); substitute your own connection string
    # and avoid committing real credentials, e.g. by reading the URL from a file that is kept out of version control.
    ex.observers.append(MongoObserver.create(url='mongodb://user:pass@server1.mongodb.net:27017,server2.mongodb.net:27017,server3.mongodb.net:27017/test?ssl=true&replicaSet=Cluster0-shard-0&authSource=admin',
                                            db_name='graduation'))

@ex.config
def config():
    # Path of the .npy file that data() loads.
    data_path = 'data/example_data.npy'
    
    # Model handed to train(); train() calls inverse_transform() on it, hence fit_inverse_transform=True.
    pca = KernelPCA(fit_inverse_transform=True)

@ex.capture
def data(data_path):
    """Load and return the data stored at ``data_path``."""
    print('Reading data')
    return np.load(data_path)

@ex.main
def train(pca):
    """Fit ``pca``, store the model and a score file, and return the MSE.

    Two ways of saving extra output are shown: adding a file as a sacred
    artifact, and writing directly into the first observer's run directory.
    ``pickle_object`` is the helper defined in the previous example.
    """
    X = data()

    print('Fitting PCA')
    fit_pca = pca.fit(X)

    # The artifact and the score row are both labelled with the model's class
    # name. (The original referenced an undefined variable `name` here.)
    model_name = pca.__class__.__name__

    # Option one: adding the pickled model as a sacred artifact.
    if save_model:
        ex.add_artifact(pickle_object(fit_pca), 'fitted_{}.pkl'.format(model_name))

    enc = fit_pca.transform(X)

    dec = fit_pca.inverse_transform(enc)

    mse = mean_squared_error(X, dec)

    # Option two: writing to the run directory of the first observer manually.
    scores = [[model_name, enc.shape, mse]]
    pd.DataFrame(scores).to_csv(os.path.join(ex.observers[0].dir, 'scores.csv'), header=False, index=False)

    return mse
    

ex.run()
    

INFO - Presentation ex 4 - Running command 'train'
INFO - Presentation ex 4 - Started run with ID "15"


Reading data
Fitting PCA


INFO - Presentation ex 4 - Result: 0.08322899043560028
INFO - Presentation ex 4 - Completed after 0:00:04


<sacred.run.Run at 0x7f978d09ecc0>

## Additional example for a Keras Autoencoder

In [None]:
import numpy as np
import os, sys
sys.path.append('../')

from tools.generator import DataGenerator

from keras.layers import Input, Dense
from keras.models import Model
from keras.callbacks import TensorBoard

from sacred import Experiment
from sacred.observers import FileStorageObserver
from sacred.observers import MongoObserver

# DEBUG parameters
# NOTE(review): debug runs enable model saving and TensorBoard logging while
# normal runs disable both - confirm that this is the intended direction.
DEBUG = True
if DEBUG:
    save_model = True
    tensorboard = True
else:
    save_model = False
    tensorboard = False

# create experiment:
ex = Experiment('Autoencoder')

# add file observer
# It must stay the first observer: train() reads its run directory through ex.observers[0].
observer_path = '../runs/DEBUG' if DEBUG else '../runs'
ex.observers.append(FileStorageObserver.create(basedir=os.path.join(observer_path, ex.path)))

if not DEBUG:
    # add mongo observer
    # The connection URL is read from a local file so that it does not appear in the code.
    with open('../tools/.mongo', 'r') as f:
        # Strip surrounding whitespace: a trailing newline in the file would
        # otherwise become part of the connection URL.
        auth_url = f.read().strip()
    ex.observers.append(MongoObserver.create(url=auth_url, db_name='graduation'))


@ex.config
def my_config():
    # Input data file; it is opened memory-mapped on the next line only to read its shape.
    data_path = r'../data/tcga-gbm_exp.npy'
    # NOTE(review): the two axis lengths are unpacked as (cols, rows), so `rows` is the length of the second axis; model() uses it as the layer width.
    cols, rows = np.load(data_path, mmap_mode='r').shape

    # Captured by data() and model().
    batch_size = 32

    # Captured by model(): number of units in the 'encoded' layer.
    encoding_dim = 100
    # Captured by train() and forwarded to fit_generator as steps_per_epoch, validation_steps and epochs.
    samples_per_epoch = 1
    nb_val_samples = 1
    nb_epoch = 50


@ex.capture
def data(data_path, batch_size):
    """Return the training and validation generators, in that order."""
    def make_generator(train):
        # Same data file and batch size for both; only the train flag differs.
        return DataGenerator(data_path, train=train, batch_size=batch_size).generate()

    return make_generator(True), make_generator(False)


@ex.capture
def model(encoding_dim, rows, batch_size):
    """Build the autoencoder: input -> 'encoded' layer -> 'decoded' layer."""
    # Local is called `inputs` so that the builtin input() is not shadowed.
    inputs = Input(batch_shape=(batch_size, rows), name='input')
    code = Dense(encoding_dim, activation='relu', name='encoded')(inputs)
    reconstruction = Dense(rows, activation='sigmoid', name='decoded')(code)

    return Model(inputs=inputs, outputs=reconstruction)


@ex.automain
def train(nb_epoch, samples_per_epoch, nb_val_samples):
    """Train the autoencoder and return the last recorded value of each metric.

    Output files (TensorBoard logs, the saved model) go into the run directory
    of the first observer.
    """
    train_batches, val_batches = data()
    run_dir = ex.observers[0].dir

    autoencoder = model()
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # TensorBoard logging is optional and controlled by the module-level flag.
    callbacks = (
        [TensorBoard(log_dir=run_dir, histogram_freq=1, write_graph=True, write_images=True)]
        if tensorboard
        else []
    )

    autoencoder.summary()

    history = autoencoder.fit_generator(
        generator=train_batches,
        steps_per_epoch=samples_per_epoch,
        epochs=nb_epoch,
        validation_data=val_batches,
        validation_steps=nb_val_samples,
        callbacks=callbacks,
    )

    if save_model:
        autoencoder.save(os.path.join(run_dir, 'model.h5'))

    # Keep only the final entry of every series in the training history.
    return {metric: values[-1] for metric, values in history.history.items()}


# Find this notebook at:

https://github.com/sakoarts/sacred_presentation

sakoarts.nl/sacred