In [1]:
import os
import shutil
import numpy as np
import subprocess as sp

In [2]:
from trainlib.ConfigFileHandler import ConfigFileHandler
from trainlib.ModelCollectionConfigFileHandler import ModelCollectionConfigFileHandler
from DistributeTrainingSettings import distribute_training_settings

Welcome to JupyROOT 6.10/09


Using TensorFlow backend.


In [3]:
# This notebook looks through several training runs (e.g. the points of a hyperparameter sweep) and, for each
# model, keeps the version with the lowest validation loss that was encountered.

In [4]:
def get_loss(run, mcoll, model):
    """
    Return the validation loss recorded for `model` in the training run `run`.

    The value is looked up as the 'val_loss' field of `model` in the file
    <campaign_workspace>/<run>/training/<mcoll>/model_benchmark.txt
    and is converted to a float before it is returned.
    """
    benchmark_path = os.path.join(campaign_workspace, run, "training", mcoll, "model_benchmark.txt")

    benchmark = ConfigFileHandler()
    benchmark.load_configuration(benchmark_path)

    val_loss = benchmark.get_field(model, 'val_loss')
    return float(val_loss)

In [5]:
# root directory relative to which all input runs and the output run are addressed
campaign_workspace = "/data_CMS/cms/wind/"

In [6]:
# runs to check for (good) models. The first entry is used as the reference run from which the list of available
# models is taken - all other runs are expected to follow the same structure.
# The list is enumerated as: sweep folder -> number of neurons -> number of layers (0 to 3), i.e. 16 runs in total.
input_runs = list(
    "%s_hyperparameter_sweep/number_neurons_%.1fnumber_layers_%.1f" % (sweep, neurons, layers)
    for sweep, neuron_counts in (("180515", (16.0, 32.0)), ("180516", (48.0, 64.0)))
    for neurons in neuron_counts
    for layers in (0.0, 1.0, 2.0, 3.0)
)

In [7]:
# run (relative to campaign_workspace) into which the winning models and the combined settings are written
output_run = "180518_optimized_hyperparameters/optimized"

In [8]:
# create the output run directory (including any missing parent directories);
# note that os.makedirs raises OSError if the directory already exists, so this cell fails for an existing output run
os.makedirs(os.path.join(campaign_workspace, output_run))

In [9]:
# load the names of the available model collections: these are the immediate subdirectories of the
# "training" folder of the reference run (the first input run)
reference_run = input_runs[0]
# the first entry yielded by os.walk is (dirpath, dirnames, filenames) of the top directory; [1] selects dirnames.
# The builtin next() is used instead of the generator method .next(), which exists only in Python 2.
available_mcolls = next(os.walk(os.path.join(campaign_workspace, reference_run, "training")))[1]

In [10]:
# collects one entry per model collection: the collection template filled with the winning models
mcolls_winning = []

In [11]:
# For every model collection of the reference run: determine, model by model, the input run with the lowest
# validation loss, copy that trained model into the output run and assemble a model collection that carries
# the settings of the winners. The assembled collections are finally written to the output run's settings.conf.

# start from an empty list so that re-running this cell (e.g. after a failed attempt) does not keep stale entries
mcolls_winning = []

for mcoll in available_mcolls:
    # the models of this collection are the immediate subdirectories of its training folder in the reference run
    # (builtin next() instead of the Python 2-only generator method .next())
    models = next(os.walk(os.path.join(campaign_workspace, reference_run, "training", mcoll)))[1]

    # load a representative version of the current model collection...
    mconfhandler = ModelCollectionConfigFileHandler()
    mconfhandler.load_configuration(os.path.join(campaign_workspace, reference_run, "settings_training", mcoll, "settings.conf"))
    mcoll_template = mconfhandler.GetModelCollection()[0]

    # ... but strip away all the actual model components
    mcoll_template.model_dict = {}
    mcoll_template.preprocessor_dict = {}
    mcoll_template.settings_dict = {}

    for model in models:
        # compare this model across the different runs
        losses = [get_loss(run, mcoll, model) for run in input_runs]

        # np.argmin returns the position of a NaN entry if there is one, which would declare a diverged training
        # (val_loss = nan) the winner; np.nanargmin ignores NaN entries instead and raises ValueError only if
        # the loss is NaN in every run
        winner = np.nanargmin(losses)

        winning_run = input_runs[winner]

        # copy the winning model into the output run (shutil.copytree requires that the destination does not exist yet)
        shutil.copytree(os.path.join(campaign_workspace, winning_run, "training", mcoll, model),
                        os.path.join(campaign_workspace, output_run, "training", mcoll, model))

        # single-argument print(...) produces the same output under Python 2 and Python 3
        print("--------------------------------------------")
        print(" take " + model + " from " + winning_run)
        print("--------------------------------------------")

        # load the winning model to keep track of its settings
        mconfhandler = ModelCollectionConfigFileHandler()
        mconfhandler.load_configuration(os.path.join(campaign_workspace, winning_run, "settings_training", mcoll, "settings.conf"))
        mcoll_winning = mconfhandler.GetModelCollection()[0]

        # then pull the winning model over into the template
        winning_model = mcoll_winning.model_dict[model]
        winning_preprocessor = mcoll_winning.preprocessor_dict[model]
        winning_settings = mcoll_winning.settings_dict[model]

        mcoll_template.add_model(winning_preprocessor, winning_model, winning_settings)

    mcolls_winning.append(mcoll_template)

# now save the put-together config file also into the output run
mconfhandler = ModelCollectionConfigFileHandler()
mconfhandler.ToConfiguration(mcolls_winning)
mconfhandler.save_configuration(os.path.join(campaign_workspace, output_run, "settings.conf"))

attempting to load configuration file from /data_CMS/cms/wind/180515_hyperparameter_sweep/number_neurons_16.0number_layers_0.0/settings_training/D_VBF_ggH_ML/settings.conf
got the following list of hyperparams: {u'number_layers': 0.0, u'number_neurons': 16.0}
building network with 29 inputs
FlexiblePCAPreprocessor set up for 29 inputs
got the following list of hyperparams: {u'number_layers': 0.0, u'number_neurons': 16.0}
building network with 11 inputs
FlexiblePCAPreprocessor set up for 11 inputs
got the following list of hyperparams: {u'number_layers': 0.0, u'number_neurons': 16.0}
building network with 16 inputs
FlexiblePCAPreprocessor set up for 16 inputs
attempting to load configuration file from /data_CMS/cms/wind/180515_hyperparameter_sweep/number_neurons_16.0number_layers_0.0/training/D_VBF_ggH_ML/model_benchmark.txt
attempting to load configuration file from /data_CMS/cms/wind/180515_hyperparameter_sweep/number_neurons_16.0number_layers_1.0/training/D_VBF_ggH_ML/model_benchmark

In [12]:
# now distribute the training settings again, as usual (the output run directory is passed with a trailing '/'):
distribute_training_settings(os.path.join(campaign_workspace, output_run + '/'))

attempting to load configuration file from /data_CMS/cms/wind/180518_optimized_hyperparameters/optimized/settings.conf
got the following list of hyperparams: {u'number_layers': 0.0, u'number_neurons': 48.0}
building network with 29 inputs
FlexiblePCAPreprocessor set up for 29 inputs
got the following list of hyperparams: {u'number_layers': 0.0, u'number_neurons': 16.0}
building network with 11 inputs
FlexiblePCAPreprocessor set up for 11 inputs
got the following list of hyperparams: {u'number_layers': 1.0, u'number_neurons': 16.0}
building network with 16 inputs
FlexiblePCAPreprocessor set up for 16 inputs
got the following list of hyperparams: {u'number_layers': 1.0, u'number_neurons': 48.0}
building network with 28 inputs
FlexiblePCAPreprocessor set up for 28 inputs
got the following list of hyperparams: {u'number_layers': 1.0, u'number_neurons': 48.0}
building network with 29 inputs
FlexiblePCAPreprocessor set up for 29 inputs
got the following list of hyperparams: {u'number_layers'