In [1]:
import sys, os
sys.path.insert(0, '/home/llr/cms/wind/cmssw/CMSSW_9_4_2/src/ZZAnalysis/AnalysisStep/test/Python/')

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
from trainlib.ConfigFileHandler import ConfigFileHandler
from trainlib.ModelCollectionConfigFileHandler import ModelCollectionConfigFileHandler

Welcome to JupyROOT 6.10/09


Using TensorFlow backend.


In [4]:
import ROOT

In [5]:
# Root directory under which every run listed in this notebook is looked up;
# get_loss() and the settings loader join run-relative paths onto it.
campaign_workspace = "/data_CMS/cms/wind/"

In [6]:
def get_loss(run, mcoll, model):
    """Return the 'val_loss' value stored for `model` in a run's benchmark file.

    The file read is
    <campaign_workspace>/<run>/training/<mcoll>/model_benchmark.txt.

    Args:
        run: run directory, relative to campaign_workspace.
        mcoll: name of the model-collection subdirectory below "training".
        model: first argument handed to ConfigFileHandler.get_field when
            looking up 'val_loss'.

    Returns:
        The looked-up 'val_loss' field converted to float.
    """
    benchmark_path = os.path.join(campaign_workspace, run, "training", mcoll, "model_benchmark.txt")

    benchmark = ConfigFileHandler()
    benchmark.load_configuration(benchmark_path)

    val_loss = benchmark.get_field(model, 'val_loss')
    return float(val_loss)

In [7]:
def index_translation_dict(df, column):
    """Map every distinct value in df[column] to its position in sorted order.

    The sorted list of distinct values is also printed, so the caller can see
    which value ends up at which index.

    Args:
        df: any object for which df[column] yields an iterable of hashable,
            mutually comparable values (e.g. a pandas DataFrame).
        column: key used to select the values from df.

    Returns:
        dict mapping value -> index of that value in the sorted distinct list.
    """
    distinct_values = sorted(set(df[column]))

    # Single-argument parenthesised form: prints the same text as the Python 2
    # print statement and is also valid syntax on Python 3.
    print(distinct_values)

    return {value: position for position, value in enumerate(distinct_values)}

In [8]:
def convert_to_matshow(df, keys, plotcol):
    """Arrange one column of df on a 2D grid spanned by two key columns.

    Args:
        df: DataFrame holding the key columns and the column to plot.
        keys: column names; keys[0] indexes the first grid axis and keys[1]
            the second. An index lookup is built (and printed) for every
            entry of keys, but only the first two are used for the grid.
        plotcol: name of the column whose values fill the grid.

    Returns:
        (grid, axis0_values, axis1_values): the filled 2D array and the sorted
        distinct values of keys[0] and keys[1] as numpy arrays. Grid cells for
        which df has no row stay at 0.0.
    """
    # value -> grid index, one lookup table per key column
    lookups = [index_translation_dict(df, key) for key in keys]
    axis0_lookup, axis1_lookup = lookups[0], lookups[1]

    grid = np.zeros((len(axis0_lookup), len(axis1_lookup)))

    for _, record in df.iterrows():
        grid[axis0_lookup[record[keys[0]]], axis1_lookup[record[keys[1]]]] = record[plotcol]

    axis0_values = np.array(sorted(axis0_lookup.keys()))
    axis1_values = np.array(sorted(axis1_lookup.keys()))
    return grid, axis0_values, axis1_values

In [9]:
# Run directories (relative to the campaign workspace) of the hyperparameter
# sweep: sweep part 1 holds the 16- and 32-neuron runs, part 2 the 48- and
# 64-neuron runs, each with number_layers 0 to 3.
# A generator expression is used so the loop variables do not end up in the
# notebook namespace.
input_runs = list(
    "180525_hyperparameter_sweep_selvars_leading_jets_{sweep}/number_neurons_{neurons}.0number_layers_{layers}.0".format(
        sweep = sweep, neurons = neurons, layers = layers)
    for sweep, neuron_counts in ((1, (16, 32)), (2, (48, 64)))
    for neurons in neuron_counts
    for layers in range(4)
)

In [10]:
# Model collection to inspect: selects the settings_training/<mcoll_name> and
# training/<mcoll_name> subdirectories of each run and the collection by name.
mcoll_name = "D_ggH_qq_ML"
# Model inside that collection whose hyperparameters and val_loss are read.
model_name = "D_ggH_qq_ML"
# LaTeX label used as the title of the matplotlib figure.
pretty_name = r'$s_{\mathrm{ggH, qq}}$'

In [11]:
# Collect one record per run (the model's hyperparameters plus its validation
# loss) and build the DataFrame once at the end. Appending to a DataFrame inside
# the loop copies the whole frame on every iteration, and DataFrame.append no
# longer exists in pandas >= 2.0.
records = []

for run in input_runs:
    mconfhandler = ModelCollectionConfigFileHandler()
    mconfhandler.load_configuration(os.path.join(campaign_workspace, run, "settings_training", mcoll_name, "settings.conf"))
    mcolls = mconfhandler.GetModelCollection()

    # Hyperparameters of the requested model; stays empty when no collection
    # named mcoll_name is found, in which case the record only carries the loss.
    row_dict = {}
    for mcoll in mcolls:
        if mcoll.name == mcoll_name:
            row_dict = dict(mcoll.model_dict[model_name].hyperparameters)
            break

    loss = get_loss(run, mcoll_name, model_name)
    row_dict["loss"] = loss

    records.append(row_dict)

df = pd.DataFrame(records)

attempting to load configuration file from /data_CMS/cms/wind/180525_hyperparameter_sweep_selvars_leading_jets_1/number_neurons_16.0number_layers_0.0/settings_training/D_ggH_qq_ML/settings.conf
got the following list of hyperparams: {u'number_layers': 0.0, u'number_neurons': 16.0}
building network with 26 inputs
FlexiblePCAPreprocessor set up for 26 inputs
attempting to load configuration file from /data_CMS/cms/wind/180525_hyperparameter_sweep_selvars_leading_jets_1/number_neurons_16.0number_layers_0.0/training/D_ggH_qq_ML/model_benchmark.txt
attempting to load configuration file from /data_CMS/cms/wind/180525_hyperparameter_sweep_selvars_leading_jets_1/number_neurons_16.0number_layers_1.0/settings_training/D_ggH_qq_ML/settings.conf
got the following list of hyperparams: {u'number_layers': 1.0, u'number_neurons': 16.0}
building network with 26 inputs
FlexiblePCAPreprocessor set up for 26 inputs
attempting to load configuration file from /data_CMS/cms/wind/180525_hyperparameter_sweep_s

In [12]:
# Arrange the losses on a grid indexed [number_layers, number_neurons];
# x_label / y_label are the sorted distinct values of number_layers and
# number_neurons, i.e. the values belonging to each grid index.
data, x_label, y_label = convert_to_matshow(df, ["number_layers", "number_neurons"], "loss")

[0.0, 1.0, 2.0, 3.0]
[16.0, 32.0, 48.0, 64.0]


In [13]:
fig = plt.figure(figsize=(5,5))
ax = fig.add_subplot(111)

# data is indexed [layers, neurons]; transposing puts layers on the x axis and
# neurons on the y axis of the image.
cax = ax.matshow(data.transpose(), interpolation = 'nearest', cmap = 'RdYlGn_r', vmin = np.min(data), vmax = np.max(data))

# Pin exactly one tick per cell before labelling. Setting tick labels without
# fixing the tick positions depends on how many ticks the default locator
# happens to generate (including ones outside the view), which is why a dummy
# leading '' label would otherwise be needed.
ax.set_xticks(np.arange(len(x_label)))
ax.set_xticklabels((x_label.astype(int) + 1).astype(str))
ax.set_yticks(np.arange(len(y_label)))
ax.set_yticklabels(y_label.astype(int).astype(str))

ax.xaxis.set_label_position("bottom")
ax.xaxis.tick_bottom()
ax.set_xlabel("hidden layers")
ax.set_ylabel("neurons / hidden layer")
ax.set_title(pretty_name, fontsize = 15)

# Write the loss value into every cell. (i, j) index data, which after the
# transpose above correspond to the image's (x, y) coordinates.
for (i, j), val in np.ndenumerate(data):
    # 0.053 is a hand-picked loss value above which black text is used
    color = 'black' if val > 0.053 else 'white'
    ax.text(i, j, "{:4.4f}".format(val), va = 'center', ha = 'center', color = color, weight = 'bold')

plt.tight_layout()

In [18]:
def root_matshow(data, x_ticklabels, y_ticklabels, x_axislabel, y_axislabel, show_values, outfile,
                 title = "s_{ggH, q#bar{q}#rightarrow ZZ}", white_below = 0.048, white_above = 0.05):
    """Draw a 2D array as a coloured ROOT histogram and save it to a file.

    Args:
        data: 2D array; its first index runs along the histogram x axis and
            its second index along the y axis.
        x_ticklabels: strings drawn below the x bins, in bin order.
        y_ticklabels: strings drawn left of the y bins, in bin order.
        x_axislabel: title drawn below the x tick labels.
        y_axislabel: title drawn (rotated by 90 degrees) left of the y tick labels.
        show_values: if true, print every bin content (4 decimals) into its cell.
        outfile: path handed to TCanvas.SaveAs.
        title: histogram title (ROOT TLatex syntax).
        white_below, white_above: bin contents below `white_below` or above
            `white_above` are printed in white, all others in black.

    Returns:
        None. The canvas is written to `outfile`.
    """
    c = ROOT.TCanvas("canv", "canv", 800, 800)

    rows = np.shape(data)[0]   # number of x bins
    cols = np.shape(data)[1]   # number of y bins

    # Three-stop colour gradient: dark green -> pale yellow -> dark red.
    num_colors = 3
    reds = np.array([0.0, 0.99992310650208416, 0.64705884456634521])
    greens = np.array([0.40784314274787903, 0.99761630156460934, 0.0])
    blues = np.array([0.21568627655506134, 0.74502116091111126, 0.14901961386203766])
    stops = np.array([0.0, 0.5, 1.0])

    num_internal_colors = 150
    ROOT.TColor.CreateGradientColorTable(num_colors, stops, reds, greens, blues, num_internal_colors)

    # One unit-width bin per data cell, centred on the integer cell index.
    hist = ROOT.TH2F("matshow", title, rows, -0.5, rows - 0.5, cols, -0.5, cols - 0.5)
    ROOT.gStyle.SetOptStat(0)

    for row in range(rows):
        for col in range(cols):
            hist.Fill(row, col, data[row, col])

    x_axis = hist.GetXaxis()
    y_axis = hist.GetYaxis()
    x_binwidth = x_axis.GetBinWidth(1)
    y_binwidth = y_axis.GetBinWidth(1)

    # Push ROOT's own numeric axis labels far away; the custom tick labels
    # are drawn by hand below.
    x_axis.SetLabelOffset(100)
    y_axis.SetLabelOffset(100)

    c.cd()
    c.SetLeftMargin(0.15)
    c.SetBottomMargin(0.15)
    hist.Draw("col")

    t = ROOT.TLatex()
    t.SetTextAlign(22)   # centred horizontally and vertically
    t.SetTextSize(0.035)

    # x tick labels and x axis title
    # NOTE(review): gPad.GetUymin() / GetUxmin() are read before the first
    # c.Update(); the 0.7 / 0.3 offsets below were presumably tuned against
    # whatever value that yields -- confirm before changing the call order.
    y = ROOT.gPad.GetUymin() - 0.7 * y_binwidth
    for i, label in enumerate(x_ticklabels):
        x = x_axis.GetBinCenter(i + 1)
        t.DrawLatex(x, y, label)

    # half the centre of the last x bin = middle of the x axis
    t.DrawLatex(0.5 * x_axis.GetBinCenter(rows), y - 0.3 * y_binwidth, x_axislabel)

    # y tick labels and y axis title
    x = ROOT.gPad.GetUxmin() - 0.7 * x_binwidth
    for i, label in enumerate(y_ticklabels):
        y = y_axis.GetBinCenter(i + 1)
        t.DrawLatex(x, y, label)

    t.SetTextAngle(90)
    # The y axis has `cols` bins, so its middle is derived from bin `cols`
    # (using `rows` here is only right for square input).
    t.DrawLatex(x - 0.3 * x_binwidth, 0.5 * y_axis.GetBinCenter(cols), y_axislabel)

    t.SetTextAngle(0)
    if show_values:
        # x bins run over the first data axis (rows), y bins over the second
        # (cols), matching how the histogram was booked and filled.
        for ybin in range(1, cols + 1):
            for xbin in range(1, rows + 1):
                content = hist.GetBinContent(xbin, ybin)
                if content < white_below or content > white_above:
                    t.SetTextColor(ROOT.kWhite)
                else:
                    t.SetTextColor(ROOT.kBlack)
                t.DrawLatex(x_axis.GetBinCenter(xbin), y_axis.GetBinCenter(ybin), "{0:.4f}".format(content))

    ROOT.gPad.RedrawAxis()
    c.SetTicks(1,1)
    c.Update()

    c.SaveAs(outfile)

In [19]:
# Save through the Figure object created in the plotting cell rather than
# through pyplot's "current figure": if the inline backend is active, figures
# are closed once the cell that drew them finishes, so plt.savefig() called
# from a later cell would write an empty page.
fig.savefig("/data_CMS/cms/wind/HyperparameterPlots/ggH_qq.pdf")

In [20]:
# String tick labels for the ROOT plot. The layer values are shifted by one
# for display (presumably number_layers does not count the first hidden
# layer -- TODO confirm); the neuron counts are shown as plain integers.
x_ticklabels = (x_label + 1).astype(int).astype(str)
y_ticklabels = y_label.astype(int).astype(str)

In [21]:
# Reverse the neuron axis of data and the matching tick labels together, so
# the y axis order of the ROOT plot is flipped while each label stays with
# its own row of values.
root_matshow(np.fliplr(data), x_ticklabels, np.flipud(y_ticklabels), "hidden layers", "hidden neurons / layer", True, "/data_CMS/cms/wind/HyperparameterPlots/ggH_qq_rootstyle.pdf")

Info in <TCanvas::Print>: pdf file /data_CMS/cms/wind/HyperparameterPlots/ggH_qq_rootstyle.pdf has been created
