# In [None]:
import pickle
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_validate, GridSearchCV
from xgboost.sklearn import XGBClassifier 
from sklearn import metrics
from sklearn.utils.class_weight import compute_class_weight



# ---- Run configuration ------------------------------------------------------
cut = 'Boosted_SR'  # Boosted_SR or Resolved_SR
year = '2018_SR_new'  # version of the samples, must correspond to the name of the folder in base_dir
vers = ''  # blank, or _wotop or _DYonly
base_dir = 'D:\\Travail\\ZV_analysis'
model_name = 'DNN_180_0_0_0_0_lr0.0005_dp0.2'
# Folder holding the trained model: <base_dir>\<year>\<cut>\Res_<model_name>
model_dir = base_dir + '\\' + year + '\\' + cut + '\\Res_{}'.format(model_name)

# Full list of 'Resolved_SR_'-prefixed variable names; used further down to
# label the scaler parameters.
variables_list = [
    'Resolved_SR_pt1', 'Resolved_SR_pt2', 'Resolved_SR_eta1',
    'Resolved_SR_eta2', 'Resolved_SR_mll', 'Resolved_SR_Zepp_ll',
    'Resolved_SR_nFatJet', 'Resolved_SR_FatJet_pt', 'Resolved_SR_FatJeteta',
    'Resolved_SR_FatJet_softdropmass', 'Resolved_SR_FatJet_tau21',
    'Resolved_SR_Zlep_1', 'Resolved_SR_Zlep_2', 'Resolved_SR_category',
    'Resolved_SR_vbs_jet_pt1', 'Resolved_SR_vbs_jet_pt2',
    'Resolved_SR_vbs_jet_eta1', 'Resolved_SR_vbs_jet_eta2',
    'Resolved_SR_V_jet_pt1', 'Resolved_SR_V_jet_pt2',
    'Resolved_SR_V_jet_eta1', 'Resolved_SR_V_jet_eta2',
    'Resolved_SR_mjj_max', 'Resolved_SR_detajj_mjjmax',
    'Resolved_SR_dphijj_mjjmax', 'Resolved_SR_V_jet_mass',
]

# Un-prefixed feature names fed to the network, selected by the chosen cut.
if cut == 'Boosted_SR':
    features = [
        'pt1', 'pt2', 'eta1',
        'eta2', 'mll',
        'FatJet_pt', 'FatJeteta',
        'Zlep_1', 'Zlep_2',
        'vbs_jet_pt1', 'vbs_jet_pt2',
        'vbs_jet_eta1', 'vbs_jet_eta2',
        'mjj_max', 'detajj_mjjmax',
    ]
elif cut == 'Resolved_SR':
    features = [
        'pt1', 'pt2', 'eta1',
        'eta2', 'mll',
        'Zlep_1', 'Zlep_2',
        'vbs_jet_pt1', 'vbs_jet_pt2',
        'vbs_jet_eta1', 'vbs_jet_eta2',
        'V_jet_pt1', 'V_jet_pt2',
        'V_jet_eta1', 'V_jet_eta2',
        'mjj_max', 'detajj_mjjmax',
        'V_jet_mass',
    ]
else:
    # Fail here with a clear message instead of a NameError on `features` below.
    raise ValueError(
        "Unknown cut {!r}: expected 'Boosted_SR' or 'Resolved_SR'".format(cut)
    )

input_list = features
# Network input names carry the cut as prefix, e.g. 'Boosted_SR_pt1'.
inputs = [cut + '_' + i for i in input_list]

## SAVE THE MODEL, ITS METADATA AND TRAINING INFORMATION

# Dump the list of network input names as YAML inside the model folder.
# If a previous dump exists it is kept as a single backup, variables_old.yml.
import yaml
varfile = os.path.join(model_dir, "variables.yml")
old_file = os.path.join(model_dir, "variables_old.yml")
if os.path.isfile(varfile):
    print("ACHTUNG! variables file already existing: old file renamed with '_old'")
    # os.replace overwrites an existing backup in one step on every platform,
    # so no separate os.remove of the previous backup is needed.
    os.replace(varfile, old_file)
with open(varfile, "w") as out_var_file:
    out_var_file.write(yaml.dump(inputs))
    

## Export:
## * keras model to tensorflow model
## * tf metadata
## * scaler

# File names for the export steps; each one is joined with "dir" when used.
args = dict(
    dir=model_dir,
    input="model.h5",
    output="model.pb",
    tf_metadata="tf_metadata.txt",
    input_scaler="scaler_model.pkl",
    output_scaler="scaler.txt",
)


from keras import backend as K
# The learning phase has to be fixed to 0 before the Keras model is loaded.
K.set_learning_phase(0)

from keras.models import load_model
keras_model_path = os.path.join(args["dir"], args["input"])
model = load_model(keras_model_path)

from keras import backend as K
import tensorflow as tf
                   

def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """
    Freezes the state of a session into a pruned computation graph.

    Creates a new computation graph where variable nodes are replaced by
    constants taking their current value in the session. The new graph will be
    pruned so subgraphs that are not necessary to compute the requested
    outputs are removed.

    The op names of all global variables are appended to the requested output
    names before conversion. The list passed as ``output_names`` is copied and
    never modified.

    @param session The TensorFlow session to be frozen.
    @param keep_var_names A list of variable names that should not be frozen,
                          or None to freeze all the variables in the graph.
    @param output_names Names of the relevant graph outputs.
    @param clear_devices Remove the device directives from the graph for better portability.
    @return The frozen graph definition.
    """
    from tensorflow.python.framework.graph_util import convert_variables_to_constants
    graph = session.graph
    with graph.as_default():
        # Collect the variable op names once; they are needed twice below.
        variable_names = [v.op.name for v in tf.global_variables()]
        freeze_var_names = list(set(variable_names).difference(keep_var_names or []))
        # Build a fresh list so the caller's output_names is not mutated.
        output_names = list(output_names or []) + variable_names
        # Graph -> GraphDef ProtoBuf
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = convert_variables_to_constants(session, input_graph_def,
                                                      output_names, freeze_var_names)
        return frozen_graph


# Freeze the current Keras session, requesting the model's output ops.
output_node_names = [tensor.op.name for tensor in model.outputs]
frozen_graph = freeze_session(K.get_session(), output_names=output_node_names)

# Write the frozen graph in binary form to <args["dir"]>/<args["output"]>.
tf.train.write_graph(frozen_graph, args["dir"], args["output"], as_text=False)

## save tensorflow metadata
# One line: name of the first model input and of the first model output.
metadata_path = os.path.join(args["dir"], args["tf_metadata"])
with open(metadata_path, "w") as metadata_file:
    metadata_file.write(
        "{} {}\n".format(str(model.inputs[0].name), str(model.outputs[0].name))
    )

# In [None]:
#dump scaler
#be careful to put the scaler pickle in the DNN folder

# SECURITY NOTE: pickle.load can execute arbitrary code; only load scaler
# files that were produced by a trusted training run.
with open(os.path.join(args["dir"], args["input_scaler"]), 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# One column per variable name; row 0 holds the mean, row 1 the scale.
scaler_df = pd.DataFrame(list(zip(scaler.mean_, scaler.scale_)), index=variables_list).T

# NOTE(review): variables_list holds 'Resolved_SR_'-prefixed names while the
# entries of `inputs` are prefixed with `cut`. With cut='Boosted_SR' none of
# the inputs is a column of scaler_df - confirm which columns the scaler was
# fitted on and adapt variables_list accordingly.
missing = [name for name in inputs if name not in scaler_df.columns]
if missing:
    raise KeyError("inputs without scaler parameters: {}".format(missing))

# Every input name is '<cut>_<variable>'; drop the prefix in the output file.
prefix_length = len(cut) + 1

# Open the output only after the checks above, so a failure cannot leave an
# empty scaler file behind.
with open(os.path.join(args["dir"], args["output_scaler"]), "w") as f:
    for input_name in inputs:
        mean = scaler_df[input_name][0]
        scale = scaler_df[input_name][1]
        var = input_name[prefix_length:]
        print(var, mean, scale)
        # One line per variable: "<name> <mean> <scale>", values as float32.
        f.write(var + " " + str(mean.astype('float32')) + " " + str(scale.astype('float32')) + "\n")

print("Dumping done.")