### Neural networks for Run03 data with different masses

Here we generate a series of models for the Run03 data using Keras, one model per signal mass.

The goal is to first train on the 3.5 GeV signal, then use that trained model as the starting point
for the 4 GeV model, and so on through the remaining masses (each model starts from the previous one).

In [None]:
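'''
Imports for the Run03 notebook that trains one neural network per signal mass,
each starting from the model of the previous mass.
'''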
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import auc, roc_curve
import sys
import multiprocessing as mp
from multiprocessing import Pool
sys.path.append("../")

from lux_ml.utils import convert_root_to_npz, convert_csv_to_npz
from lux_ml.utils import make_folder, generate_binary_training_testing_data
from lux_ml.mlp import MLP
from lux_ml.mi import mi_binary, mi_binary_weights
from lux_ml.subsets_mi import generate_model
import lux_ml.mi

In [None]:
# Location of the input data, relative to this notebook.
data_folder = "../../Data/"

# File-name stems for the signal and background samples.
sig_name = 'golden_signal_'
back_name = 'ML_bkgs_other_Run3_ROI_cuts_raw'

# Signal masses to process, in the order they are trained.
# NOTE(review): this list was previously commented as "ignoring masses <= 5GeV
# for now", but 3.5, 4 and 5 are present -- confirm which is intended.
signal = [
    3.5, 4, 5, 7, 10, 13, 15, 17, 19, 21, 24,
    27, 30, 33, 37, 40, 50, 60, 75, 100, 300, 1000,
]

# Flip to True to (re)run the ROOT -> npz conversion of the 'summary' tree
# for the background file and for every signal mass.
convert_files = False
if convert_files:
    # The background file uses the branch names s1c/s2c and z ...
    convert_root_to_npz(
        f"{data_folder}{back_name}",
        'summary',
        ['r', 'z', 's1c', 's2c', 'weight'],
    )
    # ... while the signal files use s1/s2 and z_real.
    for sig in signal:
        convert_root_to_npz(
            f"{data_folder}{sig_name}{sig}GeV_Unif_Trigger_PSD_Run03",
            'summary',
            ['r', 'z_real', 's1', 's2', 'weight'],
        )

# Accumulators for results across repeated trainings: run this cell once,
# then re-run the cells below as often as needed to record several ROC curves.
fpr_list, tpr_list, auc_list = [], [], []
leakage_list, model_list, mi_list = [], [], []

# Names of the four input variables.
var_names = ['r', 'z', 's1', 's2']

In [None]:
def generateModel(
    mass,
    topology,
    epochs,
    batch_size,
    folder,
    prev_mass=0,
    mi_thres=.025,
    mi_input=-1
):
    """Run ``generate_model`` for a single signal mass.

    Builds the per-mass result path, preprocessing path and file names and
    forwards them, together with the training settings, to
    ``generate_model``. When ``prev_mass`` is non-zero, the results directory
    of that mass (same topology, epochs and batch size) is additionally
    passed as ``load_model``.

    Args:
        mass: Signal mass; interpolated into the result path, the
            preprocessing path and the signal file name.
        topology: Forwarded as ``topology`` and embedded in the result path.
            Its length fixes the number of activations (presumably the
            layer sizes of the network -- TODO confirm).
        epochs: Forwarded as ``epochs`` and embedded in the result path.
        batch_size: Forwarded as ``batch_size`` and embedded in the result
            path.
        folder: Suffix used to build the ``model_{folder}`` names.
        prev_mass: Mass whose results directory is handed over as
            ``load_model``; ``0`` (the default) means no ``load_model``
            argument is passed at all.
        mi_thres: Forwarded as ``mi_thres``.
        mi_input: Forwarded as ``mi_input_val``.

    Returns:
        None. The return value of ``generate_model`` is discarded.
    """
    # One activation per entry of ``topology``: 'selu' for all but the last,
    # 'sigmoid' for the last.
    layer_activations = ['selu'] * (len(topology) - 1) + ['sigmoid']

    # Only a chained model receives ``load_model``; the first model in the
    # chain must not get the keyword at all.
    if prev_mass == 0:
        warm_start = {}
    else:
        warm_start = {
            'load_model': f'results/{prev_mass}GeV/top_{topology}_epochs_{epochs}_batch_{batch_size}/',
        }

    # Settings shared by both the fresh and the chained call.
    shared_settings = dict(
        # presumably column indices of r, z, s1, s2 with the weight in
        # column 4 -- TODO confirm against generate_model
        var_set=[0, 1, 2, 3],
        weight=4,
        test_split=.2,
        num_subsets=10,
        mi_thres=mi_thres,
        mi_input_val=mi_input,
        topology=topology,
        epochs=epochs,
        num_iterations=1,
        batch_size=batch_size,
        activations=layer_activations,
        preprocess=True,
    )

    generate_model(
        f'results/{mass}GeV/top_{topology}_epochs_{epochs}_batch_{batch_size}/',
        data_folder,
        f'model_{folder}/preprocessed/[r,z,s1,s2]/{mass}GeV/',
        f'model_{folder}',
        f'golden_signal_{mass}GeV_Unif_Trigger_PSD_Run03.npz',
        'ML_bkgs_other_Run3_ROI_cuts_raw.npz',
        **warm_start,
        **shared_settings,
    )

In [None]:
# Training settings shared by every model in the mass chain; all three are
# forwarded to generateModel and also end up in the results path.
topology = [4, 10, 5, 1]
epochs, batch_size = 2, 250

def generate_models(i, q):
    """Train the whole chain of models, one per entry of ``signal``.

    The masses are processed in list order. The first mass is trained
    without ``prev_mass``; every later mass passes the preceding list entry
    as ``prev_mass`` so that ``generateModel`` hands over the previous
    results directory.

    Args:
        i: Passed to ``generateModel`` as its ``folder`` argument.
        q: Accepted for the caller's benefit but not used in this function.
    """
    for position, current_mass in enumerate(signal):
        # Position 0 has no predecessor, so the keyword is left out entirely.
        chain_link = {'prev_mass': signal[position - 1]} if position else {}
        generateModel(
            current_mass,
            topology,
            epochs,
            batch_size,
            i,
            mi_input=1.0,
            **chain_link,
        )
# Driver: run generate_models in a worker pool and wait for it to finish.
manager = mp.Manager()
q = manager.Queue()
pool = mp.Pool(10)

try:
    # One job per folder index; range(0, 1) currently yields a single job.
    jobs = [pool.apply_async(generate_models, (i, q)) for i in range(0, 1)]

    # get() blocks until the job is done and re-raises any exception that
    # occurred in the worker.
    for job in jobs:
        job.get()

    # NOTE(review): nothing visible here consumes this queue, so the 'kill'
    # message appears to have no reader -- confirm whether it is still needed.
    q.put('kill')
finally:
    # Always release the worker processes, even when a job raised; previously
    # an exception from job.get() skipped close()/join() and left the pool's
    # workers alive.
    pool.close()
    pool.join()
print("Done!")