In [None]:
# for reproducibility, before importing keras 
# we need to set random numbers in both numpy and tensorflow
import numpy as np
import tensorflow as tf
np.random.seed(98383822)
tf.random.set_seed(278732344)
import keras

import os
import collections
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import pandas as pd
import copy

In [None]:
# ---- execution switches: set one to False to skip the corresponding stage ----
debug=False
verbose=True
doTrain=True
doLoadModel=True
doLoadMetrics=True
doPlotMetrics=True
doPredict=True
doLoadPredict=True
doCalculateMetrics2=True
doPlotMetrics2=True
doCalculateMetrics3=True
doPlotMetrics3=True
# the cells that call calculate_metrics4 test this switch, so it has to be defined here as well
doCalculateMetrics4=True
# to distinguish between models, the option string has seven fields separated by "_", in order of positions:
# all options are chosen to have the same number of letters so that file names appear of the same size with "ls"
# 0: optionNbHiddenLayers: 1, 2, 3, 4, 5
# 1: optionNbNodesPerHiddenLayer: integer k (e.g. 5, 10); each hidden layer has nrNodesOutputLayer*k nodes
# 2: optionActivationFunctionHiddenLayer: E (elu), R (relu)
# 3: optionActivationFunctionLastLayer: TANH (tanh), SQNL (SQNL), SOSI (SoftSign)
# 4: optionOptimizer: Adam (Adam), AdaD (AdaDelta)
# 5: optionLossFunction: SH (squared hinge), RH (hinge, or regular hinge at power one)
# 6: optionDropoutLayer: A (no dropout), B (one dropout layer after the last hidden layer),
#    C (dropout after every hidden layer), D (dropout after the flatten layer and after every hidden layer)
#option="2_5_E_TANH_Adam_SH_A" # before
option="3_10_R_TANH_Adam_RH_B" # after comparing several this seems to be the best
#
# flatMax selects which balanced input folder is read (its name ends in "_balanced"+str(flatMax))
flatMax=17
numberOfEpochs=1200
batchSize=50000
# comma separated list of file formats in which every plot is saved
extensions="png,pdf"
# modelName is part of every output file name
modelName="Balanced"+str(flatMax)+"_"+option

In [None]:
# display the current working directory of the kernel
os.getcwd()

In [None]:
def p(name,nparray):
    """Print an array framed by a Start line and an End line.

    name    -- label printed on the Start and End lines
    nparray -- object to print; it must provide .shape and .dtype

    The End line also reports the shape, the dtype and the Python type.
    """
    print("Start",name)
    print(nparray)
    # collect the summary fields first, then print them space separated
    list_field=["End",name]
    list_field+=["shape",nparray.shape]
    list_field+=["dtype",nparray.dtype]
    list_field+=["type",type(nparray)]
    print(*list_field)
# done function

In [None]:
# Root folder that holds the ATLAS/TrackML data. It is machine specific, so it is read from the
# environment variable TRACKML_FOLDER_STEM; when the variable is not set, the current working
# directory is used. Values used so far on the author's machines:
#folderStem="/Volumes/Luiza_SSD"
#folderStem="/Users/luizaadelinaciucu/Work"
folderStem=os.environ.get("TRACKML_FOLDER_STEM",os.getcwd())
#
# minimum number of positive hits, kept as a string because it is only used to build names
minNbPositiveHit="10"
inputFolderNameUnbalanced=folderStem+"/ATLAS/TrackML/output_new_ev_000_100"
inputFolderNameBalanced=inputFolderNameUnbalanced+"_min_"+minNbPositiveHit+"_balanced"+str(flatMax)
outputFolderName=inputFolderNameUnbalanced+"_min_"+minNbPositiveHit+"_NN_B_HP"
# create the output folder (and missing parents); nothing happens if it already exists
os.makedirs(outputFolderName,exist_ok=True)

In [None]:
# Load the Input, Output and VolumeID arrays for Train and Test from .npy files.
# eventNumber is the last part of every file name: either one event or "all"
#eventNumber="000000082"
eventNumber="all"


# modelName is built in the configuration cell as "Balanced"+str(flatMax)+"_"+option,
# so the "Unbalanced" branch is only reached when modelName is set by hand
if modelName=="Unbalanced":
    # without Balanced in the names
    #
    nparray_Input_Train=np.load(inputFolderNameUnbalanced+"/NN_2_data_Input_Train_"+eventNumber+".npy")
    nparray_Input_Test=np.load(inputFolderNameUnbalanced+"/NN_2_data_Input_Test_"+eventNumber+".npy")
    #
    nparray_Output_Train=np.load(inputFolderNameUnbalanced+"/NN_2_data_OutputMin"+minNbPositiveHit+"_Train_"+eventNumber+".npy")
    nparray_Output_Test=np.load(inputFolderNameUnbalanced+"/NN_2_data_OutputMin"+minNbPositiveHit+"_Test_"+eventNumber+".npy")
    #
    nparray_VolumeID_Train=np.load(inputFolderNameUnbalanced+"/NN_2_data_VolumeID_Train_"+eventNumber+".npy")
    nparray_VolumeID_Test=np.load(inputFolderNameUnbalanced+"/NN_2_data_VolumeID_Test_"+eventNumber+".npy")
elif modelName.startswith("Balanced"):
    # with Balanced in the names for input and then for Test you have two choices, also take from balanced or unbalanced
    
    # Train
    if True:
        # Train balanced
        nparray_Input_Train=np.load(inputFolderNameBalanced+"/NN_2_data_InputBalanced_Train_"+eventNumber+".npy")
        nparray_Output_Train=np.load(inputFolderNameBalanced+"/NN_2_data_OutputBalanced_Train_"+eventNumber+".npy")
        nparray_VolumeID_Train=np.load(inputFolderNameBalanced+"/NN_2_data_VolumeIDBalanced_Train_"+eventNumber+".npy")
    
    # Test two options, output also balanced or unbalanced
    # the two blocks below are a manual switch: exactly one of them should be "if True"
    if True:
        # Test unbalanced
        nparray_Input_Test=np.load(inputFolderNameUnbalanced+"/NN_2_data_Input_Test_"+eventNumber+".npy")
        nparray_Output_Test=np.load(inputFolderNameUnbalanced+"/NN_2_data_OutputMin"+minNbPositiveHit+"_Test_"+eventNumber+".npy")
        nparray_VolumeID_Test=np.load(inputFolderNameUnbalanced+"/NN_2_data_VolumeID_Test_"+eventNumber+".npy")
        
    if False:
        # Test balanced (currently switched off)
        nparray_Input_Test=np.load(inputFolderNameBalanced+"/NN_2_data_InputBalanced_Test_"+eventNumber+".npy")
        nparray_Output_Test=np.load(inputFolderNameBalanced+"/NN_2_data_OutputBalanced_Test_"+eventNumber+".npy")
        nparray_VolumeID_Test=np.load(inputFolderNameBalanced+"/NN_2_data_VolumeIDBalanced_Test_"+eventNumber+".npy")
else:
    # any other modelName stops the cell here
    print("modelName",modelName,"not known. ERROR!")
    assert(False)
# done if

# convert output from int8 to float32, maybe it will give more precision in the results
# although you would expect Keras to convert internally in the format best for it
nparray_Output_Train = nparray_Output_Train.astype(np.float32)
nparray_Output_Test = nparray_Output_Test.astype(np.float32)

In [None]:
# inspect the training inputs: values, shape, dtype and type
p("Input_Train",nparray_Input_Train)

In [None]:
# inspect the test inputs: values, shape, dtype and type
p("Input_Test",nparray_Input_Test)

In [None]:
# inspect the training outputs after the conversion to float32
p ("Output_Train", nparray_Output_Train)

In [None]:
# inspect the test outputs after the conversion to float32
p ("Output_Test",nparray_Output_Test)

In [None]:
# inspect the VolumeID array loaded for the training sample
p("VolumeID_Train",nparray_VolumeID_Train)

In [None]:
# inspect the VolumeID array loaded for the test sample
p("VolumeID_Test",nparray_VolumeID_Test)

Creating model

In [None]:
# the random seeds of numpy and tensorflow are set in the first cell, before keras is imported;
# the lines below are an older variant kept only as a reminder and are not executed
#np.random.seed(98383822)
#tf.random.set_seed(2)
#import keras

# sizes of the first and last layer, taken from the second dimension of the training arrays;
# both are read as globals by get_modelOld and get_model
nrNodesInputLayer=nparray_Input_Train.shape[1] # three inputs (x, y, z) for each hit in the batch
nrNodesOutputLayer=nparray_Output_Train.shape[1] # one output for each hit in the batch

In [None]:
def get_modelOld():
    """Build and compile the earlier, fixed network.

    Layers: a linear Dense layer of nrNodesInputLayer nodes with input shape
    (nrNodesInputLayer,1), a Flatten layer, two elu Dense layers of
    nrNodesOutputLayer*5 nodes and a tanh Dense layer of nrNodesOutputLayer
    nodes. It is compiled with Adam, the squared hinge loss and the
    binary_accuracy metric.

    Reads the globals nrNodesInputLayer and nrNodesOutputLayer.
    Returns the compiled keras Sequential model.
    """
    # number of hidden nodes for each output node
    k=5
    nrNodesHiddenLayer=nrNodesOutputLayer*k
    nbHiddenLayers=2

    # geometry: first layer, flatten, hidden layers, output layer
    model=keras.models.Sequential()
    model.add(keras.layers.Dense(nrNodesInputLayer,activation='linear',input_shape=(nrNodesInputLayer,1)))
    model.add(keras.layers.Flatten())
    for _ in range(nbHiddenLayers):
        model.add(keras.layers.Dense(nrNodesHiddenLayer,activation='elu'))
    model.add(keras.layers.Dense(nrNodesOutputLayer,activation='tanh'))

    # choosing how the NN learns
    # https://keras.io/models/sequential/
    my_optimizer=keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    model.compile(optimizer=my_optimizer,loss=keras.losses.squared_hinge,metrics=['binary_accuracy'])

    # the model is ready to be trained
    return model
# done function 

In [None]:
# activation function with values between -1 and 1, to be used instead of tanh
# based on the answer to
# https://stackoverflow.com/questions/57023350/implementing-the-square-non-linearity-sqnl-activation-function-in-keras
def square_non_linear(x):
    """Square non-linearity (SQNL) applied element-wise to the tensor x.

    x > 2       -> 1
    0 <= x <= 2 -> x - x*x/4
    -2 <= x < 0 -> x + x*x/4
    x < -2      -> -1
    """
    quarterSquare = tf.math.square(x)/4.
    isInUpperPart = tf.logical_and(0.0 <= x, x <= 2.0)
    isInLowerPart = tf.logical_and(-2.0 <= x, x < 0)
    # start from the saturated value above 2, then fill in the two quadratic pieces
    result = tf.where(x > 2.0, tf.ones_like(x), x)
    result = tf.where(isInUpperPart, x - quarterSquare, result)
    result = tf.where(isInLowerPart, x + quarterSquare, result)
    # saturated value below -2
    return tf.where(x < -2.0, 0*x-1.0, result)

In [None]:
def get_model(option):
    """Build and compile the Keras model described by the option string.

    option has seven fields separated by "_", for example "3_10_R_TANH_Adam_RH_B":
      0: number of hidden layers, "1" to "5"
      1: integer k; every hidden layer has nrNodesOutputLayer*k nodes
      2: activation of the hidden layers: E (elu), R (relu)
      3: activation of the output layer: TANH (tanh), SQNL (square_non_linear), SOSI (softsign)
      4: optimizer: Adam (Adam), AdaD (Adadelta)
      5: loss function: SH (squared hinge), RH (hinge)
      6: dropout: A (none), B (one Dropout(0.2) after the last hidden layer),
         C (Dropout(0.2) after every hidden layer),
         D (Dropout(0.2) after the flatten layer and after every hidden layer)

    Reads the globals nrNodesInputLayer, nrNodesOutputLayer, debug and verbose.

    Returns the compiled keras Sequential model, with metric binary_accuracy.
    A malformed option or an unknown field value prints a message and raises
    AssertionError.
    """
    list_option = option.split("_")
    if len(list_option)!=7:
        print("option",option,"list_option",list_option,"does not have 7 elements. Will ABORT!!!")
        raise AssertionError("option does not have 7 elements")
    optionNbHiddenLayers=list_option[0]
    optionNbNodesPerHiddenLayer=list_option[1]
    optionActivationFunctionHiddenLayer=list_option[2]
    optionActivationFunctionLastLayer=list_option[3]
    optionOptimizer=list_option[4]
    optionLossFunction=list_option[5]
    optionDropoutLayer=list_option[6]

    if debug or verbose:
        print("optionNbHiddenLayers",optionNbHiddenLayers)
        print("optionNbNodesPerHiddenLayer",optionNbNodesPerHiddenLayer)
        print("optionActivationFunctionHiddenLayer",optionActivationFunctionHiddenLayer)
        print("optionActivationFunctionLastLayer",optionActivationFunctionLastLayer)
        print("optionOptimizer",optionOptimizer)
        print("optionLossFunction",optionLossFunction)
        print("optionDropoutLayer",optionDropoutLayer)

    # nr nodes on the hidden layers
    k=int(optionNbNodesPerHiddenLayer) # e.g. 5
    nrNodesHiddenLayer=nrNodesOutputLayer*k

    # create empty model
    model=keras.models.Sequential()

    # add first layer
    model.add(keras.layers.Dense(nrNodesInputLayer,activation='linear',input_shape=(nrNodesInputLayer,1)))

    # flatten first layer
    model.add(keras.layers.Flatten())

    # option D also has a dropout layer right after the flatten layer
    if optionDropoutLayer == "D":
        model.add(keras.layers.Dropout(0.2))

    # calculate activationNameHiddenLayer
    dict_activationNameHiddenLayer={"E":"elu","R":"relu"}
    if optionActivationFunctionHiddenLayer not in dict_activationNameHiddenLayer:
        print("optionActivationFunctionHiddenLayer",optionActivationFunctionHiddenLayer, "not known. Will ABORT!!!")
        raise AssertionError("optionActivationFunctionHiddenLayer not known")
    activationNameHiddenLayer=dict_activationNameHiddenLayer[optionActivationFunctionHiddenLayer]

    # add hidden layers, each one followed by a dropout layer for options C and D
    if optionNbHiddenLayers not in ("1","2","3","4","5"):
        print("optionNbHiddenLayers",optionNbHiddenLayers, "not known. Will ABORT!!!")
        raise AssertionError("optionNbHiddenLayers not known")
    doDropoutAfterEachHiddenLayer=optionDropoutLayer in ("C","D")
    for _ in range(int(optionNbHiddenLayers)):
        model.add(keras.layers.Dense(nrNodesHiddenLayer,activation=activationNameHiddenLayer))
        if doDropoutAfterEachHiddenLayer:
            model.add(keras.layers.Dropout(0.2))
    # done for loop over hidden layers

    # A, C and D add nothing more here; B adds its single dropout layer after the last hidden layer
    if optionDropoutLayer == "B":
        model.add(keras.layers.Dropout(0.2))
    elif optionDropoutLayer not in ("A","C","D"):
        print("optionDropoutLayer",optionDropoutLayer, "not known. Will ABORT!!!")
        raise AssertionError("optionDropoutLayer not known")
    # done if

    # add output layer
    # square_non_linear is looked up only when SQNL is requested
    if optionActivationFunctionLastLayer=="TANH":
        my_activation='tanh'
    elif optionActivationFunctionLastLayer=="SQNL":
        my_activation=square_non_linear
    elif optionActivationFunctionLastLayer=="SOSI":
        my_activation='softsign'
    else:
        print("optionActivationFunctionLastLayer",optionActivationFunctionLastLayer,"not known. Will ABORT!!!")
        raise AssertionError("optionActivationFunctionLastLayer not known")
    # done if
    # add the layer with the chosen my_activation
    model.add(keras.layers.Dense(nrNodesOutputLayer,activation=my_activation))

    # model geometry done

    # choosing how the NN learns
    # https://keras.io/models/sequential/

    # set my_optimizer
    if optionOptimizer=="Adam":
        my_optimizer=keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    elif optionOptimizer=="AdaD":
        my_optimizer=keras.optimizers.Adadelta(learning_rate=0.001, rho=0.95, epsilon=1e-07)
    else:
        print("optionOptimizer",optionOptimizer,"not known. Will ABORT!!!")
        raise AssertionError("optionOptimizer not known")
    # done if

    # set my_loss
    if optionLossFunction=="SH":
        # learning method squared hinge
        my_loss=keras.losses.squared_hinge
    elif optionLossFunction=="RH":
        # learning method hinge (regular hinge)
        my_loss=keras.losses.hinge
    else:
        print("optionLossFunction",optionLossFunction,"not known. Will ABORT!!!")
        raise AssertionError("optionLossFunction not known")
    # done if

    # compile the model with the chosen my_optimizer and my_loss
    model.compile(
        optimizer=my_optimizer,
        loss=my_loss,
        metrics=['binary_accuracy'],
        )

    # now model is done we are ready to train
    return model
# done function 

In [None]:
# build and compile the model described by the option string of the configuration cell
model=get_model(option)

In [None]:
def train_model(model,modelName,numberOfEpochs,batchSize):
    """Fit the model and store its metrics and weights in outputFolderName.

    model          -- compiled model, trained in place
    modelName      -- used in the output file names
    numberOfEpochs -- number of epochs passed to fit
    batchSize      -- batch size passed to fit

    The training data are the globals nparray_Input_Train and
    nparray_Output_Train; the globals nparray_Input_Test and
    nparray_Output_Test are the validation data. Shuffling is switched off.

    Returns (model, binary accuracy Train, binary accuracy Test, loss Train,
    loss Test), the last four as stored per epoch in the fit history.
    """
    print("*** Start train_model for modeName",modelName,"***")
    # https://keras.io/models/sequential
    history=model.fit(
            nparray_Input_Train,
            nparray_Output_Train,
            batch_size=batchSize,
            epochs=numberOfEpochs,
            validation_data=(nparray_Input_Test,nparray_Output_Test),
            shuffle=False,
            )

    # per-epoch values recorded by fit; the val_ entries refer to the validation (Test) data
    dict_history=history.history
    nparray_accuracyBinary_Train=dict_history["binary_accuracy"]
    nparray_accuracyBinary_Test=dict_history["val_binary_accuracy"]
    nparray_loss_Train=dict_history["loss"]
    nparray_loss_Test=dict_history["val_loss"]

    # all output files of this training share the same path prefix
    outputPathStem=outputFolderName+"/"+"NN_3_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)
    list_suffix_values=[
        ("_accuracyBinary_Train.npy",nparray_accuracyBinary_Train),
        ("_accuracyBinary_Test.npy",nparray_accuracyBinary_Test),
        ("_loss_Train.npy",nparray_loss_Train),
        ("_loss_Test.npy",nparray_loss_Test),
        ]
    for suffix,values in list_suffix_values:
        np.save(outputPathStem+suffix,values)
    # weights and biases
    model.save_weights(outputPathStem+"_weights.hdf5")

    # ready to return
    return (model,nparray_accuracyBinary_Train,nparray_accuracyBinary_Test,nparray_loss_Train,nparray_loss_Test)
# done function

In [None]:
# train only when requested in the configuration cell; train_model also writes the metrics
# and the weights to outputFolderName
if doTrain:
    model,nparray_accuracyBinary_Train,nparray_accuracyBinary_Test,nparray_loss_Train,nparray_loss_Test=train_model(model,modelName=modelName,numberOfEpochs=numberOfEpochs,batchSize=batchSize)

In [None]:
def load_model(model,modelName,numberOfEpochs,batchSize):
    """Load stored weights and biases into the model and return it.

    The file read is
    outputFolderName/NN_3_<modelName>_<numberOfEpochs>_<batchSize>_weights.hdf5,
    the same name that train_model writes. outputFolderName is a global.
    """
    print("*** Start load_model for modeName",modelName,"***")

    # file name pieces, joined with "_"
    list_namePiece=["NN_3",modelName,str(numberOfEpochs),str(batchSize),"weights.hdf5"]
    model.load_weights(outputFolderName+"/"+"_".join(list_namePiece))

    # ready to return
    return model
# done function

In [None]:
# load the stored weights for this modelName, numberOfEpochs and batchSize into the model
if doLoadModel:
    model=load_model(model,modelName,numberOfEpochs,batchSize)

In [None]:
def load_metrics(modelName,numberOfEpochs,batchSize):
    """Load the stored accuracy and loss arrays of one training.

    The four files are read from the global outputFolderName; their names
    start with NN_3_<modelName>_<numberOfEpochs>_<batchSize>.

    Returns the tuple (binary accuracy Train, binary accuracy Test,
    loss Train, loss Test).
    """
    print("*** Start load_metrics for modeName",modelName,"***")

    # path prefix shared by the four files of this training
    outputPathStem=outputFolderName+"/"+"NN_3_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)

    # suffixes in the order of the returned tuple
    list_suffix=[
        "_accuracyBinary_Train.npy",
        "_accuracyBinary_Test.npy",
        "_loss_Train.npy",
        "_loss_Test.npy",
        ]

    # ready to return
    return tuple(np.load(outputPathStem+suffix) for suffix in list_suffix)
# done function

In [None]:
# read the accuracy and loss arrays back from the files in outputFolderName
if doLoadMetrics:
    nparray_accuracyBinary_Train,nparray_accuracyBinary_Test,nparray_loss_Train,nparray_loss_Test=load_metrics(modelName=modelName,numberOfEpochs=numberOfEpochs,batchSize=batchSize)

In [None]:
# overlay accuracy train and test
def plot_accuracy_Train_vs_Test(nparray_accuracyBinary_Train,nparray_accuracyBinary_Test,modelName):
    """Plot the binary accuracy of Train and Test versus epoch and save the figure.

    nparray_accuracyBinary_Train -- one accuracy value per epoch for Train
    nparray_accuracyBinary_Test  -- one accuracy value per epoch for Test
    modelName                    -- used in the title and in the file name

    The figure is saved once per extension listed in the global extensions, as
    outputFolderName/NN_5_<modelName>_<numberOfEpochs>_<batchSize>_graph_accuracy.<extension>;
    outputFolderName, numberOfEpochs and batchSize are globals as well.
    """
    # draw on a figure of its own, so that curves left on the current pyplot figure
    # by an earlier plot cannot end up in this graph
    fig,ax=plt.subplots()
    # the labels are attached to the lines themselves, so the legend cannot get out of step
    ax.plot(nparray_accuracyBinary_Train,label='Train')
    ax.plot(nparray_accuracyBinary_Test,label='Test')
    ax.set_title('Model_'+modelName+' accuracy')
    ax.set_ylabel('Binary accuracy')
    ax.set_xlabel('Epoch')
    #ax.set_xlim(left=0,right=10)
    #ax.set_ylim(bottom=0.75,top=0.80)
    ax.legend(loc='best')
    outputFileNameStem="NN_5_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)
    for extension in extensions.split(","):
        fig.savefig(outputFolderName+"/"+outputFileNameStem+"_graph_accuracy."+extension)
# done function

In [None]:
# draw and save the accuracy graph
if doPlotMetrics:
    plot_accuracy_Train_vs_Test(nparray_accuracyBinary_Train,nparray_accuracyBinary_Test,modelName)

In [None]:
# overlay loss train and test
def plot_loss_Train_vs_Test(nparray_loss_Train,nparray_loss_Test,modelName):
    """Plot the loss of Train and Test versus epoch and save the figure.

    nparray_loss_Train -- one loss value per epoch for Train
    nparray_loss_Test  -- one loss value per epoch for Test
    modelName          -- used in the title and in the file name

    The figure is saved once per extension listed in the global extensions, as
    outputFolderName/NN_5_<modelName>_<numberOfEpochs>_<batchSize>_graph_loss.<extension>;
    outputFolderName, numberOfEpochs and batchSize are globals as well.
    """
    # draw on a figure of its own, so that curves left on the current pyplot figure
    # by an earlier plot cannot end up in this graph
    fig,ax=plt.subplots()
    # the labels are attached to the lines themselves, so the legend cannot get out of step
    ax.plot(nparray_loss_Train,label='Train')
    ax.plot(nparray_loss_Test,label='Test')
    ax.set_title('Model_'+modelName+' loss')
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epoch')
    ax.legend(loc="upper left")
    outputFileNameStem="NN_5_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)
    for extension in extensions.split(","):
        fig.savefig(outputFolderName+"/"+outputFileNameStem+"_graph_loss."+extension)
# done function

In [None]:
# draw and save the loss graph
if doPlotMetrics:
    plot_loss_Train_vs_Test(nparray_loss_Train,nparray_loss_Test,modelName)

In [None]:
def predict_from_model(model,nparray_Input_Train,nparray_Input_Test,nparray_Output_Train,nparray_Output_Test):
    """Predict the output for Train and Test, print the result and save it.

    For each sample the prediction of the model and the difference
    prediction minus true output are printed with p and saved as
    outputFolderName/NN_4_<modelName>_<numberOfEpochs>_<batchSize>_PredictedOutput_<sample>.npy
    and ..._Diff_<sample>.npy, where <sample> is Train or Test.
    outputFolderName, modelName, numberOfEpochs and batchSize are globals.
    Nothing is returned.
    """
    outputPathStem=outputFolderName+"/"+"NN_4_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)

    # the same steps are done for both samples, Train first
    list_sample=[
        ("Train",nparray_Input_Train,nparray_Output_Train),
        ("Test",nparray_Input_Test,nparray_Output_Test),
        ]
    for sample,nparray_Input,nparray_Output in list_sample:
        nparray_PredictedOutput=model.predict(nparray_Input)
        p("PredictedOutput_"+sample,nparray_PredictedOutput)
        p("Output_"+sample,nparray_Output)
        nparray_Diff=nparray_PredictedOutput-nparray_Output
        p("Diff_"+sample,nparray_Diff)
        # save numpy arrays to npy files
        np.save(outputPathStem+"_PredictedOutput_"+sample+".npy",nparray_PredictedOutput)
        np.save(outputPathStem+"_Diff_"+sample+".npy",nparray_Diff)
    # done for loop over samples

# done function    

In [None]:
# predict on Train and Test; the predictions and their differences to the true output are saved to files
if doPredict:
    predict_from_model(model,nparray_Input_Train,nparray_Input_Test,nparray_Output_Train,nparray_Output_Test)

In [None]:
def load_predicted():
    """Load the arrays written by predict_from_model.

    The files are read from the global outputFolderName; their names start
    with NN_4_<modelName>_<numberOfEpochs>_<batchSize>, all three taken from
    globals.

    Returns the tuple (PredictedOutput Train, Diff Train,
    PredictedOutput Test, Diff Test).
    """
    outputPathStem=outputFolderName+"/"+"NN_4_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)

    # read in the order of the returned tuple: both arrays of Train, then both arrays of Test
    list_nparray=[]
    for sample in ("Train","Test"):
        for quantity in ("PredictedOutput","Diff"):
            list_nparray.append(np.load(outputPathStem+"_"+quantity+"_"+sample+".npy"))
    # done for loop

    return tuple(list_nparray)
# done function   

In [None]:
# read the predictions and the differences back from the files in outputFolderName
if doLoadPredict:
    nparray_PredictedOutput_Train, nparray_Diff_Train, nparray_PredictedOutput_Test, nparray_Diff_Test=load_predicted()

In [None]:
# scratch cell: displays 0.8*i for i = 0..20; the list is only shown, not stored
# NOTE(review): presumably 80% of each possible number of positive hits in a bucket of 20 - confirm or delete
[i*0.8 for i in range(21)]

In [None]:
def calculate_metrics4(TrainOrTest, nparray_Output, nparray_PredictedOutput, bucketSize=20, minNbPositive=10, minFractionTruePositive=0.8):
    """Calculate the efficiency, in percent, with which buckets are reconstructed.

    A hit is positive when its true output is > 0 and true positive when, in
    addition, its predicted output is > 0. A bucket counts as a truth particle
    when it has at least minNbPositive positive hits; it counts as
    reconstructed when, in addition, more than minFractionTruePositive of its
    positive hits are true positive.

    TrainOrTest             -- label printed in front of the result
    nparray_Output          -- true output, indexed as [bucket][hit]
    nparray_PredictedOutput -- predicted output, indexed the same way
    bucketSize              -- number of hits read from each bucket (default 20)
    minNbPositive           -- minimum number of positive hits (default 10)
    minFractionTruePositive -- fraction that must be exceeded (default 0.8)

    Reads the global debug; when it is True every bucket and hit is printed.

    Prints and returns the efficiency
    100*counter_particleRecon/counter_particleTruth. When no bucket qualifies
    as a truth particle the efficiency is not defined and nan is returned.
    Raises AssertionError when the two arrays have a different number of buckets.
    """
    nbBucket = len(nparray_Output)
    assert(nbBucket==len(nparray_PredictedOutput))
    print("nbBucket",nbBucket)
    print("bucketSize",bucketSize)
    counter_particleTruth=0
    counter_particleRecon=0
    # loop over buckets
    for i in range(nbBucket):
        if debug or i%100000==0:
            print("i",i)
        nparray_output=nparray_Output[i]
        nparray_outputPredicted=nparray_PredictedOutput[i]
        if debug:
            p("nparray_output",nparray_output)
            p("nparray_outputPredicted",nparray_outputPredicted)
        counter_Positive=0
        counter_truePositive=0
        # loop over hits in the bucket
        for j in range(bucketSize):
            output=nparray_output[j]
            outputPredicted=nparray_outputPredicted[j]
            if debug:
                print("j",j,"output",output,"outputPredicted",outputPredicted)
            if output>0:
                counter_Positive+=1
                if outputPredicted>0:
                    counter_truePositive+=1
        # done for loop over hit (j)
        if debug:
            print("counter_Positive",counter_Positive,"counter_truePositive",counter_truePositive)
        # counter_Positive>0 protects the division when minNbPositive is set to 0
        if counter_Positive>=minNbPositive and counter_Positive>0:
            counter_particleTruth+=1
            if counter_truePositive/counter_Positive>minFractionTruePositive:
                counter_particleRecon+=1
    # done for loop over bucket (i)
    if counter_particleTruth>0:
        efficiency=100*counter_particleRecon/counter_particleTruth
    else:
        # no truth particle at all: report nan instead of dividing by zero
        efficiency=float("nan")
    print(TrainOrTest,"efficiency=%.1f percent"%efficiency,", counter_particleTruth=%.0f"%counter_particleTruth,", counter_particleRecon=%.0f"%counter_particleRecon)
    return efficiency
# done function

In [None]:
# efficiency of calculate_metrics4 on the training sample; the result of the call is kept in npparay_bla_Train
if doCalculateMetrics4:
    print("")
    print("Train")
    npparay_bla_Train=calculate_metrics4("Train", nparray_Output_Train, nparray_PredictedOutput_Train)

In [None]:
# efficiency of calculate_metrics4 on the test sample; the result of the call is kept in npparay_bla_Test
if doCalculateMetrics4:
    print("")
    print("Test")
    npparay_bla_Test=calculate_metrics4("Test", nparray_Output_Test, nparray_PredictedOutput_Test)

In [None]:
def calculate_metrics2(TrainOrTest, nparray_Output, nparray_PredictedOutput, nparray_VolumeID):
    """Compute confusion matrix metrics for every bucket, print and save them, and sum them per VolumeID.

    A hit is treated as positive when its value is >0, both for the truth
    (nparray_Output) and for the prediction (nparray_PredictedOutput). The hits j of
    every bucket i are counted as
        TP (true positive), FP (false positive, type I error),
        FN (false negative, type II error), TN (true negative)
    and from these four counts the following values are derived for the bucket:
        OutputPositive          = TP+FN (hits that are positive in truth)
        OutputNegative          = FP+TN (hits that are negative in truth)
        PredictedOutputPositive = TP+FP (hits that are predicted positive)
        PredictedOutputNegative = FN+TN (hits that are predicted negative)
        accuracy                = (TP+TN)/(TP+FP+FN+TN)
        precision               = TP/(TP+FP)
        recall                  = TP/(TP+FN)
        negativePredictedValue  = TN/(TN+FN)
        trueNegativeRate        = TN/(TN+FP)
    See https://en.wikipedia.org/wiki/Precision_and_recall
    In the vocabulary of the original comments:
        efficiency = of the truth objects, the fraction that is also reconstructed = recall
        fake rate  = of the reconstructed objects, the fraction that is not truth  = FP/(TP+FP) = 1 - precision
    A ratio whose denominator is zero (for example any ratio of an empty bucket) is
    stored as 0.0, so that every bucket contributes a floating point number.

    Every per-bucket array is printed with p() and saved as
    outputFolderName/NN_5_<modelName>_<numberOfEpochs>_<batchSize>_<Label>_<TrainOrTest>.npy

    Parameters
    ----------
    TrainOrTest : str
        Label of the sample, used in the progress printout and in the file names.
    nparray_Output : sequence indexed as [i][j]
        Truth value of hit j in bucket i.
    nparray_PredictedOutput : sequence indexed as [i][j]
        Predicted value of hit j in bucket i.
    nparray_VolumeID : sequence indexed as [i][j]
        VolumeID of hit j in bucket i, used as dictionary key.

    Returns
    -------
    tuple
        The 13 per-bucket numpy arrays in the order OutputPositive, OutputNegative,
        PredictedOutputPositive, PredictedOutputNegative, TruePositive, FalsePositive,
        FalseNegative, TrueNegative, accuracy, precision, recall,
        negativePredictedValue, trueNegativeRate, followed by dict_VolumeID_MetricBasic,
        which maps each VolumeID to a numpy array [TP,FP,FN,TN] summed over all its hits.
    """
    def ratio(numerator,denominator):
        # an empty denominator makes the ratio undefined; by convention store 0.0
        if denominator==0:
            return 0.0
        return numerator/denominator
    # done function

    # position of each confusion matrix entry inside a MetricBasic array
    # confusion matrix
    # TP FP
    # FN TN
    index_TP,index_FP,index_FN,index_TN=0,1,2,3

    # (name printed by p, label used in the file name) for each per-bucket quantity,
    # in the order in which the arrays are returned
    list_quantity=[
        ("nparray_bucket_OutputPositive","OutputPositive"),
        ("nparray_bucket_OutputNegative","OutputNegative"),
        ("nparray_bucket_PredictedOutputPositive","PredictedOutputPositive"),
        ("nparray_bucket_PredictedOutputNegative","PredictedOutputNegative"),
        ("nparray_bucket_TruePositive","TruePositive"),
        ("nparray_bucket_FalsePositive","FalsePositive"),
        ("nparray_bucket_FalseNegative","FalseNegative"),
        ("nparray_bucket_TrueNegative","TrueNegative"),
        ("nparray_bucket_accuracy","Accuracy"),
        ("nparray_bucket_precision","Precision"),
        ("nparray_bucket_recall","Recall"),
        ("nparray_bucket_negativePredictedValue","NegativePredictedValue"),
        ("nparray_bucket_trueNegativeRate","TrueNegativeRate"),
    ]
    # one list per quantity, each list gets one value per bucket
    list_list_bucket=[[] for quantity in list_quantity]

    # for each VolumeID we sum the MetricBasic (TP,FP,FN,TN) of all the hits in that VolumeID
    dict_VolumeID_MetricBasic={}

    # loop over all the buckets
    nbBucketTotal=len(nparray_Output)
    for i in range(nbBucketTotal):
        if i%100000==0:
            print(TrainOrTest+" bucket i",i,"/",nbBucketTotal)
        nparray_bucket_Output=nparray_Output[i]
        nparray_bucket_PredictedOutput=nparray_PredictedOutput[i]

        # sum of the MetricBasic of all the hits of this bucket
        bucket_MetricBasic=np.zeros(4,dtype=int)
        # for loop over j (hit)
        for j in range(len(nparray_bucket_Output)):
            hit_Output=nparray_bucket_Output[j]
            hit_PredictedOutput=nparray_bucket_PredictedOutput[j]

            # find which one of the four confusion matrix entries this hit belongs to
            if hit_PredictedOutput>0:
                # the value is predicted positive
                index=index_TP if hit_Output>0 else index_FP
            else:
                # the value is predicted negative
                index=index_FN if hit_Output>0 else index_TN
            # done if

            # MetricBasic of this hit: exactly one of the four values is 1, the other three are 0
            # a new array is created for every hit, so it is safe to store it in the dictionary
            MetricBasic=np.zeros(4,dtype=int)
            MetricBasic[index]=1
            bucket_MetricBasic+=MetricBasic

            # add the MetricBasic of this hit to the sum of the VolumeID of this hit
            VolumeID=nparray_VolumeID[i][j]
            if VolumeID in dict_VolumeID_MetricBasic:
                dict_VolumeID_MetricBasic[VolumeID]+=MetricBasic
            else:
                dict_VolumeID_MetricBasic[VolumeID]=MetricBasic
            # done if
        # done for loop over j (hit)

        # back to regular Python integers, so that the lists below contain plain numbers
        counter_hit_TP,counter_hit_FP,counter_hit_FN,counter_hit_TN=bucket_MetricBasic.tolist()

        # the values of this bucket, in the same order as list_quantity
        list_bucket_value=[
            counter_hit_TP+counter_hit_FN, # OutputPositive
            counter_hit_FP+counter_hit_TN, # OutputNegative
            counter_hit_TP+counter_hit_FP, # PredictedOutputPositive
            counter_hit_FN+counter_hit_TN, # PredictedOutputNegative
            counter_hit_TP,
            counter_hit_FP,
            counter_hit_FN,
            counter_hit_TN,
            ratio(counter_hit_TP+counter_hit_TN,counter_hit_TP+counter_hit_FP+counter_hit_FN+counter_hit_TN), # accuracy
            ratio(counter_hit_TP,counter_hit_TP+counter_hit_FP), # precision
            ratio(counter_hit_TP,counter_hit_TP+counter_hit_FN), # recall
            ratio(counter_hit_TN,counter_hit_TN+counter_hit_FN), # negative predicted value
            ratio(counter_hit_TN,counter_hit_TN+counter_hit_FP), # true negative rate
        ]
        # add the current bucket to the lists across all buckets
        for list_bucket,bucket_value in zip(list_list_bucket,list_bucket_value):
            list_bucket.append(bucket_value)
    # done for loop over i (bucket)

    # convert the lists to numpy arrays
    list_nparray_bucket=[np.array(list_bucket) for list_bucket in list_list_bucket]

    # print all the arrays
    for (name,label),nparray_bucket in zip(list_quantity,list_nparray_bucket):
        p(name,nparray_bucket)

    # save numpy arrays to file, first create the common part of the name based on the current model
    outputFileNameStem="NN_5_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)
    # create the name of each numpy array and save it (each array is saved exactly once)
    for (name,label),nparray_bucket in zip(list_quantity,list_nparray_bucket):
        outputFileName=outputFolderName+"/"+outputFileNameStem+"_"+label+"_"+TrainOrTest+".npy"
        np.save(outputFileName,nparray_bucket)

    # done all, ready to return
    return tuple(list_nparray_bucket)+(dict_VolumeID_MetricBasic,)
# done function

In [None]:
# per-bucket metrics of the Train sample; the 14 names follow the return order of calculate_metrics2
if doCalculateMetrics2:
    print("\nTrain")
    (
        nparray_bucket_OutputPositive_Train,
        nparray_bucket_OutputNegative_Train,
        nparray_bucket_PredictedOutputPositive_Train,
        nparray_bucket_PredictedOutputNegative_Train,
        nparray_bucket_TruePositive_Train,
        nparray_bucket_FalsePositive_Train,
        nparray_bucket_FalseNegative_Train,
        nparray_bucket_TrueNegative_Train,
        nparray_bucket_accuracy_Train,
        nparray_bucket_precision_Train,
        nparray_bucket_recall_Train,
        nparray_bucket_negativePredictedValue_Train,
        nparray_bucket_trueNegativeRate_Train,
        dict_VolumeID_MetricBasic_Train,
    )=calculate_metrics2(
        "Train",
        nparray_Output_Train,
        nparray_PredictedOutput_Train,
        nparray_VolumeID_Train,
    )

In [None]:
# per-bucket metrics of the Test sample; the 14 names follow the return order of calculate_metrics2
if doCalculateMetrics2:
    print("\nTest")
    (
        nparray_bucket_OutputPositive_Test,
        nparray_bucket_OutputNegative_Test,
        nparray_bucket_PredictedOutputPositive_Test,
        nparray_bucket_PredictedOutputNegative_Test,
        nparray_bucket_TruePositive_Test,
        nparray_bucket_FalsePositive_Test,
        nparray_bucket_FalseNegative_Test,
        nparray_bucket_TrueNegative_Test,
        nparray_bucket_accuracy_Test,
        nparray_bucket_precision_Test,
        nparray_bucket_recall_Test,
        nparray_bucket_negativePredictedValue_Test,
        nparray_bucket_trueNegativeRate_Test,
        dict_VolumeID_MetricBasic_Test,
    )=calculate_metrics2(
        "Test",
        nparray_Output_Test,
        nparray_PredictedOutput_Test,
        nparray_VolumeID_Test,
    )

In [None]:
# bin edges shifted to the left by half a bin width, so that every bin is centered on its value
# 22 edges from -0.5 to 20.5: 21 bins of width 1 centered on the integers 0..20
bins_int_0_21 = list(map(lambda edge: edge-0.5, range(22)))
# 22 edges from -0.025 to 1.025: 21 bins of width 0.05 centered on 0, 0.05, ..., 1
bins_float_0_1 = list(map(lambda edge: edge/20-0.025, range(22)))
for binsName,binsEdges in (("bins_int_0_21",bins_int_0_21),("bins_float_0_1",bins_float_0_1)):
    print(binsName,binsEdges)

In [None]:
# overlay train and test, train is expected to look better than test
def plot_train_test_OutputPositive(nparray_bucket_OutputPositive_Train,nparray_bucket_OutputPositive_Test,modelName):
    """Overlay the Train and Test histograms of the per-bucket OutputPositive values and save the figure.

    Both samples are drawn on the current pyplot figure as step histograms with
    density=True and the bins_int_0_21 edges, Train in blue and Test in red. The
    figure gets modelName as title and is saved in outputFolderName once for each
    entry of the comma-separated extensions string.
    """
    # NOTE(review): with density=True the y axis is a normalised density, not a bucket count
    commonStyle=dict(bins=bins_int_0_21,density=True,alpha=1,histtype='step')
    plt.hist(nparray_bucket_OutputPositive_Train,color="blue",label="Train",**commonStyle)
    plt.hist(nparray_bucket_OutputPositive_Test,color="red",label="Test",**commonStyle)
    plt.title(modelName)
    plt.xlabel('Bucket OutputPositive')
    plt.ylabel('Nr of buckets')
    plt.legend(loc='upper left')
    # file name stem built from the model name and the training settings
    fileNameStem="_".join(("NN_5_"+modelName,str(numberOfEpochs),str(batchSize)))
    for extension in extensions.split(","):
        plt.savefig("%s/%s_histo_OutputPositive.%s"%(outputFolderName,fileNameStem,extension))
# done function
if doPlotMetrics2:
    plot_train_test_OutputPositive(
        nparray_bucket_OutputPositive_Train,
        nparray_bucket_OutputPositive_Test,
        modelName,
    )

In [None]:
# overlay train and test, train is expected to look better than test
def plot_train_test_PredictedOutputPositive(nparray_bucket_PredictedOutputPositive_Train,nparray_bucket_PredictedOutputPositive_Test,modelName):
    """Overlay the Train and Test histograms of the per-bucket PredictedOutputPositive values and save the figure.

    Both samples are drawn on the current pyplot figure as step histograms with
    density=True and the bins_int_0_21 edges, Train in blue and Test in red. The
    figure gets modelName as title and is saved in outputFolderName once for each
    entry of the comma-separated extensions string.
    """
    # NOTE(review): with density=True the y axis is a normalised density, not a bucket count
    commonStyle=dict(bins=bins_int_0_21,density=True,alpha=1,histtype='step')
    plt.hist(nparray_bucket_PredictedOutputPositive_Train,color="blue",label="Train",**commonStyle)
    plt.hist(nparray_bucket_PredictedOutputPositive_Test,color="red",label="Test",**commonStyle)
    plt.title(modelName)
    plt.xlabel('Bucket PredictedOutputPositive')
    plt.ylabel('Nr of buckets')
    plt.legend(loc='upper left')
    # file name stem built from the model name and the training settings
    fileNameStem="_".join(("NN_5_"+modelName,str(numberOfEpochs),str(batchSize)))
    for extension in extensions.split(","):
        plt.savefig("%s/%s_histo_PredictedOutputPositive.%s"%(outputFolderName,fileNameStem,extension))
# done function
if doPlotMetrics2:
    plot_train_test_PredictedOutputPositive(
        nparray_bucket_PredictedOutputPositive_Train,
        nparray_bucket_PredictedOutputPositive_Test,
        modelName,
    )

In [None]:
# 2D histogram for the Train sample: for every bucket, the number of truth positive hits (x)
# versus the number of predicted positive hits (y)
# draw only when the metrics exist (doCalculateMetrics2) and plotting is requested (doPlotMetrics2),
# like the other metrics2 plots of this notebook
if doCalculateMetrics2 and doPlotMetrics2:
    fig, ax = plt.subplots()
    # cmin=1: bins without any bucket are not drawn
    h=ax.hist2d(
        nparray_bucket_OutputPositive_Train,
        nparray_bucket_PredictedOutputPositive_Train,
        bins=[bins_int_0_21,bins_int_0_21],
        cmin=1
        )
    # h[3] is the image returned by hist2d, used as the mappable of the colorbar
    fig.colorbar(h[3], ax=ax)
    ax.set_xlabel("Output Positive")
    ax.set_ylabel("Predicted Output Positive")
    ax.set_title(modelName+" Train")
    outputFileNameStem="NN_5_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)
    # save explicitly the figure created above, once per requested extension
    for extension in extensions.split(","):
        fig.savefig(outputFolderName+"/"+outputFileNameStem+"_histo_OutputPositive_vs_PredictedOutputPositive_Train."+extension)

In [None]:
# 2D histogram for the Test sample: for every bucket, the number of truth positive hits (x)
# versus the number of predicted positive hits (y)
# draw only when the metrics exist (doCalculateMetrics2) and plotting is requested (doPlotMetrics2),
# like the other metrics2 plots of this notebook
if doCalculateMetrics2 and doPlotMetrics2:
    fig, ax = plt.subplots()
    # cmin=1: bins without any bucket are not drawn
    h=ax.hist2d(
        nparray_bucket_OutputPositive_Test,
        nparray_bucket_PredictedOutputPositive_Test,
        bins=[bins_int_0_21,bins_int_0_21],
        cmin=1
        )
    # h[3] is the image returned by hist2d, used as the mappable of the colorbar
    fig.colorbar(h[3], ax=ax)
    ax.set_xlabel("Output Positive")
    ax.set_ylabel("Predicted Output Positive")
    ax.set_title(modelName+" Test")
    outputFileNameStem="NN_5_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)
    # save explicitly the figure created above, once per requested extension
    for extension in extensions.split(","):
        fig.savefig(outputFolderName+"/"+outputFileNameStem+"_histo_OutputPositive_vs_PredictedOutputPositive_Test."+extension)

In [None]:
#assert False

In [None]:
# overlay train and test, train is expected to look better than test
def plot_train_test_OutputNegative(nparray_bucket_OutputNegative_Train,nparray_bucket_OutputNegative_Test,modelName):
    """Overlay the Train and Test histograms of the per-bucket OutputNegative values and save the figure.

    Both samples are drawn on the current pyplot figure as step histograms with
    density=True and the bins_int_0_21 edges, Train in blue and Test in red. The
    figure gets modelName as title and is saved in outputFolderName once for each
    entry of the comma-separated extensions string.
    """
    # NOTE(review): with density=True the y axis is a normalised density, not a bucket count
    commonStyle=dict(bins=bins_int_0_21,density=True,alpha=1,histtype='step')
    plt.hist(nparray_bucket_OutputNegative_Train,color="blue",label="Train",**commonStyle)
    plt.hist(nparray_bucket_OutputNegative_Test,color="red",label="Test",**commonStyle)
    plt.title(modelName)
    plt.xlabel('Bucket OutputNegative')
    plt.ylabel('Nr of buckets')
    plt.legend(loc='upper left')
    # file name stem built from the model name and the training settings
    fileNameStem="_".join(("NN_5_"+modelName,str(numberOfEpochs),str(batchSize)))
    for extension in extensions.split(","):
        plt.savefig("%s/%s_histo_OutputNegative.%s"%(outputFolderName,fileNameStem,extension))
# done function
if doPlotMetrics2:
    plot_train_test_OutputNegative(
        nparray_bucket_OutputNegative_Train,
        nparray_bucket_OutputNegative_Test,
        modelName,
    )

In [None]:
# overlay train and test, train is expected to look better than test
def plot_train_test_PredictedOutputPositive(nparray_bucket_PredictedOutputPositive_Train,nparray_bucket_PredictedOutputPositive_Test,modelName):
    """Overlay the Train and Test histograms of the per-bucket PredictedOutputPositive values and save the figure.

    Both samples are drawn on the current pyplot figure as step histograms with
    density=True and the bins_int_0_21 edges, Train in blue and Test in red. The
    figure gets modelName as title and is saved in outputFolderName once for each
    entry of the comma-separated extensions string.

    NOTE(review): a function with this name and the same behaviour is already defined
    in an earlier cell of this notebook; this definition replaces it and the call
    below writes the same files again.
    """
    # NOTE(review): with density=True the y axis is a normalised density, not a bucket count
    commonStyle=dict(bins=bins_int_0_21,density=True,alpha=1,histtype='step')
    plt.hist(nparray_bucket_PredictedOutputPositive_Train,color="blue",label="Train",**commonStyle)
    plt.hist(nparray_bucket_PredictedOutputPositive_Test,color="red",label="Test",**commonStyle)
    plt.title(modelName)
    plt.xlabel('Bucket PredictedOutputPositive')
    plt.ylabel('Nr of buckets')
    plt.legend(loc='upper left')
    # file name stem built from the model name and the training settings
    fileNameStem="_".join(("NN_5_"+modelName,str(numberOfEpochs),str(batchSize)))
    for extension in extensions.split(","):
        plt.savefig("%s/%s_histo_PredictedOutputPositive.%s"%(outputFolderName,fileNameStem,extension))
# done function
if doPlotMetrics2:
    plot_train_test_PredictedOutputPositive(
        nparray_bucket_PredictedOutputPositive_Train,
        nparray_bucket_PredictedOutputPositive_Test,
        modelName,
    )

In [None]:
# overlay train and test, train is expected to look better than test
def plot_train_test_PredictedOutputNegative(nparray_bucket_PredictedOutputNegative_Train,nparray_bucket_PredictedOutputNegative_Test,modelName):
    """Overlay the Train and Test histograms of the per-bucket PredictedOutputNegative values and save the figure.

    Both samples are drawn on the current pyplot figure as step histograms with
    density=True and the bins_int_0_21 edges, Train in blue and Test in red. The
    figure gets modelName as title and is saved in outputFolderName once for each
    entry of the comma-separated extensions string.
    """
    # NOTE(review): with density=True the y axis is a normalised density, not a bucket count
    commonStyle=dict(bins=bins_int_0_21,density=True,alpha=1,histtype='step')
    plt.hist(nparray_bucket_PredictedOutputNegative_Train,color="blue",label="Train",**commonStyle)
    plt.hist(nparray_bucket_PredictedOutputNegative_Test,color="red",label="Test",**commonStyle)
    plt.title(modelName)
    plt.xlabel('Bucket PredictedOutputNegative')
    plt.ylabel('Nr of buckets')
    plt.legend(loc='upper left')
    # file name stem built from the model name and the training settings
    fileNameStem="_".join(("NN_5_"+modelName,str(numberOfEpochs),str(batchSize)))
    for extension in extensions.split(","):
        plt.savefig("%s/%s_histo_PredictedOutputNegative.%s"%(outputFolderName,fileNameStem,extension))
# done function
if doPlotMetrics2:
    plot_train_test_PredictedOutputNegative(
        nparray_bucket_PredictedOutputNegative_Train,
        nparray_bucket_PredictedOutputNegative_Test,
        modelName,
    )

In [None]:
# overlay train and test, train is expected to look better than test
def plot_train_test_TruePositive(nparray_bucket_TruePositive_Train,nparray_bucket_TruePositive_Test,modelName):
    """Overlay the Train and Test histograms of the per-bucket TruePositive values and save the figure.

    Both samples are drawn on the current pyplot figure as step histograms with
    density=True and the bins_int_0_21 edges, Train in blue and Test in red. The
    figure gets modelName as title and is saved in outputFolderName once for each
    entry of the comma-separated extensions string.
    """
    # NOTE(review): with density=True the y axis is a normalised density, not a bucket count
    commonStyle=dict(bins=bins_int_0_21,density=True,alpha=1,histtype='step')
    plt.hist(nparray_bucket_TruePositive_Train,color="blue",label="Train",**commonStyle)
    plt.hist(nparray_bucket_TruePositive_Test,color="red",label="Test",**commonStyle)
    plt.title(modelName)
    plt.xlabel('Bucket TruePositive')
    plt.ylabel('Nr of buckets')
    plt.legend(loc='upper left')
    # file name stem built from the model name and the training settings
    fileNameStem="_".join(("NN_5_"+modelName,str(numberOfEpochs),str(batchSize)))
    for extension in extensions.split(","):
        plt.savefig("%s/%s_histo_TruePositive.%s"%(outputFolderName,fileNameStem,extension))
# done function
if doPlotMetrics2:
    plot_train_test_TruePositive(
        nparray_bucket_TruePositive_Train,
        nparray_bucket_TruePositive_Test,
        modelName,
    )

In [None]:
# overlay train and test, train is expected to look better than test
def plot_train_test_FalsePositive(nparray_bucket_FalsePositive_Train,nparray_bucket_FalsePositive_Test,modelName):
    """Overlay the Train and Test histograms of the per-bucket FalsePositive values and save the figure.

    Both samples are drawn on the current pyplot figure as step histograms with
    density=True and the bins_int_0_21 edges, Train in blue and Test in red. The
    figure gets modelName as title and is saved in outputFolderName once for each
    entry of the comma-separated extensions string.
    """
    # NOTE(review): with density=True the y axis is a normalised density, not a bucket count
    commonStyle=dict(bins=bins_int_0_21,density=True,alpha=1,histtype='step')
    plt.hist(nparray_bucket_FalsePositive_Train,color="blue",label="Train",**commonStyle)
    plt.hist(nparray_bucket_FalsePositive_Test,color="red",label="Test",**commonStyle)
    plt.title(modelName)
    plt.xlabel('Bucket FalsePositive')
    plt.ylabel('Nr of buckets')
    plt.legend(loc='upper left')
    # file name stem built from the model name and the training settings
    fileNameStem="_".join(("NN_5_"+modelName,str(numberOfEpochs),str(batchSize)))
    for extension in extensions.split(","):
        plt.savefig("%s/%s_histo_FalsePositive.%s"%(outputFolderName,fileNameStem,extension))
# done function
if doPlotMetrics2:
    plot_train_test_FalsePositive(
        nparray_bucket_FalsePositive_Train,
        nparray_bucket_FalsePositive_Test,
        modelName,
    )

In [None]:
# overlay train and test, train is expected to look better than test
def plot_train_test_FalseNegative(nparray_bucket_FalseNegative_Train,nparray_bucket_FalseNegative_Test,modelName):
    """Overlay the Train and Test histograms of the per-bucket FalseNegative values and save the figure.

    Both samples are drawn on the current pyplot figure as step histograms with
    density=True and the bins_int_0_21 edges, Train in blue and Test in red. The
    figure gets modelName as title and is saved in outputFolderName once for each
    entry of the comma-separated extensions string.
    """
    # NOTE(review): with density=True the y axis is a normalised density, not a bucket count
    commonStyle=dict(bins=bins_int_0_21,density=True,alpha=1,histtype='step')
    plt.hist(nparray_bucket_FalseNegative_Train,color="blue",label="Train",**commonStyle)
    plt.hist(nparray_bucket_FalseNegative_Test,color="red",label="Test",**commonStyle)
    plt.title(modelName)
    plt.xlabel('Bucket FalseNegative')
    plt.ylabel('Nr of buckets')
    plt.legend(loc='upper left')
    # file name stem built from the model name and the training settings
    fileNameStem="_".join(("NN_5_"+modelName,str(numberOfEpochs),str(batchSize)))
    for extension in extensions.split(","):
        plt.savefig("%s/%s_histo_FalseNegative.%s"%(outputFolderName,fileNameStem,extension))
# done function
if doPlotMetrics2:
    plot_train_test_FalseNegative(
        nparray_bucket_FalseNegative_Train,
        nparray_bucket_FalseNegative_Test,
        modelName,
    )

In [None]:
# overlay train and test, train is expected to look better than test
def plot_train_test_TrueNegative(nparray_bucket_TrueNegative_Train,nparray_bucket_TrueNegative_Test,modelName):
    """Overlay the Train and Test histograms of the per-bucket TrueNegative values and save the figure.

    Both samples are drawn on the current pyplot figure as step histograms with
    density=True and the bins_int_0_21 edges, Train in blue and Test in red. The
    figure gets modelName as title and is saved in outputFolderName once for each
    entry of the comma-separated extensions string.
    """
    # NOTE(review): with density=True the y axis is a normalised density, not a bucket count
    commonStyle=dict(bins=bins_int_0_21,density=True,alpha=1,histtype='step')
    plt.hist(nparray_bucket_TrueNegative_Train,color="blue",label="Train",**commonStyle)
    plt.hist(nparray_bucket_TrueNegative_Test,color="red",label="Test",**commonStyle)
    plt.title(modelName)
    plt.xlabel('Bucket TrueNegative')
    plt.ylabel('Nr of buckets')
    plt.legend(loc='upper left')
    # file name stem built from the model name and the training settings
    fileNameStem="_".join(("NN_5_"+modelName,str(numberOfEpochs),str(batchSize)))
    for extension in extensions.split(","):
        plt.savefig("%s/%s_histo_TrueNegative.%s"%(outputFolderName,fileNameStem,extension))
# done function
if doPlotMetrics2:
    plot_train_test_TrueNegative(
        nparray_bucket_TrueNegative_Train,
        nparray_bucket_TrueNegative_Test,
        modelName,
    )

In [None]:
# overlay train and test, train is expected to look better than test
def plot_train_test_accuracy(nparray_bucket_accuracy_Train,nparray_bucket_accuracy_Test,modelName):
    """Overlay the Train and Test histograms of the per-bucket accuracy values and save the figure.

    Both samples are drawn on the current pyplot figure as step histograms with
    density=True and the bins_float_0_1 edges, Train in blue and Test in red. The
    figure gets modelName as title and is saved in outputFolderName once for each
    entry of the comma-separated extensions string.
    """
    # NOTE(review): with density=True the y axis is a normalised density, not a bucket count
    commonStyle=dict(bins=bins_float_0_1,density=True,alpha=1,histtype='step')
    plt.hist(nparray_bucket_accuracy_Train,color="blue",label="Train",**commonStyle)
    plt.hist(nparray_bucket_accuracy_Test,color="red",label="Test",**commonStyle)
    plt.title(modelName)
    plt.xlabel('Bucket accuracy')
    plt.ylabel('Nr of buckets')
    plt.legend(loc='upper left')
    # file name stem built from the model name and the training settings
    fileNameStem="_".join(("NN_5_"+modelName,str(numberOfEpochs),str(batchSize)))
    for extension in extensions.split(","):
        plt.savefig("%s/%s_histo_accuracy.%s"%(outputFolderName,fileNameStem,extension))
# done function
if doPlotMetrics2:
    plot_train_test_accuracy(
        nparray_bucket_accuracy_Train,
        nparray_bucket_accuracy_Test,
        modelName,
    )

In [None]:
# overlay train and test, train is expected to look better than test
def plot_train_test_precision(nparray_bucket_precision_Train,nparray_bucket_precision_Test,modelName):
    """Overlay the Train and Test histograms of the per-bucket precision values and save the figure.

    Both samples are drawn on the current pyplot figure as step histograms with
    density=True and the bins_float_0_1 edges, Train in blue and Test in red. The
    figure gets modelName as title and is saved in outputFolderName once for each
    entry of the comma-separated extensions string.
    """
    # NOTE(review): with density=True the y axis is a normalised density, not a bucket count
    commonStyle=dict(bins=bins_float_0_1,density=True,alpha=1,histtype='step')
    plt.hist(nparray_bucket_precision_Train,color="blue",label="Train",**commonStyle)
    plt.hist(nparray_bucket_precision_Test,color="red",label="Test",**commonStyle)
    plt.title(modelName)
    plt.xlabel('Bucket precision')
    plt.ylabel('Nr of buckets')
    plt.legend(loc='upper left')
    # file name stem built from the model name and the training settings
    fileNameStem="_".join(("NN_5_"+modelName,str(numberOfEpochs),str(batchSize)))
    for extension in extensions.split(","):
        plt.savefig("%s/%s_histo_precision.%s"%(outputFolderName,fileNameStem,extension))
# done function
if doPlotMetrics2:
    plot_train_test_precision(
        nparray_bucket_precision_Train,
        nparray_bucket_precision_Test,
        modelName,
    )

In [None]:
# overlay train and test, train is expected to look better than test
def plot_train_test_recall(nparray_bucket_recall_Train,nparray_bucket_recall_Test,modelName):
    """Overlay the Train and Test histograms of the per-bucket recall values and save the figure.

    Both samples are drawn on the current pyplot figure as step histograms with
    density=True and the bins_float_0_1 edges, Train in blue and Test in red. The
    figure gets modelName as title and is saved in outputFolderName once for each
    entry of the comma-separated extensions string.
    """
    # NOTE(review): with density=True the y axis is a normalised density, not a bucket count
    commonStyle=dict(bins=bins_float_0_1,density=True,alpha=1,histtype='step')
    plt.hist(nparray_bucket_recall_Train,color="blue",label="Train",**commonStyle)
    plt.hist(nparray_bucket_recall_Test,color="red",label="Test",**commonStyle)
    plt.title(modelName)
    plt.xlabel('Bucket recall')
    plt.ylabel('Nr of buckets')
    plt.legend(loc='upper left')
    # file name stem built from the model name and the training settings
    fileNameStem="_".join(("NN_5_"+modelName,str(numberOfEpochs),str(batchSize)))
    for extension in extensions.split(","):
        plt.savefig("%s/%s_histo_recall.%s"%(outputFolderName,fileNameStem,extension))
# done function
if doPlotMetrics2:
    plot_train_test_recall(
        nparray_bucket_recall_Train,
        nparray_bucket_recall_Test,
        modelName,
    )

In [None]:
# overlay train and test, train is expected to look better than test
def plot_train_test_negativePredictedValue(nparray_bucket_negativePredictedValue_Train,nparray_bucket_negativePredictedValue_Test,modelName):
    """Overlay the Train and Test histograms of the per-bucket negativePredictedValue values and save the figure.

    Both samples are drawn on the current pyplot figure as step histograms with
    density=True and the bins_float_0_1 edges, Train in blue and Test in red. The
    figure gets modelName as title and is saved in outputFolderName once for each
    entry of the comma-separated extensions string.
    """
    # NOTE(review): with density=True the y axis is a normalised density, not a bucket count
    commonStyle=dict(bins=bins_float_0_1,density=True,alpha=1,histtype='step')
    plt.hist(nparray_bucket_negativePredictedValue_Train,color="blue",label="Train",**commonStyle)
    plt.hist(nparray_bucket_negativePredictedValue_Test,color="red",label="Test",**commonStyle)
    plt.title(modelName)
    plt.xlabel('Bucket negativePredictedValue')
    plt.ylabel('Nr of buckets')
    plt.legend(loc='upper left')
    # file name stem built from the model name and the training settings
    fileNameStem="_".join(("NN_5_"+modelName,str(numberOfEpochs),str(batchSize)))
    for extension in extensions.split(","):
        plt.savefig("%s/%s_histo_negativePredictedValue.%s"%(outputFolderName,fileNameStem,extension))
# done function
if doPlotMetrics2:
    plot_train_test_negativePredictedValue(
        nparray_bucket_negativePredictedValue_Train,
        nparray_bucket_negativePredictedValue_Test,
        modelName,
    )

In [None]:
# overlay train and test we expect train to be better than test 
def plot_train_test_trueNegativeRate(nparray_bucket_trueNegativeRate_Train,nparray_bucket_trueNegativeRate_Test,modelName):
    """Overlay the Train and Test histograms of the bucket trueNegativeRate and save the figure.

    Both inputs are drawn as step histograms with the shared binning bins_float_0_1
    on the current pyplot figure, which is then written to outputFolderName once
    for every entry of the comma-separated extensions setting.

    nparray_bucket_trueNegativeRate_Train -- values drawn in blue with legend entry "Train"
    nparray_bucket_trueNegativeRate_Test  -- values drawn in red with legend entry "Test"
    modelName                             -- plot title, also part of the output file name
    """
    # density=True: each histogram is normalised to unit area
    shared_style=dict(bins=bins_float_0_1,density=True,alpha=1,histtype='step')
    for sample,colour,tag in (
        (nparray_bucket_trueNegativeRate_Train,"blue","Train"),
        (nparray_bucket_trueNegativeRate_Test,"red","Test"),
    ):
        plt.hist(sample,color=colour,label=tag,**shared_style)
    # done for loop over samples
    plt.xlabel('Bucket trueNegativeRate')
    plt.ylabel('Nr of buckets')
    plt.title(modelName)
    plt.legend(loc='upper left')
    # full output path without the extension
    pathStem=outputFolderName+"/"+"NN_5_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)+"_histo_trueNegativeRate."
    for extension in extensions.split(","):
        plt.savefig(pathStem+extension)
    # done for loop over extension
# done function
if doPlotMetrics2:
    # draw and save the Train vs Test overlay of the bucket trueNegativeRate
    plot_train_test_trueNegativeRate(
        nparray_bucket_trueNegativeRate_Train,
        nparray_bucket_trueNegativeRate_Test,
        modelName,
    )
    

In [None]:
if doCalculateMetrics2:
    # 2D histogram for Train: Output Positive (x) versus Predicted Output Positive (y)
    fig, ax = plt.subplots()
    # cmin=1: cells with fewer than one entry are not drawn
    h=ax.hist2d(
        nparray_bucket_OutputPositive_Train,
        nparray_bucket_PredictedOutputPositive_Train,
        bins=[bins_int_0_21,bins_int_0_21],
        cmin=1,
    )
    # h[3] is the drawn image; it drives the colour scale
    fig.colorbar(h[3], ax=ax)
    ax.set_xlabel("Output Positive")
    ax.set_ylabel("Predicted Output Positive")
    ax.set_title(modelName+" Train")
    outputFileNameStem="NN_5_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)
    # save one file per requested extension
    for extension in extensions.split(","):
        fig.savefig(
            outputFolderName+"/"+outputFileNameStem
            +"_histo_OutputPositive_vs_PredictedOutputPositive_Train."+extension
        )

In [None]:
if doCalculateMetrics2:
    # 2D histogram for Test: Output Positive (x) versus Predicted Output Positive (y)
    fig, ax = plt.subplots()
    # cmin=1: cells with fewer than one entry are not drawn
    h=ax.hist2d(
        nparray_bucket_OutputPositive_Test,
        nparray_bucket_PredictedOutputPositive_Test,
        bins=[bins_int_0_21,bins_int_0_21],
        cmin=1,
    )
    # h[3] is the drawn image; it drives the colour scale
    fig.colorbar(h[3], ax=ax)
    ax.set_xlabel("Output Positive")
    ax.set_ylabel("Predicted Output Positive")
    ax.set_title(modelName+" Test")
    outputFileNameStem="NN_5_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)
    # save one file per requested extension
    for extension in extensions.split(","):
        fig.savefig(
            outputFolderName+"/"+outputFileNameStem
            +"_histo_OutputPositive_vs_PredictedOutputPositive_Test."+extension
        )

In [None]:
if doCalculateMetrics2:
    # 2D histogram for Train: Output Negative (x) versus Predicted Output Negative (y)
    fig, ax = plt.subplots()
    # cmin=1: cells with fewer than one entry are not drawn
    # (norm=LogNorm() can be added here for a logarithmic colour scale)
    h=ax.hist2d(
        nparray_bucket_OutputNegative_Train,
        nparray_bucket_PredictedOutputNegative_Train,
        bins=[bins_int_0_21,bins_int_0_21],
        cmin=1,
    )
    # h[3] is the drawn image; it drives the colour scale
    fig.colorbar(h[3], ax=ax)
    ax.set_xlabel("Output Negative")
    ax.set_ylabel("Predicted Output Negative")
    ax.set_title(modelName+" Train")
    outputFileNameStem="NN_5_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)
    # save one file per requested extension
    for extension in extensions.split(","):
        fig.savefig(
            outputFolderName+"/"+outputFileNameStem
            +"_histo_OutputNegative_vs_PredictedOutputNegative_Train."+extension
        )
    

In [None]:
if doCalculateMetrics2:
    # 2D histogram for Test: Output Negative (x) versus Predicted Output Negative (y)
    fig, ax = plt.subplots()
    # cmin=1: cells with fewer than one entry are not drawn
    h=ax.hist2d(
        nparray_bucket_OutputNegative_Test,
        nparray_bucket_PredictedOutputNegative_Test,
        bins=[bins_int_0_21,bins_int_0_21],
        cmin=1,
    )
    # h[3] is the drawn image; it drives the colour scale
    fig.colorbar(h[3], ax=ax)
    ax.set_xlabel("Output Negative")
    ax.set_ylabel("Predicted Output Negative")
    ax.set_title(modelName+" Test")
    outputFileNameStem="NN_5_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)
    # save one file per requested extension
    for extension in extensions.split(","):
        fig.savefig(
            outputFolderName+"/"+outputFileNameStem
            +"_histo_OutputNegative_vs_PredictedOutputNegative_Test."+extension
        )

In [None]:
if doCalculateMetrics2:
    # Confusion matrix for Train with no reweighting:
    # sum each per-bucket array, then express every sum as a percentage of the grand total
    nbHitTP,nbHitFP,nbHitFN,nbHitTN=(
        np.sum(nparray_bucket)
        for nparray_bucket in (
            nparray_bucket_TruePositive_Train,
            nparray_bucket_FalsePositive_Train,
            nparray_bucket_FalseNegative_Train,
            nparray_bucket_TrueNegative_Train,
        )
    )
    nrHitsAll=nbHitTP+nbHitFP+nbHitFN+nbHitTN
    nrHitsPercentTP,nrHitsPercentFP,nrHitsPercentFN,nrHitsPercentTN=(
        100*nbHit/nrHitsAll for nbHit in (nbHitTP,nbHitFP,nbHitFN,nbHitTN)
    )
    print(
        "Train Hits. Percent TP=%.1f FP=%.1f FN=%.1f TN=%.1f"
        %(nrHitsPercentTP,nrHitsPercentFP,nrHitsPercentFN,nrHitsPercentTN)
    )

In [None]:
if doCalculateMetrics2:
    # Confusion matrix for Test with no reweighting:
    # sum each per-bucket array, then express every sum as a percentage of the grand total
    nbHitTP,nbHitFP,nbHitFN,nbHitTN=(
        np.sum(nparray_bucket)
        for nparray_bucket in (
            nparray_bucket_TruePositive_Test,
            nparray_bucket_FalsePositive_Test,
            nparray_bucket_FalseNegative_Test,
            nparray_bucket_TrueNegative_Test,
        )
    )
    nrHitsAll=nbHitTP+nbHitFP+nbHitFN+nbHitTN
    nrHitsPercentTP,nrHitsPercentFP,nrHitsPercentFN,nrHitsPercentTN=(
        100*nbHit/nrHitsAll for nbHit in (nbHitTP,nbHitFP,nbHitFN,nbHitTN)
    )
    print(
        "Test Hits. Percent TP=%.1f FP=%.1f FN=%.1f TN=%.1f"
        %(nrHitsPercentTP,nrHitsPercentFP,nrHitsPercentFN,nrHitsPercentTN)
    )

In [None]:
def get_metrics(TP,FP,FN,TN,debug=False):
    """Compute confusion-matrix figures of merit from the four basic counts.

    Parameters
    ----------
    TP, FP, FN, TN : number
        True positive, false positive, false negative and true negative counts.
    debug : bool, optional
        When True, print the inputs and each group of derived values.

    Returns
    -------
    dict
        Maps a name to its value. Keys: "NbTotal", "TP", "FP", "FN", "TN",
        "TPPercent", "FPPercent", "FNPercent", "TNPercent",
        "OutputPositive", "OutputNegative",
        "PredictedOutputPositive", "PredictedOutputNegative",
        "OutputPositivePercent", "OutputNegativePercent",
        "PredictedOutputPositivePercent", "PredictedOutputNegativePercent",
        "Accuracy", "Precision", "Recall",
        "NegativePredictedValue", "TrueNegativeRate".

    Notes
    -----
    Every ratio whose denominator is zero is reported as 0. This also holds
    for the percentages and the accuracy when all four counts are zero, so
    an empty input does not raise.

    Definitions (https://en.wikipedia.org/wiki/Precision_and_recall):
      accuracy  = (TP+TN)/(TP+FP+FN+TN)
      precision = TP/(TP+FP) = TP/(all that are predicted positive)
                  e.g. I have reconstructed 100 electrons: what fraction of
                  them are truth electrons? fake rate = FP/(TP+FP) = 1 - precision
      recall    = TP/(TP+FN) = TP/(all that are in reality positive) = efficiency
                  e.g. there are 100 truth electrons: what fraction of them
                  are also reconstructed as electrons?
      negative predicted value = TN/(TN+FN) (precision for the negative class)
      true negative rate       = TN/(TN+FP) (recall for the negative class)
    """

    def ratio(numerator,denominator):
        # single convention for all metrics: an empty denominator gives 0
        if denominator==0:
            return 0
        return numerator/denominator
    # done function

    if debug:
        print("TP",TP,"FP",FP,"FN",FN,"TN",TN)

    nbTotal=TP+FP+FN+TN

    # each basic count as a percentage of the total
    TPPercent=ratio(100*TP,nbTotal)
    FPPercent=ratio(100*FP,nbTotal)
    FNPercent=ratio(100*FN,nbTotal)
    TNPercent=ratio(100*TN,nbTotal)
    if debug:
        print("TPPercent",TPPercent,"FPPercent",FPPercent,"FNPercent",FNPercent,"TNPercent",TNPercent)

    # marginal sums: truth (Output) and prediction (PredictedOutput)
    OutputPositive=TP+FN
    OutputNegative=FP+TN
    PredictedOutputPositive=TP+FP
    PredictedOutputNegative=FN+TN
    if debug:
        print("OutputPositive",OutputPositive,"OutputNegative",OutputNegative,"PredictedOutputPositive",PredictedOutputPositive,"PredictedOutputNegative",PredictedOutputNegative)

    OutputPositivePercent=ratio(100*OutputPositive,nbTotal)
    OutputNegativePercent=ratio(100*OutputNegative,nbTotal)
    PredictedOutputPositivePercent=ratio(100*PredictedOutputPositive,nbTotal)
    PredictedOutputNegativePercent=ratio(100*PredictedOutputNegative,nbTotal)
    if debug:
        print("OutputPositivePercent",OutputPositivePercent,"OutputNegativePercent",OutputNegativePercent,"PredictedOutputPositivePercent",PredictedOutputPositivePercent,"PredictedOutputNegativePercent",PredictedOutputNegativePercent)

    # figures of merit, see the definitions in the docstring
    accuracy=ratio(TP+TN,nbTotal)
    precision=ratio(TP,TP+FP)
    recall=ratio(TP,TP+FN)
    negativePredictedValue=ratio(TN,TN+FN)
    trueNegativeRate=ratio(TN,TN+FP)

    if debug:
        print("accuracy",accuracy,"precision",precision,"recall",recall,"negativePredictedValue",negativePredictedValue,"trueNegativeRate",trueNegativeRate)

    # return only one dictionary
    dict_var_value={
        "NbTotal":nbTotal,
        "TP":TP,
        "FP":FP,
        "FN":FN,
        "TN":TN,
        "TPPercent":TPPercent,
        "FPPercent":FPPercent,
        "FNPercent":FNPercent,
        "TNPercent":TNPercent,
        "OutputPositive":OutputPositive,
        "OutputNegative":OutputNegative,
        "PredictedOutputPositive":PredictedOutputPositive,
        "PredictedOutputNegative":PredictedOutputNegative,
        "OutputPositivePercent":OutputPositivePercent,
        "OutputNegativePercent":OutputNegativePercent,
        "PredictedOutputPositivePercent":PredictedOutputPositivePercent,
        "PredictedOutputNegativePercent":PredictedOutputNegativePercent,
        "Accuracy":accuracy,
        "Precision":precision,
        "Recall":recall,
        "NegativePredictedValue":negativePredictedValue,
        "TrueNegativeRate":trueNegativeRate,
    }

    if debug:
        print("Print dict_var_value")
        for var in sorted(dict_var_value.keys()):
            print("var",var,"value",dict_var_value[var])

    # all done, ready to return
    return dict_var_value
# done function

In [None]:
def calculate_metrics3(TrainOrTest, dict_VolumeID_MetricBasic):
    """Compute, save and plot every metric per VolumeID for one sample.

    For each VolumeID (in increasing order) the four basic counts are passed
    to get_metrics. The values of each metric are collected across the
    volumes, saved as a .npy file and drawn as a graph against volume_id.
    Finally the share of NbTotal held by each volume is saved and drawn as
    "NbTotalPercent".

    Parameters
    ----------
    TrainOrTest : str
        Sample label; used in the plot titles and in the output file names.
    dict_VolumeID_MetricBasic : dict
        Maps a VolumeID to a sequence whose first four items are read as
        TP, FP, FN, TN (in this order).

    Returns
    -------
    None. Output files are written under outputFolderName with the stem
    "NN_7_<modelName>_<numberOfEpochs>_<batchSize>".

    Raises
    ------
    KeyError
        If dict_VolumeID_MetricBasic is empty, as no "NbTotal" list exists then.
    """
    if debug or verbose:
        print("Start calculate_metrics3 for ",TrainOrTest)

    # calculate list of VolumeID in increasing order
    list_VolumeID=sorted(dict_VolumeID_MetricBasic.keys())

    # for each metric collect the values of all volumes one after the other,
    # so that they can be turned into one nparray per metric
    dict_var_list_value={}
    for VolumeID in list_VolumeID:
        MetricBasic=dict_VolumeID_MetricBasic[VolumeID]
        TP=MetricBasic[0]
        FP=MetricBasic[1]
        FN=MetricBasic[2]
        TN=MetricBasic[3]
        # from these 4 values, calculate the other metrics and figures of merit
        dict_var_value=get_metrics(TP,FP,FN,TN,debug=debug)
        for var,value in dict_var_value.items():
            dict_var_list_value.setdefault(var,[]).append(value)
        # done for loop over var
    # done for loop over VolumeID

    outputFileNameStem=outputFolderName+"/"+"NN_7_"+modelName+"_"+str(numberOfEpochs)+"_"+str(batchSize)

    # save the nparray_volume_id
    nparray_volume_id=np.array(list_VolumeID)
    p("nparray_volume_id",nparray_volume_id)
    np.save(outputFileNameStem+"_nparray_"+"VolumeID"+"_"+TrainOrTest+".npy",nparray_volume_id)

    def save_array_and_graph(var,nparray_to_store):
        # store the per-volume values of one variable and draw them against volume_id
        np.save(outputFileNameStem+"_nparray_"+var+"_VolumeID_"+TrainOrTest+".npy",nparray_to_store)
        # own figure, so that nothing is drawn on a figure left open by earlier code
        fig=plt.figure()
        plt.plot(nparray_to_store,marker="o")
        plt.xlabel("volume_id")
        plt.xticks(range(len(nparray_volume_id)),nparray_volume_id)
        plt.ylabel(var)
        plt.title(TrainOrTest)
        # same list of formats as the other plots of the notebook
        for extension in extensions.split(","):
            plt.savefig(outputFileNameStem+"_plot_graph_"+var+"_VolumeID_"+TrainOrTest+"."+extension)
        # done for loop over extension
        plt.close(fig)
    # done function

    # for loop over var
    for var in sorted(dict_var_list_value.keys()):
        print("var",var)
        save_array_and_graph(var,np.array(dict_var_list_value[var]))
    # done for loop over var

    # calculate the percentage of the number of hits in each VolumeID
    nparray_nbTotal=np.array(dict_var_list_value["NbTotal"])
    sum_nbTotal=np.sum(nparray_nbTotal)
    nparray_nbTotalPercent=100*nparray_nbTotal/sum_nbTotal
    p("nparray_nbTotalPercent",nparray_nbTotalPercent)
    # plot the percentages themselves, not the values of the last metric of the loop above
    save_array_and_graph("NbTotalPercent",nparray_nbTotalPercent)

# done function

In [None]:
if doPlotMetrics3:
    # per-VolumeID metrics, arrays and graphs for the Train sample
    calculate_metrics3(
        "Train",
        dict_VolumeID_MetricBasic_Train,
    )

In [None]:
if doPlotMetrics3:
    # per-VolumeID metrics, arrays and graphs for the Test sample
    calculate_metrics3(
        "Test",
        dict_VolumeID_MetricBasic_Test,
    )