# Hyperparameter Optimisation

- Try different combinations of inputs (e.g. getting rid of the NR noise).
- Try square-root scaling the thresholded data.

In [3]:
import numpy as np
from skimage import io
from os import listdir
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
import time

from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.utils import shuffle

### GPU testing

In [4]:
# Report how many GPUs TensorFlow can see. Listing physical devices does not
# place any op on a device, so no tf.device(...) scope is needed around it.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [5]:
def data_func_stack(file_type_list, sqrt_scale, batch_size=50, seed=None):
    """Build batched train/test datasets of Migdal vs. background images.

    For each suffix in ``file_type_list`` the arrays
    ``np_data/NR_noise<suffix>.npy``, ``np_data/Migdal_noise<suffix>.npy`` and
    ``np_data/Electron_noise<suffix>.npy`` are loaded and stacked along a new
    last axis (one channel per suffix). The NR and electron stacks are pooled
    into a background class, from which a random subset no larger than the
    Migdal set is kept. Migdal images are labelled 0 and background images 1.

    Parameters
    ----------
    file_type_list : sequence of str
        Filename suffixes, one per output channel.
    sqrt_scale : sequence
        One flag per suffix; a truthy flag applies ``np.sqrt`` to that channel.
    batch_size : int, optional
        Batch size of the returned datasets (default 50).
    seed : int or None, optional
        Seed for the two shuffles done here. ``None`` (default) uses NumPy's
        global random state, so every call yields a different selection.

    Returns
    -------
    (train_dataset_noise, test_dataset_noise) : tuple of tf.data.Dataset
        Batched ``(images, labels)`` datasets from an 80/20 split.

    Raises
    ------
    ValueError
        If ``file_type_list`` and ``sqrt_scale`` differ in length (``zip``
        would otherwise silently drop channels).
    """
    if len(file_type_list) != len(sqrt_scale):
        raise ValueError('file_type_list and sqrt_scale must have the same length')

    rng = np.random if seed is None else np.random.RandomState(seed)

    def _load_stack(prefix):
        # One channel per suffix, optionally square-root scaled.
        channels = []
        for suffix, use_sqrt in zip(file_type_list, sqrt_scale):
            channel = np.load('np_data/' + prefix + '_noise' + suffix + '.npy')
            channels.append(np.sqrt(channel) if use_sqrt else channel)
        return np.stack(channels, axis=-1)

    nr_stack = _load_stack('NR')
    migdal_stack = _load_stack('Migdal')
    electron_stack = _load_stack('Electron')

    # Pool both background classes, then keep a random subset that is at most
    # as large as the Migdal set.
    background = np.concatenate([nr_stack, electron_stack])
    del nr_stack, electron_stack  # free the large arrays as early as possible
    keep = rng.permutation(len(background))[:len(migdal_stack)]
    background = background[keep]

    labels = np.concatenate([np.zeros(len(migdal_stack), dtype=migdal_stack.dtype),
                             np.ones(len(background), dtype=background.dtype)])
    data = np.concatenate([migdal_stack, background])
    del migdal_stack, background

    # train_test_split uses a fixed random_state, so this shuffle is what makes
    # the split differ from call to call (unless `seed` is given).
    order = rng.permutation(len(labels))
    labels, data = labels[order], data[order]

    train_data_noise, test_data_noise, train_labels, test_labels = \
        train_test_split(data, labels, test_size=0.2, random_state=42)
    del data, labels

    train_dataset_noise = tf.data.Dataset.from_tensor_slices(
        (train_data_noise, train_labels)).batch(batch_size)
    test_dataset_noise = tf.data.Dataset.from_tensor_slices(
        (test_data_noise, test_labels)).batch(batch_size)

    return train_dataset_noise, test_dataset_noise

In [6]:
# Filename suffixes of the image sets to stack, one input channel per entry.
# '_0.0_threshold' is listed twice: the training functions below call
# data_func_stack with sqrt_scale=[0,1,0], so the second copy is square-root
# scaled while the first and third channels are used as loaded.
file_type_list = ['_0.0_threshold', '_0.0_threshold', '_4.0_threshold']

*Re-run from here*

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, LeakyReLU, Dropout

def mig_model():
    """Return the baseline, uncompiled CNN classifier.

    Three conv / LeakyReLU / max-pool stages (10, 30, 30 filters) feed two
    dense layers (20, 10 units), each preceded by Dropout(0.1) and carrying an
    L2(0.1) kernel regulariser, and a single sigmoid output. The number of
    input channels is the length of the module-level ``file_type_list``.
    """
    n_channels = len(file_type_list)

    stack = [
        Conv2D(10, kernel_size=(3,3), input_shape=(150,150,n_channels), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
    ]
    for _ in range(2):
        stack += [Conv2D(30, kernel_size=(3,3), padding='same'), LeakyReLU(), MaxPooling2D()]

    stack.append(Flatten())
    for units in (20, 10):
        stack += [
            Dropout(0.1),
            Dense(units, kernel_regularizer=tf.keras.regularizers.L2(0.1)),
            LeakyReLU(),
        ]
    stack.append(Dense(1, activation='sigmoid'))

    return Sequential(stack, name='mig_model')

# Build once so a broken architecture fails here rather than mid-sweep.
model = mig_model()

### Learning Rate

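Plan: wrap training and evaluation in a function of the hyperparameter, run it for several values, and compare the false-positive rate and the percentage of Migdal events identified for each.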

In [12]:
def lr_func(l_r):
    """Train a fresh ``mig_model`` with Adam at learning rate ``l_r`` and score it.

    A test image counts as "identified as Migdal" when the sigmoid output is
    below ``cut`` (0.01); Migdal images carry label 0 (see ``data_func_stack``).

    NOTE(review): the test dataset is also the validation data that drives
    early stopping and weight restoration, so the scores are optimistic.

    Parameters
    ----------
    l_r : float
        Adam learning rate.

    Returns
    -------
    (fpr, acc) : tuple of float
        ``fpr`` is the percentage of label-1 test images below the cut.
        ``acc`` is the percentage of label-0 (Migdal) test images below the
        cut, i.e. the selection efficiency rather than a classification
        accuracy. Either is ``nan`` when its denominator is zero.
    """
    start_time = time.time()

    train_dataset_noise, test_dataset_noise = data_func_stack(file_type_list,[0,1,0])

    callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', restore_best_weights=True, patience=15)

    model = mig_model()

    print('Training '+str(l_r)+' learning rate...')
    opt = tf.keras.optimizers.Adam(learning_rate=l_r)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(train_dataset_noise, epochs=100, validation_data=test_dataset_noise,
              callbacks=[callback], verbose=0)

    cut = 0.01
    selected = true_pos = migdals = total = 0
    for data, labels in test_dataset_noise:
        labels = labels.numpy().flatten()
        batch_probs = model(data).numpy().flatten()
        passed = batch_probs < cut
        is_migdal = labels == 0
        selected += int(passed.sum())
        true_pos += int((passed & is_migdal).sum())
        migdals += int(is_migdal.sum())
        total += len(labels)
    false_pos = selected - true_pos

    def _pct(part, whole):
        # nan instead of ZeroDivisionError when nothing falls in the denominator
        return 100*part/whole if whole else float('nan')

    fpr = _pct(false_pos, total - migdals)
    acc = _pct(true_pos, migdals)

    print()
    print('Learning Rate: '+str(l_r)+'\n')
    print(f'Number of Migdal events identified with cut of {cut} = {selected}')
    print(f'Accuracy with cut of {cut} = {_pct(true_pos, selected):.3f}%')
    print('Total number of images tested: '+str(total))
    print('Total number of actual Migdal events tested: '+str(migdals))
    print('Number of Migdal events identified: '+str(selected))
    print(f'Percentage of Migdal events identified correctly: {acc:.3f}%')
    print('Number of false positive Migdal events: '+str(false_pos))
    print(f'False-positive rate: {fpr:.3g}%')
    print('Computation time: --- %s seconds ---' % (time.time() - start_time))
    print()

    return fpr, acc

In [13]:
# Learning-rate sweep: one fresh training run per rate. Results are kept only
# in the two lists (same order as the rates) instead of also being written to
# globals() under keys like 'fpr0.1', which are not valid identifiers and were
# overwritten by every later sweep that used the same value.
fpr_list, acc_list = [],[]
for i in [0.1, 0.01, 0.001, 0.005, 0.0001]:
    fpr, acc = lr_func(i)
    fpr_list.append(fpr)
    acc_list.append(acc)

Training 0.1 learning rate...

Learning Rate: 0.1

Number of Migdal events identified with cut of 0.01 = 2605
Accuracy with cut of 0.01 = 72.284%
Total number of images tested: 3916
Total number of actual Migdal events tested: 1943
Number of Migdal events identified: 2605
Percentage of Migdal events identified correctly: 96.912%
Number of false positive Migdal events: 722
False-positive rate: 36.6%
Computation time: --- 230.25847244262695 seconds ---

Training 0.01 learning rate...

Learning Rate: 0.01

Number of Migdal events identified with cut of 0.01 = 1202
Accuracy with cut of 0.01 = 99.334%
Total number of images tested: 3916
Total number of actual Migdal events tested: 1968
Number of Migdal events identified: 1202
Percentage of Migdal events identified correctly: 60.671%
Number of false positive Migdal events: 8
False-positive rate: 0.411%
Computation time: --- 445.374125957489 seconds ---

Training 0.001 learning rate...

Learning Rate: 0.001

Number of Migdal events identified

In [14]:
fpr_list

[36.594019260010135, 0.4106776180698152, 0.0, 0.0, 0.2035623409669211]

In [15]:
acc_list

[96.91199176531137,
 60.670731707317074,
 63.403382880574064,
 45.55774925962488,
 47.15530497180933]

### L2 Regulariser Rate

In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, LeakyReLU, Dropout

def reg_model(reg_rate):
    """Return the uncompiled CNN with a configurable L2 penalty.

    Same layer layout as the baseline: three conv / LeakyReLU / max-pool
    stages (10, 30, 30 filters), then Dropout(0.1) + Dense(20) and
    Dropout(0.1) + Dense(10), then a sigmoid output.

    Parameters
    ----------
    reg_rate : float
        L2 factor applied to the kernels of the two hidden dense layers.
    """
    in_shape = (150, 150, len(file_type_list))

    conv_stages = [
        Conv2D(10, kernel_size=(3,3), input_shape=in_shape, padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Conv2D(30, kernel_size=(3,3), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Conv2D(30, kernel_size=(3,3), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
    ]
    dense_head = [Flatten()]
    for width in (20, 10):
        dense_head.extend([
            Dropout(0.1),
            Dense(width, kernel_regularizer=tf.keras.regularizers.L2(reg_rate)),
            LeakyReLU(),
        ])
    dense_head.append(Dense(1, activation='sigmoid'))

    return Sequential(conv_stages + dense_head, name='reg_model')

# Build once so a broken architecture fails here rather than mid-sweep.
model = reg_model(0.1)

In [19]:
def l2_func(l2):
    """Train a fresh ``reg_model(l2)`` (Adam, learning rate 0.001) and score it.

    A test image counts as "identified as Migdal" when the sigmoid output is
    below ``cut`` (0.01); Migdal images carry label 0 (see ``data_func_stack``).

    NOTE(review): the test dataset is also the validation data that drives
    early stopping and weight restoration, so the scores are optimistic.

    Parameters
    ----------
    l2 : float
        L2 regularisation factor passed to ``reg_model``.

    Returns
    -------
    (fpr, acc) : tuple of float
        ``fpr`` is the percentage of label-1 test images below the cut.
        ``acc`` is the percentage of label-0 (Migdal) test images below the
        cut, i.e. the selection efficiency rather than a classification
        accuracy. Either is ``nan`` when its denominator is zero.
    """
    start_time = time.time()

    train_dataset_noise, test_dataset_noise = data_func_stack(file_type_list,[0,1,0])

    callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', restore_best_weights=True, patience=15)

    model = reg_model(l2)

    print('Training '+str(l2)+' regularisation rate...')
    opt = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(train_dataset_noise, epochs=100, validation_data=test_dataset_noise,
              callbacks=[callback], verbose=0)

    cut = 0.01
    selected = true_pos = migdals = total = 0
    for data, labels in test_dataset_noise:
        labels = labels.numpy().flatten()
        batch_probs = model(data).numpy().flatten()
        passed = batch_probs < cut
        is_migdal = labels == 0
        selected += int(passed.sum())
        true_pos += int((passed & is_migdal).sum())
        migdals += int(is_migdal.sum())
        total += len(labels)
    false_pos = selected - true_pos

    def _pct(part, whole):
        # nan instead of ZeroDivisionError when nothing falls in the denominator
        return 100*part/whole if whole else float('nan')

    fpr = _pct(false_pos, total - migdals)
    acc = _pct(true_pos, migdals)

    print()
    print('L2 Regularisation Rate: '+str(l2)+'\n')
    print(f'Number of Migdal events identified with cut of {cut} = {selected}')
    print(f'Accuracy with cut of {cut} = {_pct(true_pos, selected):.3f}%')
    print('Total number of images tested: '+str(total))
    print('Total number of actual Migdal events tested: '+str(migdals))
    print('Number of Migdal events identified: '+str(selected))
    print(f'Percentage of Migdal events identified correctly: {acc:.3f}%')
    print('Number of false positive Migdal events: '+str(false_pos))
    print(f'False-positive rate: {fpr:.3g}%')
    print('Computation time: --- %s seconds ---' % (time.time() - start_time))
    print()

    return fpr, acc

In [20]:
# L2-rate sweep: one fresh training run per rate. Results are kept only in the
# two lists (same order as the rates) instead of also being written to
# globals() under keys like 'fpr0.1', which are not valid identifiers and
# collide with the other sweeps in this notebook.
fpr_list, acc_list = [],[]
for i in [0, 0.1, 0.2, 0.3, 0.4]:
    fpr, acc = l2_func(i)
    fpr_list.append(fpr)
    acc_list.append(acc)

Training 0 regularisation rate...

L2 Regularisation Rate: 0

Number of Migdal events identified with cut of 0.01 = 1061
Accuracy with cut of 0.01 = 99.906%
Total number of images tested: 3916
Total number of actual Migdal events tested: 1885
Number of Migdal events identified: 1061
Percentage of Migdal events identified correctly: 56.233%
Number of false positive Migdal events: 1
False-positive rate: 0.0492%
Computation time: --- 386.1854543685913 seconds ---

Training 0.1 regularisation rate...

L2 Regularisation Rate: 0.1

Number of Migdal events identified with cut of 0.01 = 1311
Accuracy with cut of 0.01 = 99.771%
Total number of images tested: 3916
Total number of actual Migdal events tested: 1941
Number of Migdal events identified: 1311
Percentage of Migdal events identified correctly: 67.388%
Number of false positive Migdal events: 3
False-positive rate: 0.152%
Computation time: --- 870.4448153972626 seconds ---

Training 0.2 regularisation rate...

L2 Regularisation Rate: 0.2


In [21]:
fpr_list

[0.049236829148202856,
 0.1518987341772152,
 0.0,
 0.05211047420531527,
 0.05136106831022085]

In [22]:
acc_list

[56.23342175066313,
 67.38794435857805,
 46.0431654676259,
 62.543815723585375,
 60.79228034535297]

### Dropout Rate

In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, LeakyReLU, Dropout

def drp_model(drp_rate):
    """Return the uncompiled CNN with a configurable dropout rate.

    Three conv / LeakyReLU / max-pool stages (10, 30, 30 filters) are followed
    by two hidden dense layers (20, 10 units, L2(0.1) kernel regulariser), each
    preceded by ``Dropout(drp_rate)``, and a sigmoid output.

    Parameters
    ----------
    drp_rate : float
        Dropout rate used before both hidden dense layers.
    """
    net = Sequential(name='drp_model')

    # Convolutional feature extractor; only the first layer declares the input.
    net.add(Conv2D(10, kernel_size=(3,3), input_shape=(150,150,len(file_type_list)), padding='same'))
    net.add(LeakyReLU())
    net.add(MaxPooling2D())
    for _ in range(2):
        net.add(Conv2D(30, kernel_size=(3,3), padding='same'))
        net.add(LeakyReLU())
        net.add(MaxPooling2D())

    # Regularised dense head.
    net.add(Flatten())
    for width in (20, 10):
        net.add(Dropout(drp_rate))
        net.add(Dense(width, kernel_regularizer=tf.keras.regularizers.L2(0.1)))
        net.add(LeakyReLU())
    net.add(Dense(1, activation='sigmoid'))

    return net

# Build once so a broken architecture fails here rather than mid-sweep.
model = drp_model(0.1)

In [24]:
def drp_func(drp_rate):
    """Train a fresh ``drp_model(drp_rate)`` (Adam, learning rate 0.001) and score it.

    A test image counts as "identified as Migdal" when the sigmoid output is
    below ``cut`` (0.01); Migdal images carry label 0 (see ``data_func_stack``).

    NOTE(review): the test dataset is also the validation data that drives
    early stopping and weight restoration, so the scores are optimistic.

    Parameters
    ----------
    drp_rate : float
        Dropout rate passed to ``drp_model``.

    Returns
    -------
    (fpr, acc) : tuple of float
        ``fpr`` is the percentage of label-1 test images below the cut.
        ``acc`` is the percentage of label-0 (Migdal) test images below the
        cut, i.e. the selection efficiency rather than a classification
        accuracy. Either is ``nan`` when its denominator is zero.
    """
    start_time = time.time()

    train_dataset_noise, test_dataset_noise = data_func_stack(file_type_list,[0,1,0])

    callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', restore_best_weights=True, patience=15)

    model = drp_model(drp_rate)

    print('Training '+str(drp_rate)+' dropout rate...')
    opt = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(train_dataset_noise, epochs=100, validation_data=test_dataset_noise,
              callbacks=[callback], verbose=0)

    cut = 0.01
    selected = true_pos = migdals = total = 0
    for data, labels in test_dataset_noise:
        labels = labels.numpy().flatten()
        batch_probs = model(data).numpy().flatten()
        passed = batch_probs < cut
        is_migdal = labels == 0
        selected += int(passed.sum())
        true_pos += int((passed & is_migdal).sum())
        migdals += int(is_migdal.sum())
        total += len(labels)
    false_pos = selected - true_pos

    def _pct(part, whole):
        # nan instead of ZeroDivisionError when nothing falls in the denominator
        return 100*part/whole if whole else float('nan')

    fpr = _pct(false_pos, total - migdals)
    acc = _pct(true_pos, migdals)

    print()
    print('Dropout Rate: '+str(drp_rate)+'\n')  # was misspelled "Dropour"
    print(f'Number of Migdal events identified with cut of {cut} = {selected}')
    print(f'Accuracy with cut of {cut} = {_pct(true_pos, selected):.3f}%')
    print('Total number of images tested: '+str(total))
    print('Total number of actual Migdal events tested: '+str(migdals))
    print('Number of Migdal events identified: '+str(selected))
    print(f'Percentage of Migdal events identified correctly: {acc:.3f}%')
    print('Number of false positive Migdal events: '+str(false_pos))
    print(f'False-positive rate: {fpr:.3g}%')
    print('Computation time: --- %s seconds ---' % (time.time() - start_time))
    print()

    return fpr, acc

In [25]:
# Dropout-rate sweep: one fresh training run per rate. Results are kept only
# in the two lists (same order as the rates) instead of also being written to
# globals() under keys like 'fpr0.1', which are not valid identifiers and
# collide with the other sweeps in this notebook.
fpr_list, acc_list = [],[]
for i in [0, 0.05, 0.1, 0.15, 0.25, 0.4]:
    fpr, acc = drp_func(i)
    fpr_list.append(fpr)
    acc_list.append(acc)

Training 0 dropout rate...

Dropour Rate: 0

Number of Migdal events identified with cut of 0.01 = 1253
Accuracy with cut of 0.01 = 99.920%
Total number of images tested: 3916
Total number of actual Migdal events tested: 1918
Number of Migdal events identified: 1253
Percentage of Migdal events identified correctly: 65.276%
Number of false positive Migdal events: 1
False-positive rate: 0.0501%
Computation time: --- 657.0720698833466 seconds ---

Training 0.05 dropout rate...

Dropour Rate: 0.05

Number of Migdal events identified with cut of 0.01 = 1324
Accuracy with cut of 0.01 = 99.622%
Total number of images tested: 3916
Total number of actual Migdal events tested: 1965
Number of Migdal events identified: 1324
Percentage of Migdal events identified correctly: 67.125%
Number of false positive Migdal events: 5
False-positive rate: 0.256%
Computation time: --- 519.2526006698608 seconds ---

Training 0.1 dropout rate...

Dropour Rate: 0.1

Number of Migdal events identified with cut of 0

In [26]:
fpr_list

[0.05005005005005005,
 0.25627883136852897,
 0.05263157894736842,
 0.05178663904712584,
 0.20273694880892043,
 0.0508130081300813]

In [27]:
acc_list

[65.27632950990615,
 67.12468193384224,
 54.41468253968254,
 65.84382871536523,
 68.14204837879568,
 66.06776180698152]

### L1 Regulariser Rate

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, LeakyReLU, Dropout

def l1_model(l1):
    """Return the uncompiled CNN with a configurable L1 penalty.

    Three conv / LeakyReLU / max-pool stages (10, 30, 30 filters) are followed
    by two hidden dense layers (20, 10 units) and a sigmoid output. The dropout
    layers are kept in place but with rate 0.

    Parameters
    ----------
    l1 : float
        L1 factor applied to the kernels of the two hidden dense layers.
    """
    channels = len(file_type_list)

    layer_list = [Conv2D(10, kernel_size=(3,3), input_shape=(150,150,channels), padding='same')]
    layer_list += [LeakyReLU(), MaxPooling2D()]
    for n_filters in (30, 30):
        layer_list.append(Conv2D(n_filters, kernel_size=(3,3), padding='same'))
        layer_list.append(LeakyReLU())
        layer_list.append(MaxPooling2D())

    layer_list.append(Flatten())
    for n_units in (20, 10):
        layer_list.append(Dropout(0))
        layer_list.append(Dense(n_units, kernel_regularizer=tf.keras.regularizers.L1(l1)))
        layer_list.append(LeakyReLU())
    layer_list.append(Dense(1, activation='sigmoid'))

    return Sequential(layer_list, name='l1_model')

# Build once so a broken architecture fails here rather than mid-sweep.
model = l1_model(0.1)

In [12]:
def l1_func(l1):
    """Train a fresh ``l1_model(l1)`` (Adam, learning rate 0.001) and score it.

    A test image counts as "identified as Migdal" when the sigmoid output is
    below ``cut`` (0.01); Migdal images carry label 0 (see ``data_func_stack``).

    NOTE(review): the test dataset is also the validation data that drives
    early stopping and weight restoration, so the scores are optimistic.

    Parameters
    ----------
    l1 : float
        L1 regularisation factor passed to ``l1_model``.

    Returns
    -------
    (fpr, acc) : tuple of float
        ``fpr`` is the percentage of label-1 test images below the cut.
        ``acc`` is the percentage of label-0 (Migdal) test images below the
        cut, i.e. the selection efficiency rather than a classification
        accuracy. Either is ``nan`` when its denominator is zero.
    """
    start_time = time.time()

    train_dataset_noise, test_dataset_noise = data_func_stack(file_type_list,[0,1,0])

    callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', restore_best_weights=True, patience=15)

    model = l1_model(l1)

    print('Training '+str(l1)+' regularisation rate...')
    opt = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(train_dataset_noise, epochs=100, validation_data=test_dataset_noise,
              callbacks=[callback], verbose=0)

    cut = 0.01
    selected = true_pos = migdals = total = 0
    for data, labels in test_dataset_noise:
        labels = labels.numpy().flatten()
        batch_probs = model(data).numpy().flatten()
        passed = batch_probs < cut
        is_migdal = labels == 0
        selected += int(passed.sum())
        true_pos += int((passed & is_migdal).sum())
        migdals += int(is_migdal.sum())
        total += len(labels)
    false_pos = selected - true_pos

    def _pct(part, whole):
        # nan instead of ZeroDivisionError when nothing falls in the denominator
        return 100*part/whole if whole else float('nan')

    fpr = _pct(false_pos, total - migdals)
    acc = _pct(true_pos, migdals)

    print()
    print('L1 Regularisation Rate: '+str(l1)+'\n')
    print(f'Number of Migdal events identified with cut of {cut} = {selected}')
    print(f'Accuracy with cut of {cut} = {_pct(true_pos, selected):.3f}%')
    print('Total number of images tested: '+str(total))
    print('Total number of actual Migdal events tested: '+str(migdals))
    print('Number of Migdal events identified: '+str(selected))
    print(f'Percentage of Migdal events identified correctly: {acc:.3f}%')
    print('Number of false positive Migdal events: '+str(false_pos))
    print(f'False-positive rate: {fpr:.3g}%')
    print('Computation time: --- %s seconds ---' % (time.time() - start_time))
    print()

    return fpr, acc

In [14]:
# L1-rate sweep: one fresh training run per rate. Results are kept only in the
# two lists (same order as the rates) instead of also being written to
# globals() under keys like 'fpr0.1', which are not valid identifiers and
# collide with the other sweeps in this notebook.
fpr_list, acc_list = [],[]
for i in [0, 0.1, 0.2, 0.3, 0.4]:
    fpr, acc = l1_func(i)
    fpr_list.append(fpr)
    acc_list.append(acc)

Training 0 regularisation rate...

L1 Regularisation Rate: 0

Number of Migdal events identified with cut of 0.01 = 1069
Accuracy with cut of 0.01 = 99.626%
Total number of images tested: 3916
Total number of actual Migdal events tested: 1929
Number of Migdal events identified: 1069
Percentage of Migdal events identified correctly: 55.210%
Number of false positive Migdal events: 4
False-positive rate: 0.201%
Computation time: --- 339.1697053909302 seconds ---

Training 0.1 regularisation rate...

L1 Regularisation Rate: 0.1

Number of Migdal events identified with cut of 0.01 = 1244
Accuracy with cut of 0.01 = 99.839%
Total number of images tested: 3916
Total number of actual Migdal events tested: 1966
Number of Migdal events identified: 1244
Percentage of Migdal events identified correctly: 63.174%
Number of false positive Migdal events: 2
False-positive rate: 0.103%
Computation time: --- 1072.6538841724396 seconds ---

Training 0.2 regularisation rate...

L1 Regularisation Rate: 0.2


### L1 + L2 Regularisation

In [24]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, LeakyReLU, Dropout

def l12_model(l1,l2):
    """Return the uncompiled CNN with a combined L1 + L2 penalty.

    Three conv / LeakyReLU / max-pool stages (10, 30, 30 filters) are followed
    by two hidden dense layers (20, 10 units) and a sigmoid output. The dropout
    layers are kept in place but with rate 0.

    Parameters
    ----------
    l1, l2 : float
        L1 and L2 factors handed to ``L1L2`` for the kernels of both hidden
        dense layers.
    """
    net = Sequential(name='l12_model')

    first_conv = Conv2D(10, kernel_size=(3,3),
                        input_shape=(150,150,len(file_type_list)), padding='same')
    for layer in (first_conv, LeakyReLU(), MaxPooling2D()):
        net.add(layer)
    for _ in range(2):
        net.add(Conv2D(30, kernel_size=(3,3), padding='same'))
        net.add(LeakyReLU())
        net.add(MaxPooling2D())

    net.add(Flatten())
    for width in (20, 10):
        net.add(Dropout(0))
        net.add(Dense(width, kernel_regularizer=tf.keras.regularizers.L1L2(l1,l2)))
        net.add(LeakyReLU())
    net.add(Dense(1, activation='sigmoid'))

    return net

# Build once so a broken architecture fails here rather than mid-sweep.
model = l12_model(0.1,0.1)

In [25]:
def l12_func(l1, l2):
    """Train a fresh ``l12_model(l1, l2)`` (Adam, learning rate 0.001) and score it.

    A test image counts as "identified as Migdal" when the sigmoid output is
    below ``cut`` (0.01); Migdal images carry label 0 (see ``data_func_stack``).

    NOTE(review): the test dataset is also the validation data that drives
    early stopping and weight restoration, so the scores are optimistic.

    Parameters
    ----------
    l1, l2 : float
        L1 and L2 regularisation factors passed to ``l12_model``.

    Returns
    -------
    (fpr, acc) : tuple of float
        ``fpr`` is the percentage of label-1 test images below the cut.
        ``acc`` is the percentage of label-0 (Migdal) test images below the
        cut, i.e. the selection efficiency rather than a classification
        accuracy. Either is ``nan`` when its denominator is zero.
    """
    start_time = time.time()

    train_dataset_noise, test_dataset_noise = data_func_stack(file_type_list,[0,1,0])

    callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', restore_best_weights=True, patience=15)

    model = l12_model(l1, l2)

    print('Training L1 = '+str(l1)+' and L2 = '+str(l2)+' regularisation rates...')
    opt = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(train_dataset_noise, epochs=100, validation_data=test_dataset_noise,
              callbacks=[callback], verbose=0)

    cut = 0.01
    selected = true_pos = migdals = total = 0
    for data, labels in test_dataset_noise:
        labels = labels.numpy().flatten()
        batch_probs = model(data).numpy().flatten()
        passed = batch_probs < cut
        is_migdal = labels == 0
        selected += int(passed.sum())
        true_pos += int((passed & is_migdal).sum())
        migdals += int(is_migdal.sum())
        total += len(labels)
    false_pos = selected - true_pos

    def _pct(part, whole):
        # nan instead of ZeroDivisionError when nothing falls in the denominator
        return 100*part/whole if whole else float('nan')

    fpr = _pct(false_pos, total - migdals)
    acc = _pct(true_pos, migdals)

    print()
    print('L1 Regularisation Rate: '+str(l1)+', L2 Regularisation Rate: '+str(l2)+'\n')
    print(f'Number of Migdal events identified with cut of {cut} = {selected}')
    print(f'Accuracy with cut of {cut} = {_pct(true_pos, selected):.3f}%')
    print('Total number of images tested: '+str(total))
    print('Total number of actual Migdal events tested: '+str(migdals))
    print('Number of Migdal events identified: '+str(selected))
    print(f'Percentage of Migdal events identified correctly: {acc:.3f}%')
    print('Number of false positive Migdal events: '+str(false_pos))
    print(f'False-positive rate: {fpr:.3g}%')
    print('Computation time: --- %s seconds ---' % (time.time() - start_time))
    print()

    return fpr, acc

In [None]:
# Grid over L1 (outer loop) and L2 (inner loop) rates. Results are appended in
# row-major order: all L2 values for the first L1 rate, then the next L1 rate.
fpr_list = []
acc_list = []
for i in (0, 0.1, 0.2):
    for j in (0, 0.1, 0.3):
        fpr, acc = l12_func(i, j)
        fpr_list += [fpr]
        acc_list += [acc]

Training L1 = 0 and L2 = 0 regularisation rates...

L1 Regularisation Rate: 0, L2 Regularisation Rate: 0

Number of Migdal events identified with cut of 0.01 = 1434
Accuracy with cut of 0.01 = 96.722%
Total number of images tested: 3916
Total number of actual Migdal events tested: 1951
Number of Migdal events identified: 1434
Percentage of Migdal events identified correctly: 71.092%
Number of false positive Migdal events: 47
False-positive rate: 2.39%
Computation time: --- 471.4690525531769 seconds ---

Training L1 = 0 and L2 = 0.1 regularisation rates...

L1 Regularisation Rate: 0, L2 Regularisation Rate: 0.1

Number of Migdal events identified with cut of 0.01 = 1331
Accuracy with cut of 0.01 = 99.775%
Total number of images tested: 3916
Total number of actual Migdal events tested: 1924
Number of Migdal events identified: 1331
Percentage of Migdal events identified correctly: 69.023%
Number of false positive Migdal events: 3
False-positive rate: 0.151%
Computation time: --- 787.26863

In [None]:
fpr_list

In [None]:
acc_list

To do:

- Do the combined L1 + L2 sweep.
- Pick the best overall parameters.
- Plot the ROC curve for the resulting model.

### Optimal Model

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, LeakyReLU, Dropout

def opt_model():
    """Return the uncompiled CNN with the hyperparameters chosen from the sweeps.

    Same layout as the other builders: three conv / LeakyReLU / max-pool
    stages (10, 30, 30 filters), then two hidden dense layers (20, 10 units)
    and a sigmoid output. Both hidden dense layers are preceded by
    Dropout(0.05) and use an L1L2(0.05, 0.1) kernel regulariser. The number of
    input channels is the length of the module-level ``file_type_list``.
    """
    n_channels = len(file_type_list)
    return Sequential([
        Conv2D(10, kernel_size=(3,3), input_shape=(150,150,n_channels), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Conv2D(30, kernel_size=(3,3), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Conv2D(30, kernel_size=(3,3), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Flatten(),
        Dropout(0.05),
        Dense(20, kernel_regularizer=tf.keras.regularizers.L1L2(0.05,0.1)),
        LeakyReLU(),
        Dropout(0.05),
        Dense(10, kernel_regularizer=tf.keras.regularizers.L1L2(0.05,0.1)),
        LeakyReLU(),
        Dense(1, activation='sigmoid'),
    ], name='opt_model')

# Call the builder: the original bound the function object itself to `model`,
# unlike every other builder cell in this notebook.
model = opt_model()

In [14]:
# Train the tuned architecture once with the same recipe as the sweeps:
# Adam at 0.001, binary cross-entropy, up to 100 epochs, early stopping on
# validation accuracy (patience 15) with the best weights restored.
# NOTE(review): the test set doubles as the validation set here, so the
# scores computed from it afterwards are optimistic.
train_dataset_noise, test_dataset_noise = data_func_stack(file_type_list, [0, 1, 0])

model = opt_model()
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    restore_best_weights=True,
    patience=15,
)
model.fit(
    train_dataset_noise,
    epochs=100,
    validation_data=test_dataset_noise,
    callbacks=[callback],
    verbose=1,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100


<tensorflow.python.keras.callbacks.History at 0x7fe8b4616220>

In [16]:
# Score the trained model on the test set: an image is identified as Migdal
# when its sigmoid output is below `cut` (Migdal images carry label 0).
actual_label = []   # true labels of every image identified as Migdal
fp_indices = []     # test-set positions (iteration order) of the false positives
migdals = 0         # number of label-0 images seen
total = 0           # number of images seen

cut = 0.01

for tdn in test_dataset_noise:
    data, labels = tdn
    labels = labels.numpy().flatten()
    batch_probs = model(data).numpy().flatten()
    indices = np.where(batch_probs < cut)[0]
    actual_label.extend(list(labels[indices]))
    migdals += len(labels[labels == 0])

    # `total` is still the offset of this batch within the test set here.
    fp_indices.extend(list(np.where((batch_probs < cut) & (labels == 1))[0] + total))

    total += len(labels)

test_list = [x for x in actual_label if x == 0]

n_selected, n_true = len(actual_label), len(test_list)
n_false = n_selected - n_true
# Each percentage falls back to nan when its denominator is zero (e.g. a cut
# so tight that nothing is selected) instead of raising ZeroDivisionError.
purity = n_true/n_selected*100 if n_selected else float('nan')
efficiency = 100*n_true/migdals if migdals else float('nan')
fp_rate = 100*n_false/(total-migdals) if (total-migdals) else float('nan')

print()
print(f'Number of Migdal events identified with cut of {cut} = {n_selected}')
print(f'Accuracy with cut of {cut} = {purity:.3f}%')
print('Total number of images tested: '+str(total))
print('Total number of actual Migdal events tested: '+str(migdals))
print('Number of Migdal events identified: '+str(n_selected))
print(f'Percentage of Migdal events identified correctly: {efficiency:.3f}%')
print('Number of false positive Migdal events: '+str(n_false))
print(f'False-positive rate: {fp_rate:.3g}%')
print()


Number of Migdal events identified with cut of 0.01 = 1374
Accuracy with cut of 0.01 = 99.854%
Total number of images tested: 3916
Total number of actual Migdal events tested: 2009
Number of Migdal events identified: 1374
Percentage of Migdal events identified correctly: 68.293%
Number of false positive Migdal events: 2
False-positive rate: 0.105%



In [19]:
# Score the trained model on the test set: an image is identified as Migdal
# when its sigmoid output is below `cut` (Migdal images carry label 0).
actual_label = []   # true labels of every image identified as Migdal
fp_indices = []     # test-set positions (iteration order) of the false positives
migdals = 0         # number of label-0 images seen
total = 0           # number of images seen

cut = 0.001

for tdn in test_dataset_noise:
    data, labels = tdn
    labels = labels.numpy().flatten()
    batch_probs = model(data).numpy().flatten()
    indices = np.where(batch_probs < cut)[0]
    actual_label.extend(list(labels[indices]))
    migdals += len(labels[labels == 0])

    # `total` is still the offset of this batch within the test set here.
    fp_indices.extend(list(np.where((batch_probs < cut) & (labels == 1))[0] + total))

    total += len(labels)

test_list = [x for x in actual_label if x == 0]

n_selected, n_true = len(actual_label), len(test_list)
n_false = n_selected - n_true
# Each percentage falls back to nan when its denominator is zero (e.g. a cut
# so tight that nothing is selected) instead of raising ZeroDivisionError.
purity = n_true/n_selected*100 if n_selected else float('nan')
efficiency = 100*n_true/migdals if migdals else float('nan')
fp_rate = 100*n_false/(total-migdals) if (total-migdals) else float('nan')

print()
print(f'Number of Migdal events identified with cut of {cut} = {n_selected}')
print(f'Accuracy with cut of {cut} = {purity:.3f}%')
print('Total number of images tested: '+str(total))
print('Total number of actual Migdal events tested: '+str(migdals))
print('Number of Migdal events identified: '+str(n_selected))
print(f'Percentage of Migdal events identified correctly: {efficiency:.3f}%')
print('Number of false positive Migdal events: '+str(n_false))
print(f'False-positive rate: {fp_rate:.3g}%')
print()


Number of Migdal events identified with cut of 0.001 = 1115
Accuracy with cut of 0.001 = 100.000%
Total number of images tested: 3916
Total number of actual Migdal events tested: 2009
Number of Migdal events identified: 1115
Percentage of Migdal events identified correctly: 55.500%
Number of false positive Migdal events: 0
False-positive rate: 0%



In [21]:
# Score the trained model on the test set: an image is identified as Migdal
# when its sigmoid output is below `cut` (Migdal images carry label 0).
actual_label = []   # true labels of every image identified as Migdal
fp_indices = []     # test-set positions (iteration order) of the false positives
migdals = 0         # number of label-0 images seen
total = 0           # number of images seen

cut = 0.003

for tdn in test_dataset_noise:
    data, labels = tdn
    labels = labels.numpy().flatten()
    batch_probs = model(data).numpy().flatten()
    indices = np.where(batch_probs < cut)[0]
    actual_label.extend(list(labels[indices]))
    migdals += len(labels[labels == 0])

    # `total` is still the offset of this batch within the test set here.
    fp_indices.extend(list(np.where((batch_probs < cut) & (labels == 1))[0] + total))

    total += len(labels)

test_list = [x for x in actual_label if x == 0]

n_selected, n_true = len(actual_label), len(test_list)
n_false = n_selected - n_true
# Each percentage falls back to nan when its denominator is zero (e.g. a cut
# so tight that nothing is selected) instead of raising ZeroDivisionError.
purity = n_true/n_selected*100 if n_selected else float('nan')
efficiency = 100*n_true/migdals if migdals else float('nan')
fp_rate = 100*n_false/(total-migdals) if (total-migdals) else float('nan')

print()
print(f'Number of Migdal events identified with cut of {cut} = {n_selected}')
print(f'Accuracy with cut of {cut} = {purity:.3f}%')
print('Total number of images tested: '+str(total))
print('Total number of actual Migdal events tested: '+str(migdals))
print('Number of Migdal events identified: '+str(n_selected))
print(f'Percentage of Migdal events identified correctly: {efficiency:.3f}%')
print('Number of false positive Migdal events: '+str(n_false))
print(f'False-positive rate: {fp_rate:.3g}%')
print()


Number of Migdal events identified with cut of 0.003 = 1254
Accuracy with cut of 0.003 = 99.920%
Total number of images tested: 3916
Total number of actual Migdal events tested: 2009
Number of Migdal events identified: 1254
Percentage of Migdal events identified correctly: 62.369%
Number of false positive Migdal events: 1
False-positive rate: 0.0524%



In [23]:
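# TODO: ROC curve for the tuned model. Left disabled because it does not run
# as written: `test_labels` only exists inside data_func_stack (in the cells
# shown here), so the true labels would have to be collected from
# test_dataset_noise in the same loop as the images.

# data_list = []

# for tdn in test_dataset_noise:
#     data, labels = tdn
#     data_list.extend(data)
    
# from sklearn.metrics import roc_curve
# y_pred_keras = model.predict(data_list).ravel()
# fpr_keras, tpr_keras, thresholds_keras = roc_curve(test_labels, y_pred_keras)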