# Hyperparameter Optimisation

### Creating smaller dataset

In [10]:
import tensorflow as tf
from os import listdir
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, LeakyReLU, Dropout

In [11]:
def data_gen(file_nums, load_dir = '/vols/lz/lshanahan/data/numpy_noise/'):
    for i in file_nums: 
        data = np.expand_dims(np.load(f'{load_dir}/data_{i}.npy'),-1)
        labels = np.load(f'{load_dir}/labels_{i}.npy').astype(np.int32)
        for j in range(len(labels)):
            yield data[j], labels[j:j+1]
            
            
def data_gen_stack(file_nums, file_type_list, sqrt_scale= None, load_dir = '/vols/lz/lshanahan/data/numpy_noise/'):
    if sqrt_scale == None:
        sqrt_scale = [0 for f in file_type_list]
    for i in file_nums: 
        data = np.stack([np.sqrt(np.load(f'{load_dir}/data_{i}_noise'+f.decode('utf-8')+'.npy')) if s \
                               else np.load(f'{load_dir}/data_{i}_noise'+f.decode('utf-8')+'.npy') \
                               for f,s in zip(file_type_list,sqrt_scale)],axis=-1)
        labels = np.load(f'{load_dir}/labels_{i}.npy').astype(np.int32)
        for j in range(len(labels)):
            yield data[j,:,:,:], labels[j:j+1]

In [12]:
def opt_model(file_type_list):
    opt_model = Sequential([
        Conv2D(10, kernel_size=(3,3), input_shape=(184,184,len(file_type_list)), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Conv2D(30, kernel_size=(3,3), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Conv2D(30, kernel_size=(3,3), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Flatten(),
        Dropout(0.05),
        Dense(20, kernel_regularizer = tf.keras.regularizers.L1L2(0.05,0.1)),
        LeakyReLU(),
        Dropout(0.05),
        Dense(10, kernel_regularizer = tf.keras.regularizers.L1L2(0.05,0.1)),
        LeakyReLU(),
        Dense(1, activation='sigmoid'),
    ], name='opt_model')
    return opt_model

In [46]:
def data_train():
    
    file_nums = list(range(36))
    file_type_list = ['_0.0_threshold', '_2.0_threshold', '_4.0_threshold',]
    sqrt_scale = [0,1,0]
    print(file_type_list)
    np.random.shuffle(file_nums)
    train_dataset = tf.data.Dataset.from_generator(data_gen_stack, 
                                            args = (file_nums[:2], file_type_list, sqrt_scale),
                                            output_shapes=(tf.TensorShape((184,184,len(file_type_list))),tf.TensorShape(1)), 
                                            output_types=(tf.float64,tf.int32))

    train_data_batch = train_dataset.batch(50)

    test_dataset = tf.data.Dataset.from_generator(data_gen_stack, 
                                            args = (file_nums[2:3], file_type_list, sqrt_scale),
                                            output_shapes=(tf.TensorShape((184,184,len(file_type_list))),tf.TensorShape(1)), 
                                            output_types=(tf.float64,tf.int32))

    test_data_batch = test_dataset.batch(50)
    
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', restore_best_weights=True, patience=15)

    model = opt_model(file_type_list)

    opt = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=opt, loss='binary_crossentropy',metrics=['accuracy'])
    model.fit(train_data_batch, epochs=150, validation_data=test_data_batch,callbacks=[callback],\
             verbose = 1)

In [39]:
def data_train():
    
    file_type_list = ['_0.0_threshold', '_0.0_threshold', '_4.0_threshold']
    sqrt_scale = [0,1,0]
    
    file_nums = list(range(36))
    np.random.shuffle(file_nums)
    train_dataset = tf.data.Dataset.from_generator(data_gen_stack, 
                                            args = (file_nums[:2], file_type_list, sqrt_scale),
                                            output_shapes=(tf.TensorShape((184,184,len(file_type_list))),tf.TensorShape(1)), 
                                            output_types=(tf.float64,tf.int32))

    train_data_batch = train_dataset.batch(50)

    test_dataset = tf.data.Dataset.from_generator(data_gen_stack, 
                                            args = (file_nums[2:3], file_type_list, sqrt_scale),
                                            output_shapes=(tf.TensorShape((184,184,len(file_type_list))),tf.TensorShape(1)), 
                                            output_types=(tf.float64,tf.int32))

    test_data_batch = test_dataset.batch(50)
    
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', restore_best_weights=True, patience=15)

    model = opt_model(file_type_list)

    opt = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=opt, loss='binary_crossentropy',metrics=['accuracy'])
    model.fit(train_data_batch, epochs=150, validation_data=test_data_batch,callbacks=[callback],\
             verbose = 0)
    
#     cut = 0.1
    
#     for tdn in test_dataset_batch:
#         data, labels = tdn
#         labels = labels.numpy().flatten()
#         batch_probs = model(data).numpy().flatten()
#         indices = np.where(batch_probs < cut)[0]
#         actual_label.extend(list(labels[indices]))
#         migdals += len(labels[labels == 0])

#         fp_indices.extend(list(np.where((batch_probs < cut) & (labels == 1))[0] + total))

#         total += len(labels)

#     test_list = [x for x in actual_label if x == 0]
#     del data, labels, batch_probs, indices
#     print('With file_type_list of: '+str(file_type_list)+' and sqrt_scale of: '+str(sqrt_scale))
#     print(f'Percentage of Migdal events identified correctly: {(100*len(test_list)/migdals):.3f}%')
#     print('Number of false positive Migdal events: '+str(len(actual_label)-len(test_list)))
#     print(f'False-positive rate: {(100*(len(actual_label)-len(test_list))/(total-migdals)):.3g}%')
#     print()

In [47]:
data_train()

['_0.0_threshold', '_2.0_threshold', '_4.0_threshold']
Epoch 1/150


2022-08-11 10:49:09.643460: W tensorflow/core/framework/op_kernel.cc:1751] Invalid argument: ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Traceback (most recent call last):

  File "/vols/lz/MIGDAL/home/hep/lms121/vols/lz/MIGDAL/Anaconda/envs/tf_gpu/lib/python3.9/site-packages/tensorflow/python/ops/script_ops.py", line 249, in __call__
    ret = func(*args)

  File "/vols/lz/MIGDAL/home/hep/lms121/vols/lz/MIGDAL/Anaconda/envs/tf_gpu/lib/python3.9/site-packages/tensorflow/python/autograph/impl/api.py", line 620, in wrapper
    return func(*args, **kwargs)

  File "/vols/lz/MIGDAL/home/hep/lms121/vols/lz/MIGDAL/Anaconda/envs/tf_gpu/lib/python3.9/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 891, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "/tmp/ipykernel_14340/3906209123.py", line 10, in data_gen_stack
    if sqrt_scale == None:

ValueError: The truth value of an arra

InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument:  ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Traceback (most recent call last):

  File "/vols/lz/MIGDAL/home/hep/lms121/vols/lz/MIGDAL/Anaconda/envs/tf_gpu/lib/python3.9/site-packages/tensorflow/python/ops/script_ops.py", line 249, in __call__
    ret = func(*args)

  File "/vols/lz/MIGDAL/home/hep/lms121/vols/lz/MIGDAL/Anaconda/envs/tf_gpu/lib/python3.9/site-packages/tensorflow/python/autograph/impl/api.py", line 620, in wrapper
    return func(*args, **kwargs)

  File "/vols/lz/MIGDAL/home/hep/lms121/vols/lz/MIGDAL/Anaconda/envs/tf_gpu/lib/python3.9/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 891, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "/tmp/ipykernel_14340/3906209123.py", line 10, in data_gen_stack
    if sqrt_scale == None:

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]]
  (1) Invalid argument:  ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Traceback (most recent call last):

  File "/vols/lz/MIGDAL/home/hep/lms121/vols/lz/MIGDAL/Anaconda/envs/tf_gpu/lib/python3.9/site-packages/tensorflow/python/ops/script_ops.py", line 249, in __call__
    ret = func(*args)

  File "/vols/lz/MIGDAL/home/hep/lms121/vols/lz/MIGDAL/Anaconda/envs/tf_gpu/lib/python3.9/site-packages/tensorflow/python/autograph/impl/api.py", line 620, in wrapper
    return func(*args, **kwargs)

  File "/vols/lz/MIGDAL/home/hep/lms121/vols/lz/MIGDAL/Anaconda/envs/tf_gpu/lib/python3.9/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 891, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "/tmp/ipykernel_14340/3906209123.py", line 10, in data_gen_stack
    if sqrt_scale == None:

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]]
	 [[Shape/_6]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_15858]

Function call stack:
train_function -> train_function


In [34]:
file_types = ['','sqrt_sub_bg','_0.0_threshold','_1.0_threshold','_2.0_threshold','_3.0_threshold',\
             '_4.0_threshold','_5.0_threshold']

for i in file_types:
    print(i)
    
from itertools import chain, combinations

def powerset(iterable):
    return chain.from_iterable(combinations(file_types, r) for r in range(len(file_types)+1))

stuff = [1, 2, 3]
for i, combo in enumerate(powerset(stuff), 1):
    print('combo #{}: {}'.format(i, combo))


sqrt_sub_bg
_0.0_threshold
_1.0_threshold
_2.0_threshold
_3.0_threshold
_4.0_threshold
_5.0_threshold
combo #1: ()
combo #2: ('',)
combo #3: ('sqrt_sub_bg',)
combo #4: ('_0.0_threshold',)
combo #5: ('_1.0_threshold',)
combo #6: ('_2.0_threshold',)
combo #7: ('_3.0_threshold',)
combo #8: ('_4.0_threshold',)
combo #9: ('_5.0_threshold',)
combo #10: ('', 'sqrt_sub_bg')
combo #11: ('', '_0.0_threshold')
combo #12: ('', '_1.0_threshold')
combo #13: ('', '_2.0_threshold')
combo #14: ('', '_3.0_threshold')
combo #15: ('', '_4.0_threshold')
combo #16: ('', '_5.0_threshold')
combo #17: ('sqrt_sub_bg', '_0.0_threshold')
combo #18: ('sqrt_sub_bg', '_1.0_threshold')
combo #19: ('sqrt_sub_bg', '_2.0_threshold')
combo #20: ('sqrt_sub_bg', '_3.0_threshold')
combo #21: ('sqrt_sub_bg', '_4.0_threshold')
combo #22: ('sqrt_sub_bg', '_5.0_threshold')
combo #23: ('_0.0_threshold', '_1.0_threshold')
combo #24: ('_0.0_threshold', '_2.0_threshold')
combo #25: ('_0.0_threshold', '_3.0_threshold')
combo #26: (