# Generator

In [2]:
import tensorflow as tf
from os import listdir
import numpy as np

In [3]:
def data_gen(file_nums, load_dir = '/vols/lz/lshanahan/data/numpy_noise/'):
    """Yield (sample, label) pairs one at a time from numbered ``.npy`` files.

    For every index in ``file_nums`` the files ``data_<i>.npy`` and
    ``labels_<i>.npy`` are read from ``load_dir``. A trailing axis of length 1
    is appended to the data array and the labels are cast to ``int32``.

    Args:
        file_nums: iterable of file indices to read, in order.
        load_dir: directory that holds the ``data_*.npy`` / ``labels_*.npy`` files.

    Yields:
        Tuples ``(data[j], labels[j:j+1])``; the label is a length-1 slice,
        not a scalar.
    """
    for file_id in file_nums:
        data_path = f'{load_dir}/data_{file_id}.npy'
        label_path = f'{load_dir}/labels_{file_id}.npy'

        # Append a trailing singleton axis to the loaded array.
        samples = np.load(data_path)[..., np.newaxis]
        targets = np.load(label_path).astype(np.int32)

        # The number of yielded items is driven by the label array.
        yield from ((samples[row], targets[row:row+1]) for row in range(len(targets)))
            
            
def data_gen_stack(file_nums, file_type_list, sqrt_scale= None, load_dir = '/vols/lz/lshanahan/data/numpy_noise/'):
    """Yield (multi-channel sample, label) pairs from several per-file arrays.

    For every index ``i`` in ``file_nums`` one array is loaded per entry of
    ``file_type_list`` from ``data_<i>_noise<suffix>.npy``; the arrays are
    stacked along a new last axis so each suffix becomes one channel. Labels
    come from ``labels_<i>.npy`` and are cast to ``int32``.

    The function works both when called directly (``str`` arguments, Python
    lists) and when driven by ``tf.data.Dataset.from_generator``, which hands
    the ``args`` over as NumPy arrays with string values encoded as bytes.

    Args:
        file_nums: iterable of file indices to read, in order.
        file_type_list: iterable of file-name suffixes (``str`` or ``bytes``),
            one per output channel.
        sqrt_scale: optional iterable of truthy/falsy flags paired with
            ``file_type_list``; a truthy flag applies ``np.sqrt`` to that
            channel. ``None`` (default) leaves every channel unscaled.
        load_dir: directory holding the ``.npy`` files (``str`` or ``bytes``).

    Yields:
        Tuples ``(data[j, :, :, :], labels[j:j+1])``; the label is a length-1
        slice, not a scalar.
    """
    def _as_text(value):
        # Accept bytes (incl. numpy.bytes_) as delivered by TensorFlow, or plain str.
        return value.decode('utf-8') if isinstance(value, bytes) else str(value)

    load_dir = _as_text(load_dir)
    suffixes = [_as_text(f) for f in file_type_list]

    # Identity check: `== None` on a NumPy array is element-wise and makes the
    # `if` raise "truth value of an array is ambiguous".
    if sqrt_scale is None:
        sqrt_scale = [0] * len(suffixes)

    for i in file_nums:
        channels = []
        for suffix, use_sqrt in zip(suffixes, sqrt_scale):
            channel = np.load(f'{load_dir}/data_{i}_noise{suffix}.npy')
            channels.append(np.sqrt(channel) if use_sqrt else channel)
        data = np.stack(channels, axis=-1)
        labels = np.load(f'{load_dir}/labels_{i}.npy').astype(np.int32)
        for j in range(len(labels)):
            yield data[j,:,:,:], labels[j:j+1]

In [5]:
# Fixed seed so the file-level train/validation split is identical on every run.
SPLIT_SEED = 0

file_nums = list(range(36))
file_type_list = ['_0.0_threshold', '_2.0_threshold', '_4.0_threshold',]
print(file_type_list)
# A private RandomState keeps the shuffle reproducible without touching
# NumPy's global random state.
np.random.RandomState(SPLIT_SEED).shuffle(file_nums)
split = 25


def _stacked_dataset(nums):
    """Wrap ``data_gen_stack`` for the given file indices in a ``tf.data.Dataset``.

    Each element is a ``(184, 184, len(file_type_list))`` float64 tensor paired
    with a length-1 int32 label tensor.
    """
    return tf.data.Dataset.from_generator(data_gen_stack,
                                          args = (nums, file_type_list),
                                          output_shapes=(tf.TensorShape((184,184,len(file_type_list))),tf.TensorShape(1)),
                                          output_types=(tf.float64,tf.int32))


# The first `split` shuffled files feed training; the remainder feed validation.
train_dataset = _stacked_dataset(file_nums[:split])
train_data_batch = train_dataset.batch(50)

test_dataset = _stacked_dataset(file_nums[split:])
test_data_batch = test_dataset.batch(50)

['_0.0_threshold', '_2.0_threshold', '_4.0_threshold']


In [6]:
# Show the shape and dtype of each component of a batched training element.
train_data_batch.element_spec

(TensorSpec(shape=(None, 184, 184, 3), dtype=tf.float64, name=None),
 TensorSpec(shape=(None, 1), dtype=tf.int32, name=None))

In [7]:
# Show the shape and dtype of each component of a batched test element.
test_data_batch.element_spec

(TensorSpec(shape=(None, 184, 184, 3), dtype=tf.float64, name=None),
 TensorSpec(shape=(None, 1), dtype=tf.int32, name=None))

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, LeakyReLU, Dropout

def opt_model(file_type_list):
    """Build the (uncompiled) CNN binary classifier named ``'opt_model'``.

    Architecture, in order:
      * three blocks of 3x3 ``Conv2D`` ('same' padding) -> ``LeakyReLU`` ->
        ``MaxPooling2D`` with 10, 30 and 30 filters;
      * ``Flatten``;
      * ``Dropout(0.05)`` -> ``Dense(20)`` -> ``LeakyReLU``;
      * ``Dropout(0.05)`` -> ``Dense(10)`` -> ``LeakyReLU``;
      * ``Dense(1)`` with a sigmoid activation.

    Both hidden dense layers use an ``L1L2(0.05, 0.1)`` kernel regularizer.

    Args:
        file_type_list: sequence whose length sets the number of input
            channels; the input shape is ``(184, 184, len(file_type_list))``.

    Returns:
        A ``Sequential`` model.
    """
    n_channels = len(file_type_list)

    # Only the first convolution declares the input shape.
    stack = [
        Conv2D(10, kernel_size=(3,3), input_shape=(184,184,n_channels), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
    ]
    for n_filters in (30, 30):
        stack += [
            Conv2D(n_filters, kernel_size=(3,3), padding='same'),
            LeakyReLU(),
            MaxPooling2D(),
        ]

    stack.append(Flatten())

    # Regularized dense head; each hidden layer gets its own regularizer instance.
    for n_units in (20, 10):
        stack += [
            Dropout(0.05),
            Dense(n_units, kernel_regularizer = tf.keras.regularizers.L1L2(0.05,0.1)),
            LeakyReLU(),
        ]

    stack.append(Dense(1, activation='sigmoid'))

    network = Sequential(stack, name='opt_model')
    return network

# Instantiate the network with one input channel per entry in file_type_list.
model = opt_model(file_type_list)

In [9]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "opt_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 184, 184, 10)      280       
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 184, 184, 10)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 92, 92, 10)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 92, 92, 30)        2730      
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 92, 92, 30)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 46, 46, 30)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 46, 46, 30)        81

In [10]:
# Stop once validation accuracy has not improved for 15 epochs and restore
# the weights from the best epoch.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', restore_best_weights=True, patience=15)

# Build a new model instance so this cell does not continue training a
# previously fitted one.
model = opt_model(file_type_list)

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss='binary_crossentropy',metrics=['accuracy'])

# Keep the returned History so the per-epoch loss/accuracy curves can be
# inspected or plotted after training.
history = model.fit(train_data_batch, epochs=150, validation_data=test_data_batch,
                    callbacks=[callback], verbose=1)

2022-08-11 09:23:19.366582: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-08-11 09:23:19.367177: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2099995000 Hz


Epoch 1/150


2022-08-11 09:23:20.092465: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-08-11 09:23:34.703076: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7


Epoch 2/150
Epoch 3/150

KeyboardInterrupt: 

Expected rate: 32.6 Migdal events per 1 million events (see the paper for this figure).

- Split generated data into train and test sets
