# Transfer Learning

In [5]:
import numpy as np
from skimage import io
from os import listdir
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
import time

from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.utils import shuffle

### GPU testing

In [6]:
# Sanity check: report how many GPUs TensorFlow can see before any training starts.
with tf.device('/GPU:0'):
    visible_gpus = tf.config.list_physical_devices('GPU')
    print("Num GPUs Available: ", len(visible_gpus))

Num GPUs Available:  1


In [7]:
def iter_sqrt(fname, num_iterations):
    """Load a ``.npy`` array and apply an element-wise square root repeatedly.

    Parameters
    ----------
    fname : str
        Path of the file handed to ``np.load``.
    num_iterations : int
        How many times ``np.sqrt`` is applied; ``0`` returns the array as loaded.

    Returns
    -------
    The loaded array after ``num_iterations`` successive square roots.
    """
    result = np.load(fname)
    for _ in range(num_iterations):
        result = np.sqrt(result)
    return result

In [8]:
def _load_channel_stack(prefix, file_type_list, sqrt_scale):
    """Load one ``.npy`` file per channel for an event class and stack them on a new last axis."""
    channels = []
    for suffix, n_sqrt in zip(file_type_list, sqrt_scale):
        path = 'np_data/' + prefix + suffix + '.npy'
        # A falsy scale means "use the file as stored"; otherwise take n_sqrt successive square roots.
        channels.append(iter_sqrt(path, n_sqrt) if n_sqrt else np.load(path))
    return np.stack(channels, axis=-1)


def data_func_stack(file_type_list, sqrt_scale, test_ratio=0.2, batch_size=50):
    """Build batched train/test ``tf.data.Dataset`` objects for Migdal classification.

    For every entry of ``file_type_list`` the files ``np_data/Migdal_noise<suffix>.npy``,
    ``np_data/NR_noise<suffix>.npy`` and ``np_data/Electron_noise<suffix>.npy`` are loaded
    and stacked as channels on the last axis. The NR and electron events are pooled and
    randomly sub-sampled to (at most) the number of Migdal events so the two classes are
    balanced.

    Label convention: Migdal events are labelled ``0``, all other events ``1``.

    Parameters
    ----------
    file_type_list : sequence of str
        File-name suffix for each input channel.
    sqrt_scale : sequence of int
        Per channel, the number of successive square roots applied to the loaded array
        (``0`` leaves it unchanged). Must be as long as ``file_type_list``.
    test_ratio : float, optional
        Fraction of events held out for the test set (passed to ``train_test_split``).
        Defaults to 0.2.
    batch_size : int, optional
        Batch size of both returned datasets. Defaults to 50.

    Returns
    -------
    (train_dataset, test_dataset) : tuple of tf.data.Dataset
        Batched datasets yielding ``(images, labels)`` pairs.

    Raises
    ------
    ValueError
        If ``file_type_list`` and ``sqrt_scale`` differ in length (``zip`` would otherwise
        silently drop channels).
    """
    if len(file_type_list) != len(sqrt_scale):
        raise ValueError(
            'file_type_list and sqrt_scale must have the same length, got '
            f'{len(file_type_list)} and {len(sqrt_scale)}'
        )

    migdal = _load_channel_stack('Migdal_noise', file_type_list, sqrt_scale)
    nuclear_recoil = _load_channel_stack('NR_noise', file_type_list, sqrt_scale)
    electron = _load_channel_stack('Electron_noise', file_type_list, sqrt_scale)

    # Pool the non-Migdal classes and keep a random subset no larger than the Migdal sample.
    # Only the kept rows are gathered, so no full shuffled copy of the pool is materialised.
    background_pool = np.concatenate([nuclear_recoil, electron])
    del nuclear_recoil, electron
    keep = np.random.permutation(len(background_pool))[:len(migdal)]
    background = background_pool[keep]
    del background_pool, keep

    # Labels take the dtype of the image data, as before: 0 = Migdal, 1 = not Migdal.
    labels = np.concatenate([
        np.zeros(len(migdal), dtype=migdal.dtype),
        np.ones(len(background), dtype=background.dtype),
    ])
    data = np.concatenate([migdal, background])
    del migdal, background

    # train_test_split shuffles by default, so a separate pre-shuffle (and the extra
    # copy of the full array it costs) is unnecessary.
    train_data_noise, test_data_noise, train_labels, test_labels = train_test_split(
        data, labels, test_size=test_ratio, random_state=42
    )
    del data, labels

    # Wrap the arrays in tf.data.Dataset objects and batch them.
    train_dataset_noise = tf.data.Dataset.from_tensor_slices(
        (train_data_noise, train_labels)
    ).batch(batch_size)
    test_dataset_noise = tf.data.Dataset.from_tensor_slices(
        (test_data_noise, test_labels)
    ).batch(batch_size)

    return train_dataset_noise, test_dataset_noise

In [9]:
# One file-name suffix per input channel: data_func_stack loads the matching .npy file of
# every event class for each entry and stacks them along the last axis, and opt_model uses
# the list length as the number of input channels.
file_type_list = ['_0.0_threshold', '_0.0_threshold', '_4.0_threshold']

*Re-run from here*

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, LeakyReLU, Dropout

def opt_model(n_channels=None, image_size=(150, 150)):
    """Build the (uncompiled) CNN used to separate Migdal from non-Migdal events.

    Architecture: three Conv2D(3x3, 'same') -> LeakyReLU -> MaxPooling2D stages with
    10, 30 and 30 filters, then Flatten, light dropout, two L1L2-regularised dense
    layers (20 and 10 units) and a single sigmoid output unit.

    Parameters
    ----------
    n_channels : int, optional
        Number of input channels. Defaults to ``len(file_type_list)``, read from the
        notebook-level ``file_type_list`` at call time.
    image_size : tuple of int, optional
        Height and width of the input images. Defaults to (150, 150).

    Returns
    -------
    tensorflow.keras.models.Sequential
        The model, named ``'opt_model'``; it still has to be compiled.
    """
    if n_channels is None:
        # Reading a module-level name needs no ``global`` statement.
        n_channels = len(file_type_list)

    network = Sequential([
        Conv2D(10, kernel_size=(3, 3), input_shape=(*image_size, n_channels), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Conv2D(30, kernel_size=(3, 3), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Conv2D(30, kernel_size=(3, 3), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Flatten(),
        Dropout(0.05),
        Dense(20, kernel_regularizer=tf.keras.regularizers.L1L2(l1=0.05, l2=0.1)),
        LeakyReLU(),
        Dropout(0.05),
        Dense(10, kernel_regularizer=tf.keras.regularizers.L1L2(l1=0.05, l2=0.1)),
        LeakyReLU(),
        Dense(1, activation='sigmoid'),
    ], name='opt_model')
    return network


model = opt_model()

In [7]:
# Pre-train on the original channel set (square root applied to the second channel only).
# data_func_stack needs a test ratio; 0.2 is consistent with the 3916 test images reported
# in the recorded evaluation output of this run.
train_dataset_noise, test_dataset_noise = data_func_stack(file_type_list, [0, 1, 0], 0.2)

# Stop when validation accuracy has not improved for 20 epochs and restore the best weights.
# NOTE: the test set doubles as the validation set here, so the reported test metrics are
# not fully independent of the early-stopping choice.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', restore_best_weights=True, patience=20)

model = opt_model()

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
model.fit(train_dataset_noise, epochs=150, validation_data=test_dataset_noise,
          callbacks=[callback], verbose=1)

2022-07-27 23:20:11.330784: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 8457480000 exceeds 10% of free system memory.
2022-07-27 23:20:15.650455: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 8457480000 exceeds 10% of free system memory.
2022-07-27 23:20:17.800050: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 8457480000 exceeds 10% of free system memory.


Epoch 1/150


2022-07-27 23:20:20.287319: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-07-27 23:20:20.291043: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2099995000 Hz
2022-07-27 23:20:20.510210: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-07-27 23:20:21.212054: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7




2022-07-27 23:20:37.481422: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 8457480000 exceeds 10% of free system memory.


Epoch 2/150


2022-07-27 23:20:50.248104: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 8457480000 exceeds 10% of free system memory.


Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 

<tensorflow.python.keras.callbacks.History at 0x7f4f04157160>

In [8]:
# Score the test set at a fixed probability cut.
# Labels follow data_func_stack: 0 = Migdal, 1 = not Migdal, so the sigmoid output is the
# "not Migdal" probability and an event is tagged as Migdal when that output is below `cut`.
actual_label = []  # true labels of every event tagged as Migdal
fp_indices = []    # positions (in test-set iteration order) of non-Migdal events tagged as Migdal
migdals = 0        # running count of true Migdal events

cut = 0.001
total = 0          # running count of events processed

for data, labels in test_dataset_noise:
    labels = labels.numpy().flatten()
    batch_probs = model(data).numpy().flatten()
    tagged = batch_probs < cut
    actual_label.extend(labels[tagged].tolist())
    migdals += int(np.count_nonzero(labels == 0))
    # np.flatnonzero gives batch-local positions; add the events already seen to make them global.
    fp_indices.extend((np.flatnonzero(tagged & (labels == 1)) + total).tolist())
    total += len(labels)

test_list = [x for x in actual_label if x == 0]  # correctly tagged Migdal events
false_positives = len(actual_label) - len(test_list)

# Guard every ratio: a tight cut can select nothing, which would otherwise raise ZeroDivisionError.
nan = float('nan')
purity = len(test_list)/len(actual_label)*100 if actual_label else nan
efficiency = 100*len(test_list)/migdals if migdals else nan
false_positive_rate = 100*false_positives/(total-migdals) if total != migdals else nan

print()
print(f'Number of Migdal events identified with cut of {cut} = {len(actual_label)}')
# NOTE: the value printed as "Accuracy" is the purity (precision) of the tagged sample.
print(f'Accuracy with cut of {cut} = {purity:.3f}%')
print('Total number of images tested: '+str(total))
print('Total number of actual Migdal events tested: '+str(migdals))
print('Number of Migdal events identified: '+str(len(actual_label)))
print(f'Percentage of Migdal events identified correctly: {efficiency:.3f}%')
print('Number of false positive Migdal events: '+str(false_positives))
print(f'False-positive rate: {false_positive_rate:.3g}%')
print()

# fpr, acc = (100*(len(actual_label)-len(test_list))/(total-migdals)), (100*len(test_list)/migdals)
# del start_time, model, actual_label, test_list, fp_indices, migdals, total, cut


Number of Migdal events identified with cut of 0.001 = 779
Accuracy with cut of 0.001 = 100.000%
Total number of images tested: 3916
Total number of actual Migdal events tested: 1955
Number of Migdal events identified: 779
Percentage of Migdal events identified correctly: 39.847%
Number of false positive Migdal events: 0
False-positive rate: 0%



In [9]:
len(model.layers)

17

## Saving/Restoring Model

In [10]:
# Persist the pre-trained model so it can be reloaded with load_model after a kernel restart.
model.save('tl_mod_1.h5')

Run from here when restarting session

In [27]:
import numpy as np
from skimage import io
from os import listdir
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
import time

from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.utils import shuffle

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, LeakyReLU, Dropout

In [28]:
from tensorflow.keras.models import load_model
# Restore the model written by model.save('tl_mod_1.h5'); it is the starting point for the
# training on the new channel set below.
model = load_model("tl_mod_1.h5")

In [29]:
len(model.layers)

17

In [30]:
model.summary()

Model: "opt_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 150, 150, 10)      280       
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 150, 150, 10)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 75, 75, 10)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 75, 75, 30)        2730      
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 75, 75, 30)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 37, 37, 30)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 37, 37, 30)        81

In [31]:
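# Optional (currently disabled): freeze the first six layers, i.e. the first two
# Conv2D -> LeakyReLU -> MaxPooling2D stages, so that only the later layers are retrained.
# With these lines commented out, every layer of the loaded model is fine-tuned.
# for i in range(6):
#     model.layers[i].trainable = False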

In [32]:
# Channel set for the new task: one file-name suffix per input channel ('' selects the
# file without a suffix). data_func_stack stacks the matching files along the last axis.
file_type_list = ['_2.0_threshold', '_sub_bg', '']

In [33]:
# Continue training the reloaded model on the new channel set (square root applied to the
# first channel only), holding out 70% of the events as the test/validation set.
train_dataset_noise, test_dataset_noise = data_func_stack(
    file_type_list, [1, 0, 0], 0.7
)

# Stop when validation accuracy has not improved for 10 epochs and restore the best weights.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
)

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
model.fit(
    train_dataset_noise,
    validation_data=test_dataset_noise,
    epochs=150,
    callbacks=[callback],
    verbose=1,
)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150


<tensorflow.python.keras.callbacks.History at 0x7f8524cca6a0>

In [43]:
# Score the test set at a fixed probability cut.
# Labels follow data_func_stack: 0 = Migdal, 1 = not Migdal, so the sigmoid output is the
# "not Migdal" probability and an event is tagged as Migdal when that output is below `cut`.
actual_label = []  # true labels of every event tagged as Migdal
fp_indices = []    # positions (in test-set iteration order) of non-Migdal events tagged as Migdal
migdals = 0        # running count of true Migdal events

cut = 0.08
total = 0          # running count of events processed

for data, labels in test_dataset_noise:
    labels = labels.numpy().flatten()
    batch_probs = model(data).numpy().flatten()
    tagged = batch_probs < cut
    actual_label.extend(labels[tagged].tolist())
    migdals += int(np.count_nonzero(labels == 0))
    # np.flatnonzero gives batch-local positions; add the events already seen to make them global.
    fp_indices.extend((np.flatnonzero(tagged & (labels == 1)) + total).tolist())
    total += len(labels)

test_list = [x for x in actual_label if x == 0]  # correctly tagged Migdal events
false_positives = len(actual_label) - len(test_list)

# Guard every ratio: a tight cut can select nothing, which would otherwise raise ZeroDivisionError.
nan = float('nan')
purity = len(test_list)/len(actual_label)*100 if actual_label else nan
efficiency = 100*len(test_list)/migdals if migdals else nan
false_positive_rate = 100*false_positives/(total-migdals) if total != migdals else nan

print()
print(f'Number of Migdal events identified with cut of {cut} = {len(actual_label)}')
# NOTE: the value printed as "Accuracy" is the purity (precision) of the tagged sample.
print(f'Accuracy with cut of {cut} = {purity:.3f}%')
print('Total number of images tested: '+str(total))
print('Total number of actual Migdal events tested: '+str(migdals))
print('Number of Migdal events identified: '+str(len(actual_label)))
print(f'Percentage of Migdal events identified correctly: {efficiency:.3f}%')
print('Number of false positive Migdal events: '+str(false_positives))
print(f'False-positive rate: {false_positive_rate:.3g}%')
print()

# fpr, acc = (100*(len(actual_label)-len(test_list))/(total-migdals)), (100*len(test_list)/migdals)
# del start_time, model, actual_label, test_list, fp_indices, migdals, total, cut


Number of Migdal events identified with cut of 0.08 = 3822
Accuracy with cut of 0.08 = 98.796%
Total number of images tested: 13705
Total number of actual Migdal events tested: 6859
Number of Migdal events identified: 3822
Percentage of Migdal events identified correctly: 55.052%
Number of false positive Migdal events: 46
False-positive rate: 0.672%



# Re-trying same thing without transfer learning

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, LeakyReLU, Dropout

def opt_model(n_channels=None, image_size=(150, 150)):
    """Build the (uncompiled) CNN used to separate Migdal from non-Migdal events.

    Architecture: three Conv2D(3x3, 'same') -> LeakyReLU -> MaxPooling2D stages with
    10, 30 and 30 filters, then Flatten, light dropout, two L1L2-regularised dense
    layers (20 and 10 units) and a single sigmoid output unit.

    Parameters
    ----------
    n_channels : int, optional
        Number of input channels. Defaults to ``len(file_type_list)``, read from the
        notebook-level ``file_type_list`` at call time.
    image_size : tuple of int, optional
        Height and width of the input images. Defaults to (150, 150).

    Returns
    -------
    tensorflow.keras.models.Sequential
        The model, named ``'opt_model'``; it still has to be compiled.
    """
    if n_channels is None:
        # Reading a module-level name needs no ``global`` statement.
        n_channels = len(file_type_list)

    network = Sequential([
        Conv2D(10, kernel_size=(3, 3), input_shape=(*image_size, n_channels), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Conv2D(30, kernel_size=(3, 3), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Conv2D(30, kernel_size=(3, 3), padding='same'),
        LeakyReLU(),
        MaxPooling2D(),
        Flatten(),
        Dropout(0.05),
        Dense(20, kernel_regularizer=tf.keras.regularizers.L1L2(l1=0.05, l2=0.1)),
        LeakyReLU(),
        Dropout(0.05),
        Dense(10, kernel_regularizer=tf.keras.regularizers.L1L2(l1=0.05, l2=0.1)),
        LeakyReLU(),
        Dense(1, activation='sigmoid'),
    ], name='opt_model')
    return network


# Fresh, untrained model for the comparison without transfer learning.
# With the default arguments this requires file_type_list to be defined already.
model = opt_model()

In [18]:
# Same channel set as in the transfer-learning run: one file-name suffix per input channel
# ('' selects the file without a suffix).
file_type_list = ['_2.0_threshold', '_sub_bg', '']

In [19]:
# Train the freshly built model on the new channel set (square root applied to the first
# channel only), holding out 70% of the events as the test/validation set.
train_dataset_noise, test_dataset_noise = data_func_stack(
    file_type_list, [1, 0, 0], 0.7
)

# Stop when validation accuracy has not improved for 10 epochs and restore the best weights.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
)

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
model.fit(
    train_dataset_noise,
    validation_data=test_dataset_noise,
    epochs=150,
    callbacks=[callback],
    verbose=1,
)

2022-07-28 13:15:28.743870: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 7400700000 exceeds 10% of free system memory.


Epoch 1/150


2022-07-28 13:15:33.590535: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-07-28 13:15:33.593951: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2099995000 Hz
2022-07-28 13:15:33.783185: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-07-28 13:15:34.417746: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7




2022-07-28 13:15:42.448494: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 7400700000 exceeds 10% of free system memory.
2022-07-28 13:15:44.200991: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 7400700000 exceeds 10% of free system memory.


Epoch 2/150

2022-07-28 13:15:55.752642: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 7400700000 exceeds 10% of free system memory.


Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150


<tensorflow.python.keras.callbacks.History at 0x7f964c75c580>

In [20]:
# Score the test set at a fixed probability cut.
# Labels follow data_func_stack: 0 = Migdal, 1 = not Migdal, so the sigmoid output is the
# "not Migdal" probability and an event is tagged as Migdal when that output is below `cut`.
actual_label = []  # true labels of every event tagged as Migdal
fp_indices = []    # positions (in test-set iteration order) of non-Migdal events tagged as Migdal
migdals = 0        # running count of true Migdal events

cut = 0.08
total = 0          # running count of events processed

for data, labels in test_dataset_noise:
    labels = labels.numpy().flatten()
    batch_probs = model(data).numpy().flatten()
    tagged = batch_probs < cut
    actual_label.extend(labels[tagged].tolist())
    migdals += int(np.count_nonzero(labels == 0))
    # np.flatnonzero gives batch-local positions; add the events already seen to make them global.
    fp_indices.extend((np.flatnonzero(tagged & (labels == 1)) + total).tolist())
    total += len(labels)

test_list = [x for x in actual_label if x == 0]  # correctly tagged Migdal events
false_positives = len(actual_label) - len(test_list)

# Guard every ratio: a tight cut can select nothing, which would otherwise raise ZeroDivisionError.
nan = float('nan')
purity = len(test_list)/len(actual_label)*100 if actual_label else nan
efficiency = 100*len(test_list)/migdals if migdals else nan
false_positive_rate = 100*false_positives/(total-migdals) if total != migdals else nan

print()
print(f'Number of Migdal events identified with cut of {cut} = {len(actual_label)}')
# NOTE: the value printed as "Accuracy" is the purity (precision) of the tagged sample.
print(f'Accuracy with cut of {cut} = {purity:.3f}%')
print('Total number of images tested: '+str(total))
print('Total number of actual Migdal events tested: '+str(migdals))
print('Number of Migdal events identified: '+str(len(actual_label)))
print(f'Percentage of Migdal events identified correctly: {efficiency:.3f}%')
print('Number of false positive Migdal events: '+str(false_positives))
print(f'False-positive rate: {false_positive_rate:.3g}%')
print()

# fpr, acc = (100*(len(actual_label)-len(test_list))/(total-migdals)), (100*len(test_list)/migdals)
# del start_time, model, actual_label, test_list, fp_indices, migdals, total, cut


Number of Migdal events identified with cut of 0.08 = 3538
Accuracy with cut of 0.08 = 99.802%
Total number of images tested: 13705
Total number of actual Migdal events tested: 6784
Number of Migdal events identified: 3538
Percentage of Migdal events identified correctly: 52.049%
Number of false positive Migdal events: 7
False-positive rate: 0.101%



In [None]:
import sys

# List every name in the current namespace with its size in bytes, to spot large leftovers.
# The items are snapshotted into a list first because the loop itself binds new names, and
# a dict must not change size while it is being iterated.
# NOTE: sys.getsizeof reports the object's own size only, not that of objects it references.
local_vars = list(locals().items())
for var, obj in local_vars:
    print(var, sys.getsizeof(obj))