In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.backend as K
from matplotlib import image
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import ModelCheckpoint
 
plt.rcParams['figure.figsize'] = (5,5)
plt.rcParams['figure.dpi'] = 150
 
tf.random.set_seed(42)
np.random.RandomState(42)
 
    
%load_ext autoreload
%autoreload 2

In [2]:
from os import listdir
from os.path import isfile, join
onlyfiles = [f for f in listdir('train/ER') if isfile(join('train/ER', f))]

names_ER = list(map(lambda x: x.strip('.png'), onlyfiles))

In [3]:
ER = []
for k in tqdm(onlyfiles):
    img = tf.keras.preprocessing.image.load_img(
        'train/ER/'+k, color_mode='grayscale', target_size=(576,576))
    input_arr = tf.keras.preprocessing.image.img_to_array(img)
    input_arr = np.array([input_arr])  # Convert single image to a batch.
    ER.append(input_arr)
ER = np.asarray(ER)

100%|██████████| 6758/6758 [00:29<00:00, 226.61it/s]


In [4]:
onlyfiles_NR = [f for f in listdir('train/NR') if isfile(join('train/NR', f))]

names_NR = list(map(lambda x: x.strip('.png'), onlyfiles))

In [5]:
NR = []
for k in tqdm(onlyfiles_NR):
    img = tf.keras.preprocessing.image.load_img(
        'train/NR/'+k, color_mode='grayscale', target_size=(576,576))
    input_arr = tf.keras.preprocessing.image.img_to_array(img)
    input_arr = np.array([input_arr])  # Convert single image to a batch.
    NR.append(input_arr)
NR = np.asarray(NR)

100%|██████████| 6646/6646 [00:29<00:00, 227.88it/s]


In [6]:
ER = ER.reshape(-1,576,576)
NR = NR.reshape(-1,576,576)

In [7]:
ER = ER[:, 225:-225, 225:-225]
NR = NR[:, 225:-225, 225:-225]

In [8]:
y_ER = np.zeros(ER.shape[0])
y_NR = np.ones(NR.shape[0])

In [9]:
Xtrain, Xtest, ytrain, ytest = train_test_split(np.concatenate([ER, NR], axis=0), 
                                                np.concatenate([y_ER, y_NR], axis=0), shuffle=True, 
                                               random_state=42)

In [10]:
Xtrain.shape

(10053, 126, 126)

In [11]:
from sklearn.preprocessing import MinMaxScaler

In [12]:
scaler = MinMaxScaler()

In [13]:
Xtrain.reshape(-1).shape

(159601428,)

In [14]:
Xtrain = scaler.fit_transform(Xtrain.reshape(-1, 1)).reshape(-1,126,126,1)
Xtest = scaler.transform(Xtest.reshape(-1, 1)).reshape(-1,126,126,1)

In [21]:
# model using functional /keras API instead of sequential API
inputs = keras.Input(shape=(126,126,1))

In [22]:
x = layers.Conv2D(30, 3, activation='relu')(inputs)
x = layers.BatchNormalization()(x)
x = layers.MaxPool2D()(x)

x = layers.Flatten()(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(100, activation='relu')(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)

In [23]:
model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [24]:
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 126, 126, 1)]     0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 124, 124, 30)      300       
_________________________________________________________________
batch_normalization_1 (Batch (None, 124, 124, 30)      120       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 62, 62, 30)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 115320)            0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 115320)            0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               1153

In [25]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss=tf.losses.BinaryCrossentropy(from_logits=False),
  metrics=['accuracy', tf.keras.metrics.AUC()])
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50)
checkpoint = ModelCheckpoint("MLHEP_v8_weights/weights.{epoch:02d}-{val_loss:.4f}.hdf5", monitor='accuracy', verbose=1,
    save_best_only=True, mode='auto', save_freq='epoch')

In [26]:
history = model.fit(Xtrain, ytrain, validation_data=(Xtest, ytest), epochs=250, shuffle=True, 
                    callbacks=[checkpoint])

Epoch 1/250

Epoch 00001: accuracy improved from -inf to 0.77639, saving model to MLHEP_v8_weights/weights.01-4.1700.hdf5
Epoch 2/250

Epoch 00002: accuracy improved from 0.77639 to 0.96429, saving model to MLHEP_v8_weights/weights.02-5.4129.hdf5
Epoch 3/250

Epoch 00003: accuracy improved from 0.96429 to 0.98558, saving model to MLHEP_v8_weights/weights.03-6.4066.hdf5
Epoch 4/250

Epoch 00004: accuracy improved from 0.98558 to 0.99025, saving model to MLHEP_v8_weights/weights.04-1.5991.hdf5
Epoch 5/250

Epoch 00005: accuracy improved from 0.99025 to 0.99234, saving model to MLHEP_v8_weights/weights.05-0.0347.hdf5
Epoch 6/250

Epoch 00006: accuracy improved from 0.99234 to 0.99314, saving model to MLHEP_v8_weights/weights.06-0.5774.hdf5
Epoch 7/250

Epoch 00007: accuracy did not improve from 0.99314
Epoch 8/250

Epoch 00008: accuracy improved from 0.99314 to 0.99473, saving model to MLHEP_v8_weights/weights.08-0.0561.hdf5
Epoch 9/250

Epoch 00009: accuracy improved from 0.99473 to 0.99


Epoch 00033: accuracy did not improve from 0.99851
Epoch 34/250

Epoch 00034: accuracy improved from 0.99851 to 0.99910, saving model to MLHEP_v8_weights/weights.34-0.0044.hdf5
Epoch 35/250

Epoch 00035: accuracy did not improve from 0.99910
Epoch 36/250

Epoch 00036: accuracy did not improve from 0.99910
Epoch 37/250

Epoch 00037: accuracy did not improve from 0.99910
Epoch 38/250

Epoch 00038: accuracy did not improve from 0.99910
Epoch 39/250

Epoch 00039: accuracy did not improve from 0.99910
Epoch 40/250

Epoch 00040: accuracy did not improve from 0.99910
Epoch 41/250

Epoch 00041: accuracy did not improve from 0.99910
Epoch 42/250

Epoch 00042: accuracy did not improve from 0.99910
Epoch 43/250

Epoch 00043: accuracy improved from 0.99910 to 0.99920, saving model to MLHEP_v8_weights/weights.43-0.0121.hdf5
Epoch 44/250

Epoch 00044: accuracy did not improve from 0.99920
Epoch 45/250

Epoch 00045: accuracy did not improve from 0.99920
Epoch 46/250

Epoch 00046: accuracy did not im


Epoch 00102: accuracy did not improve from 0.99980
Epoch 103/250

Epoch 00103: accuracy did not improve from 0.99980
Epoch 104/250

Epoch 00104: accuracy did not improve from 0.99980
Epoch 105/250

Epoch 00105: accuracy did not improve from 0.99980
Epoch 106/250

Epoch 00106: accuracy did not improve from 0.99980
Epoch 107/250

Epoch 00107: accuracy did not improve from 0.99980
Epoch 108/250

Epoch 00108: accuracy did not improve from 0.99980
Epoch 109/250

Epoch 00109: accuracy did not improve from 0.99980
Epoch 110/250

Epoch 00110: accuracy did not improve from 0.99980
Epoch 111/250

Epoch 00111: accuracy did not improve from 0.99980
Epoch 112/250

Epoch 00112: accuracy did not improve from 0.99980
Epoch 113/250

Epoch 00113: accuracy did not improve from 0.99980
Epoch 114/250

Epoch 00114: accuracy did not improve from 0.99980
Epoch 115/250

Epoch 00115: accuracy did not improve from 0.99980
Epoch 116/250

Epoch 00116: accuracy did not improve from 0.99980
Epoch 117/250

Epoch 001


Epoch 00172: accuracy did not improve from 0.99990
Epoch 173/250

Epoch 00173: accuracy did not improve from 0.99990
Epoch 174/250

Epoch 00174: accuracy did not improve from 0.99990
Epoch 175/250

Epoch 00175: accuracy did not improve from 0.99990
Epoch 176/250

Epoch 00176: accuracy did not improve from 0.99990
Epoch 177/250

Epoch 00177: accuracy did not improve from 0.99990
Epoch 178/250

Epoch 00178: accuracy did not improve from 0.99990
Epoch 179/250

Epoch 00179: accuracy did not improve from 0.99990
Epoch 180/250

Epoch 00180: accuracy did not improve from 0.99990
Epoch 181/250

Epoch 00181: accuracy did not improve from 0.99990
Epoch 182/250

Epoch 00182: accuracy did not improve from 0.99990
Epoch 183/250

Epoch 00183: accuracy did not improve from 0.99990
Epoch 184/250

Epoch 00184: accuracy did not improve from 0.99990
Epoch 185/250

Epoch 00185: accuracy did not improve from 0.99990
Epoch 186/250

Epoch 00186: accuracy did not improve from 0.99990
Epoch 187/250

Epoch 001


Epoch 00242: accuracy did not improve from 0.99990
Epoch 243/250

Epoch 00243: accuracy did not improve from 0.99990
Epoch 244/250

Epoch 00244: accuracy did not improve from 0.99990
Epoch 245/250

Epoch 00245: accuracy did not improve from 0.99990
Epoch 246/250

Epoch 00246: accuracy did not improve from 0.99990
Epoch 247/250

Epoch 00247: accuracy did not improve from 0.99990
Epoch 248/250

Epoch 00248: accuracy did not improve from 0.99990
Epoch 249/250

Epoch 00249: accuracy did not improve from 0.99990
Epoch 250/250

Epoch 00250: accuracy did not improve from 0.99990


In [27]:
from os import listdir
from os.path import isfile, join
onlyfiles = [f for f in listdir('test/pattern') if isfile(join('test/pattern', f))]

In [28]:
names = list(map(lambda x: x.strip('.png'), onlyfiles))

In [36]:
model.load_weights('MLHEP_v8_weights/weights.97-0.0048.hdf5')

In [30]:
predictions_arr =[]
n_id = []
for k in tqdm(onlyfiles):
    img = tf.keras.preprocessing.image.load_img(
        'test/pattern/'+k, color_mode='grayscale', target_size=(576,576))
    input_arr = tf.keras.preprocessing.image.img_to_array(img)
    input_arr = np.array([input_arr])  # Convert single image to a batch.
    predictions_arr.append(input_arr)
    n_id.append(k.strip('.png'))
predictions_arr = np.asarray(predictions_arr)

100%|██████████| 16560/16560 [01:10<00:00, 236.32it/s]


In [31]:
predictions_arr = predictions_arr.reshape(-1,576,576,1)

predictions_arr = predictions_arr[:, 225:-225, 225:-225]
predictions_arr = scaler.transform(predictions_arr.reshape(-1, 1)).reshape(-1,126,126,1)

In [37]:
predictions = model.predict(predictions_arr)

In [38]:
import pandas as pd

In [39]:
d = {'id': n_id, 'particle': predictions.reshape(-1)}
dataframe = pd.DataFrame(d)
dataframe['particle'] = 1-dataframe['particle']

In [40]:
dataframe.to_csv('submission_v12', index=False)