In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import ModelCheckpoint
from matplotlib import image


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
 
plt.rcParams['figure.figsize'] = (5,5)
plt.rcParams['figure.dpi'] = 150
 
tf.random.set_seed(42)
np.random.RandomState(42)
 
import os 
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    
%load_ext autoreload
%autoreload 2

# Data preparation

In [2]:
from helper import load_data, crop_images
ER, yER = load_data('train/ER/')
NR, yNR = load_data('train/NR/')

ER = crop_images(ER)
NR = crop_images(NR)

100%|██████████| 6758/6758 [00:31<00:00, 217.59it/s]
100%|██████████| 6646/6646 [00:29<00:00, 224.46it/s]


In [3]:
ypER = np.zeros(ER.shape[0])
ypNR = np.ones(NR.shape[0])

In [4]:
Xtrain, Xtest, ytrain, ytest, yptrain, yptest = train_test_split(np.concatenate([ER, NR], axis=0), 
                                                np.concatenate([yER, yNR], axis=0), 
                                                np.concatenate([ypER, ypNR], axis=0), shuffle=True, 
                                               random_state=42)

In [5]:
scaler = MinMaxScaler()

Xtrain = scaler.fit_transform(Xtrain.reshape(-1, 1)).reshape(-1, Xtrain.shape[1], Xtrain.shape[2], 1)
Xtest = scaler.transform(Xtest.reshape(-1, 1)).reshape(-1, Xtrain.shape[1], Xtrain.shape[2], 1)

In [6]:
Xtrain.shape

(10053, 126, 126, 1)

# CNN training

In [7]:
inputs = keras.Input(shape=(Xtrain.shape[1], Xtrain.shape[2], 1))

In [8]:
x = layers.Conv2D(30, 3, activation='relu')(inputs)
x = layers.BatchNormalization()(x)
x = layers.MaxPool2D()(x)

x = layers.Flatten()(x)
x = layers.Dropout(0.2)(x)

x1 = layers.Dense(100,activation='relu')(x)
x1 = layers.Dropout(0.1)(x1)

x2 = layers.Dense(300,activation='relu')(x)
x2 = layers.Dropout(0.2)(x2)

ptype = layers.Dense(1, activation = "sigmoid", name='ptype')(x1)
energy = layers.Dense(1, name='energy')(x2)

In [9]:
model = tf.keras.Model(inputs=inputs, outputs={'en': energy, 'pty':ptype})

In [10]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 126, 126, 1) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 124, 124, 30) 300         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 124, 124, 30) 120         conv2d[0][0]                     
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 62, 62, 30)   0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [11]:
model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate = 0.0001), 
        loss={'en':'mae','pty': tf.losses.BinaryCrossentropy(from_logits = False)},
        loss_weights={'en':1, 'pty':0.1})

callback = tf.keras.callbacks.EarlyStopping(monitor='energy_loss', patience = 30)

checkpoint = ModelCheckpoint(
        "MLHEP_v1/weights.{epoch:02d}-{energy_loss:.4f}.hdf5", 
        monitor='energy_loss', verbose = 1, save_best_only = True, 
        mode='auto', save_freq='epoch')

In [12]:
history = model.fit(
        Xtrain, {'en':ytrain, 'pty':yptrain}, 
        validation_data = (Xtest, {'en':ytest, 'pty':yptest}),
        epochs = 10, batch_size = 32, shuffle = True,
        callbacks=[callback, checkpoint])

Epoch 1/10

Epoch 00001: energy_loss improved from inf to 3.53141, saving model to MLHEP_v1/weights.01-3.5314.hdf5
Epoch 2/10

Epoch 00002: energy_loss improved from 3.53141 to 2.39912, saving model to MLHEP_v1/weights.02-2.3991.hdf5
Epoch 3/10

Epoch 00003: energy_loss improved from 2.39912 to 2.06341, saving model to MLHEP_v1/weights.03-2.0634.hdf5
Epoch 4/10

Epoch 00004: energy_loss improved from 2.06341 to 1.96500, saving model to MLHEP_v1/weights.04-1.9650.hdf5
Epoch 5/10

Epoch 00005: energy_loss improved from 1.96500 to 1.77155, saving model to MLHEP_v1/weights.05-1.7716.hdf5
Epoch 6/10

Epoch 00006: energy_loss improved from 1.77155 to 1.75259, saving model to MLHEP_v1/weights.06-1.7526.hdf5
Epoch 7/10

Epoch 00007: energy_loss improved from 1.75259 to 1.67814, saving model to MLHEP_v1/weights.07-1.6781.hdf5
Epoch 8/10

Epoch 00008: energy_loss improved from 1.67814 to 1.61879, saving model to MLHEP_v1/weights.08-1.6188.hdf5
Epoch 9/10

Epoch 00009: energy_loss improved from 1

# Random forest predictions

In [13]:
model.load_weights('MLHEP_v1/bestv9.230-0.7786.hdf5')

In [14]:
CNN_predictions_test = model.predict(Xtest)

CNN_predictions_train = model.predict(Xtrain)

In [16]:
features_train = np.stack(
        [np.squeeze(CNN_predictions_train['en']), np.squeeze(Xtrain.sum(axis=(1,2)))], axis=1)

features_test = np.stack(
        [np.squeeze(CNN_predictions_test['en']), np.squeeze(Xtest.sum(axis=(1,2)))], axis=1)

In [17]:
model_forest = RandomForestRegressor(criterion='mae', max_depth=50)
model_forest.fit(features_train, ytrain)

RandomForestRegressor(criterion='mae', max_depth=50)

In [18]:
test_forest = model_forest.predict(features_test)

# Create submission file 

In [19]:
from helper import load_test

data, n_id = load_test('test/pattern/')
data = crop_images(data)
data = scaler.transform(data.reshape(-1, 1)).reshape(-1,126,126,1)

100%|██████████| 16560/16560 [01:10<00:00, 233.75it/s]


In [20]:
predictions = model.predict(data)

feature_input = np.stack(
    [np.squeeze(predictions['en']), np.squeeze(data.sum(axis=(1,2)))], axis=1)

forest_predictions = model_forest.predict(feature_input)

In [24]:
import pandas as pd 

d = {'id': n_id, 'energy': forest_predictions}
dataframe = pd.DataFrame(d)

dataframe.to_csv('test_file', index=False)