In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from keras.layers import Input, Add, Dense, ZeroPadding2D, Activation, BatchNormalization, \
    Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.utils.np_utils import to_categorical
from keras.models import Model, load_model
from keras.initializers import glorot_uniform
import matplotlib.pyplot as plt

import keras.backend as K
# All layers below use BatchNormalization(axis=3), i.e. the channel axis is last.
K.set_image_data_format('channels_last')
# NOTE: the learning phase is intentionally NOT pinned with
# K.set_learning_phase(1). Pinning it to 1 forces BatchNormalization layers
# into training mode for every call, including model.evaluate()/predict(),
# so test metrics would be computed with per-batch statistics instead of the
# learned moving averages. Keras switches the phase automatically in
# fit() (training) and evaluate()/predict() (inference).

# TODO: kernel initializer?
# TODO: average pooling instead of max pooling for final layer?


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
######## Resnet architecture ######## 

def identity_block(X, filters, kernel_size):
    '''
    Residual block whose shortcut is the untouched input tensor.

    The main path stacks three convolutions (1x1 -> kernel_size x kernel_size
    -> 1x1), each followed by batch normalisation; the input is then added to
    the result and a final ReLU is applied.

    X - input tensor
    filters - sequence of three ints: number of filters of the first, second
              and third convolution of the main path
    kernel_size - side length of the square filter used by the middle convolution

    returns: output tensor of the block

    Note: all convolutions use stride 1, and the input is added to the main
    path output, so the third filter count is expected to equal the number of
    channels of X.
    '''

    n_first, n_middle, n_last = filters
    skip = X

    # Main path, step 1: 1x1 convolution -> BN -> ReLU
    main = Conv2D(
        filters=n_first,
        kernel_size=(1, 1),
        strides=(1, 1),
        padding='valid',
        kernel_initializer=glorot_uniform(seed=0),
    )(X)
    main = BatchNormalization(axis=3)(main)
    main = Activation('relu')(main)

    # Main path, step 2: square convolution with 'same' padding -> BN -> ReLU
    main = Conv2D(
        filters=n_middle,
        kernel_size=(kernel_size, kernel_size),
        strides=(1, 1),
        padding='same',
        kernel_initializer=glorot_uniform(seed=0),
    )(main)
    main = BatchNormalization(axis=3)(main)
    main = Activation('relu')(main)

    # Main path, step 3: 1x1 convolution -> BN (activation comes after the add)
    main = Conv2D(
        filters=n_last,
        kernel_size=(1, 1),
        strides=(1, 1),
        padding='valid',
        kernel_initializer=glorot_uniform(seed=0),
    )(main)
    main = BatchNormalization(axis=3)(main)

    # Merge with the shortcut, then apply the final non-linearity
    merged = Add()([main, skip])
    return Activation('relu')(merged)

def conv_block(X, filters, kernel_size, strides=2):
    '''
    Residual block whose shortcut path contains its own convolution.

    The main path stacks three convolutions (1x1 -> kernel_size x kernel_size
    -> 1x1), each followed by batch normalisation. The shortcut path applies a
    1x1 convolution plus batch normalisation to the input, using the same
    stride as the first main-path convolution and the same number of filters
    as the last one. Both paths are added and passed through a ReLU.

    X - input tensor
    filters - sequence of three ints: number of filters of the first, second
              and third convolution of the main path
    kernel_size - side length of the square filter used by the middle convolution
    strides - stride (applied in both spatial directions) of the first
              main-path convolution and of the shortcut convolution

    returns: output tensor of the block
    '''

    n_first, n_middle, n_last = filters
    step = (strides, strides)
    skip = X

    # Main path, step 1: strided 1x1 convolution -> BN -> ReLU
    main = Conv2D(
        filters=n_first,
        kernel_size=(1, 1),
        strides=step,
        padding='valid',
        kernel_initializer=glorot_uniform(seed=0),
    )(X)
    main = BatchNormalization(axis=3)(main)
    main = Activation('relu')(main)

    # Main path, step 2: square convolution with 'same' padding -> BN -> ReLU
    main = Conv2D(
        filters=n_middle,
        kernel_size=(kernel_size, kernel_size),
        strides=(1, 1),
        padding='same',
        kernel_initializer=glorot_uniform(seed=0),
    )(main)
    main = BatchNormalization(axis=3)(main)
    main = Activation('relu')(main)

    # Main path, step 3: 1x1 convolution -> BN (activation comes after the add)
    main = Conv2D(
        filters=n_last,
        kernel_size=(1, 1),
        strides=(1, 1),
        padding='valid',
        kernel_initializer=glorot_uniform(seed=0),
    )(main)
    main = BatchNormalization(axis=3)(main)

    # Shortcut path: strided 1x1 convolution -> BN, built after the main path
    skip = Conv2D(
        filters=n_last,
        kernel_size=(1, 1),
        strides=step,
        padding='valid',
        kernel_initializer=glorot_uniform(seed=0),
    )(skip)
    skip = BatchNormalization(axis=3)(skip)

    # Merge both paths, then apply the final non-linearity
    merged = Add()([skip, main])
    return Activation('relu')(merged)

def resnet_50(input_shape = (244, 244, 3), classes = 2):
    '''
    Assemble the ResNet-50 style classifier used in this notebook.

    input_shape - dimensions of the image - (img_height, img_width, channels)
    classes - number of units of the final softmax layer

    returns: an uncompiled keras Model named "ResNet50"

    Layout: zero padding -> 7x7 stride-2 convolution -> BN -> ReLU ->
    max pooling, then four stages (one conv_block followed by 2, 3, 5 and 2
    identity_blocks respectively), average pooling, flatten and a dense
    softmax layer. Both pooling layers use a 1x1 window, so they do not
    reduce the spatial size of the feature map.
    '''

    X_input = Input(input_shape)

    # Stem
    net = ZeroPadding2D((3, 3))(X_input)
    net = Conv2D(
        filters=64,
        kernel_size=(7, 7),
        strides=(2, 2),
        kernel_initializer=glorot_uniform(seed=0),
    )(net)
    net = BatchNormalization(axis=3)(net)
    net = Activation('relu')(net)
    net = MaxPooling2D((1, 1), strides=(1, 1))(net)

    # (filters, stride of the conv_block, number of identity blocks) per stage
    stage_plan = (
        ([64, 64, 256], 1, 2),
        ([128, 128, 512], 2, 3),
        ([256, 256, 1024], 2, 5),
        ([512, 512, 2048], 2, 2),
    )
    for stage_filters, stage_stride, n_identity in stage_plan:
        net = conv_block(net, filters=stage_filters, kernel_size=3, strides=stage_stride)
        for _ in range(n_identity):
            net = identity_block(net, filters=stage_filters, kernel_size=3)

    net = AveragePooling2D((1, 1))(net)

    # Classification head
    net = Flatten()(net)
    net = Dense(classes, activation='softmax', kernel_initializer=glorot_uniform(seed=0))(net)

    return Model(inputs=X_input, outputs=net, name="ResNet50")


In [3]:
def plot_training_history(hist):
    '''
    Plot training accuracy and loss per epoch on a single figure.

    hist: keras model history from training - model.fit(). Its ``history``
          dict must contain 'loss' and an accuracy series stored under either
          'acc' (older Keras) or 'accuracy' (Keras >= 2.3).

    raises: KeyError if no accuracy series or no 'loss' series is present.
    '''

    records = hist.history

    # The accuracy metric key was renamed between Keras versions; prefer the
    # legacy name when present so existing behaviour is unchanged.
    acc_key = 'acc' if 'acc' in records else 'accuracy'

    plt.plot(records[acc_key])
    plt.plot(records['loss'])
    plt.title('Model accuracy and loss')
    plt.ylabel('Accuracy/Loss')
    plt.xlabel('Epoch')
    plt.legend(['Accuracy', 'Loss'], loc='upper left')
    plt.show()
    

In [None]:
# Each event's selected features are reshaped into a tiny
# (image_rows x image_cols x channels) "image" for the convolutional network.
image_rows = 3
image_cols = 3

channels   = 1

n_classes  = 2  # hh (label 1) vs qcd (label 0)

# Seed NumPy's global RNG so the fake column and the shuffle below are
# repeatable on "Restart & Run All". Without this, the fixed random_state of
# train_test_split alone does not make the split reproducible.
SEED = 9
np.random.seed(SEED)

# print("LOADING DATA....")

qcd_data_original = np.genfromtxt("qcd_outputDataForLearning.csv", skip_header=1, delimiter=",")
hh_data_original  = np.genfromtxt("dihiggs_outputDataForLearning.csv", skip_header=1, delimiter=",")

# adding easily separable column
# hh all positive, qcd all negative, uniform dist [0,1]
n_points = 1  # per event
hh_fake = np.random.rand(len(hh_data_original), n_points)
qcd_fake = np.random.rand(len(qcd_data_original), n_points)

# np.random.rand already returns non-negative values, so no abs() is needed.
# Rescale so the largest magnitude is exactly 1 and flip the sign for qcd.
hh_fake = hh_fake * (1/np.max(hh_fake))
qcd_fake = -qcd_fake * (1/np.max(qcd_fake))

# add fake column(s) to real data
hh_data_original = np.append(hh_data_original, hh_fake, axis=1)
qcd_data_original = np.append(qcd_data_original, qcd_fake, axis=1)

# generate labels for hh and qcd
hh_labels= np.ones((len(hh_data_original),1))
hh_data_original = np.append(hh_data_original, hh_labels, axis=1)

qcd_labels= np.zeros((len(qcd_data_original),1))
qcd_data_original = np.append(qcd_data_original, qcd_labels, axis=1)

# add all data together
all_data_original = np.append(hh_data_original, qcd_data_original, axis=0)


# list of columns to include from qcd/dihiggs data:
#
# hh_mass h1_mass h2_mass hh_pt h1_pt h2_pt deltaR(h1, h2) deltaR(h1 jets) deltaR(h2 jets)
#  0        1       2       3     4     5     6              7               8
# deltaPhi(h1, h2) deltaPhi(h1 jets) deltaPhi(h2 jets) met met_phi scalarHT nJets nBTags isMatchable
#     9                 10              11             12    13       14     15    16       17
# jet1_pt jet2_pt jet3_pt jet4_pt jet1_eta jet2_eta jet3_eta jet4_eta jet1_phi jet2_phi jet3_phi jet4_phi
#  18      19       20       21      22       23       24       25        26      27       28       29
# jet1_mass jet2_mass jet3_mass jet4_mass jet1_px jet2_px jet3_px jet4_px jet1_py jet2_py jet3_py jet4_py 
#     30        31        32        33        34      35      36      37      38      39      40      41
# jet1_pz jet2_pz jet3_pz jet4_pz jet1_energy jet2_energy jet3_energy jet4_energy 
#     42      43      44      45      46          47          48          49
# jet1_btag jet2_btag jet3_btag jet4_btag fake_column LABEL
#     50        51        52        53        54       55

# The label was appended last, so take its index from the array itself rather
# than hard-coding it; this stays correct if the CSV width or n_points changes.
label_column = all_data_original.shape[1] - 1

# NOTE: the fake column (index 54 in the table above) is not part of this selection.
iteration = [0, 1, 2, 6, 7, 8, 9, 10, 11, label_column]

# The reshape further down needs exactly rows*cols*channels features per event.
n_features = len(iteration) - 1
assert n_features == image_rows * image_cols * channels, (
    "selected %d features but the image shape needs %d"
    % (n_features, image_rows * image_cols * channels)
)

all_data = all_data_original[:,iteration]

# One in-place shuffle of the rows is enough (train_test_split shuffles again).
np.random.shuffle(all_data)

y    = all_data[:,-1]
X    = all_data[:,:-1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=SEED)

# one hot encoding for labels; num_classes is fixed so both splits always get
# n_classes columns even if one of them happens to contain a single class
y_train = to_categorical(y_train, num_classes=n_classes)
y_test  = to_categorical(y_test, num_classes=n_classes)

# change input dimensions so it has 4 dimensions instead of 2
X_train     = np.reshape(X_train, (X_train.shape[0],image_rows,image_cols,channels))
X_test      = np.reshape(X_test,   (X_test.shape[0],image_rows,image_cols,channels))


# X dimensions: (num_samples, img_size, img_size, channels) 
# Y dimensions: (num_samples, num_classes)
print("number of training examples = " + str(X_train.shape[0]))
print("number of test examples = "     + str(X_test.shape[0]))
print("X_train shape: "                + str(X_train.shape))
print("Y_train shape: "                + str(y_train.shape))
print("X_test shape:  "                 + str(X_test.shape))
print("Y_test shape:  "                 + str(y_test.shape))


# print("CREATING AND COMPILING RESNET MODEL....")


# create model
model = resnet_50(input_shape=(image_rows,image_cols,channels), classes=n_classes)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

#print("BEGINNING MODEL TRAINING....")


# fit model to data, evaluate it against testing data
history = model.fit(X_train, y_train, epochs=3, batch_size=32)
preds   = model.evaluate(X_test, y_test)  # [loss, accuracy], matching compile()
print("Loss = ",          preds[0])
print("Test Accuracy = ", preds[1])


number of training examples = 4731
number of test examples = 1577
X_train shape: (4731, 3, 3, 1)
Y_train shape: (4731, 2)
X_test shape: (1577, 3, 3, 1)
Y_test shape: (1577, 2)
Epoch 1/3

In [None]:
# Plot the per-epoch accuracy and loss recorded in `history` by model.fit().
plot_training_history(history)