In [1]:
import setGPU

setGPU: Setting GPU to: 0


In [2]:
import h5py
import json
import random
import itertools

import numpy as np

from sklearn.utils import class_weight
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve, auc, f1_score

%load_ext watermark

  from ._conv import register_converters as _register_converters


In [3]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


In [4]:
# --- Reproducibility ---------------------------------------------------------
# The genetic search draws from Python's `random` module (initial population,
# crossover, mutation), so seed it once here to make the search repeatable.
# numpy's global RNG is seeded too for any library that draws from it.
# NOTE(review): the TensorFlow backend's own RNG is not seeded here, so trained
# weights (and therefore scores) may still differ between runs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# --- Genetic search settings -------------------------------------------------
N_GENERATIONS = 10     # number of train -> rank -> breed iterations
POPULATION_SIZE = 30   # networks per generation
MUTATION_RATE = 0.1    # intended per-layer probability of a random mutation
N_LAYERS = 3           # hidden layers in every randomly initialised network

# Discrete search space sampled during initialisation and mutation.
NN_PARAM_CHOICES = {
    "layer": {
        "units": [25, 50, 100, 200, 250],
        "activation": ["relu", "elu", "sigmoid", "tanh"],
        "dropout": [0.0, 0.25, 0.5]
    },
    "network":{
        # NOTE(review): "nlayers" is not read anywhere in the visible notebook;
        # the depth is fixed by N_LAYERS instead.
        "nlayers": [1, 2, 3, 4],
        "optimizer": ["adam", "rmsprop"]
    }    
}

# Shared early-stopping callback: stop once validation accuracy has not
# improved for 5 consecutive epochs.
earlystop = EarlyStopping(monitor='val_acc', patience=5, verbose=1, mode='auto')

In [5]:
class NetworkLayer(object):
    """Hyper-parameters of one hidden layer: width, activation and dropout rate."""

    def __init__(self, units=50, activation="relu", dropout=0.1):
        self.units, self.activation, self.dropout = units, activation, dropout

    @staticmethod
    def attrs():
        """Names of the tunable attributes, in a fixed order."""
        return ("units", "activation", "dropout")

    def to_json(self):
        """ Returns dict mapping each tunable attribute name to its current value """
        snapshot = {}
        for name in NetworkLayer.attrs():
            snapshot[name] = getattr(self, name)
        return snapshot

    def __repr__(self):
        # The template's placeholders are exactly the keys produced by to_json().
        return "({units} {activation} ({dropout}))".format(**self.to_json())
        
class Network(object):
    """A candidate dense binary classifier together with its evaluation scores.

    The architecture is described by ``layers`` (a list of ``NetworkLayer``)
    and ``optimizer``. ``compile_model`` turns that description into a Keras
    ``Sequential`` model, ``train`` fits it and ``score`` fills in the metrics.
    """

    def __init__(self, optimizer="adam"):
        # Network parameters
        self.layers = []
        self.optimizer = optimizer

        # Model
        self.model = Sequential()
        self.history = None  # set by train(); also marks the network as trained

        # Score (all populated by score())
        self.confusion_matrix = None
        self.accuracy = None
        self.f1 = None
        self.fpr = None
        self.tpr = None
        self.auc = None

    def __eq__(self, other):
        """ Compares if network is equal to other network

        Two networks are equal when they use the same optimizer and have the
        same number of layers with pairwise identical layer attributes.
        Trained weights and scores are not part of the comparison.
        """
        # Let Python fall back to its default handling for foreign types
        # (e.g. ``network == None``) instead of raising AttributeError.
        if not isinstance(other, Network):
            return NotImplemented

        # Optimizer must match
        if self.optimizer != other.optimizer:
            return False

        # Number of network layers must match
        if len(self.layers) != len(other.layers):
            return False

        # Compare every attribute of every pair of layers
        return all(
            getattr(mine, attr) == getattr(theirs, attr)
            for mine, theirs in zip(self.layers, other.layers)
            for attr in NetworkLayer.attrs()
        )

    def compile_model(self, input_shape=(2807,)):
        """ Creates model from network parameters and compiles it

        Adds one Dense + Dropout pair per entry in ``self.layers`` followed by
        a single sigmoid output unit, then compiles with binary cross-entropy.

        Note: layers are appended to the existing ``self.model``, so this is
        meant to be called once per network.
        """
        for index, layer in enumerate(self.layers):

            if index == 0:
                # Only the first layer declares the input shape
                self.model.add(Dense(units=layer.units, activation=layer.activation, input_shape=input_shape))
            else:
                self.model.add(Dense(units=layer.units, activation=layer.activation))

            self.model.add(Dropout(layer.dropout))

        # Output
        self.model.add(Dense(units=1, activation="sigmoid"))

        self.model.compile(optimizer=self.optimizer, loss="binary_crossentropy", metrics=["accuracy"])

    def train(self, X_train, y_train):
        """ Train model

        Does nothing when the network already has a training history. Relies
        on the notebook-level globals ``earlystop`` (callback) and ``cw``
        (class weights), which must be defined before this is called.
        """
        if self.history is not None:
            # Do not retrain already trained model
            return

        self.history = self.model.fit(X_train, y_train,
                        validation_split=0.25,
                        epochs=10000,  # effectively unbounded; early stopping ends the run
                        callbacks=[earlystop],
                        batch_size=100,
                        verbose=0,
                        class_weight=cw)

    def score(self, X_test, y_test):
        """ Evaluates model: calculates accuracy, f1score, roc_auc curves, confusion matrix

        Results are stored on the instance (``confusion_matrix``, ``accuracy``,
        ``f1``, ``fpr``, ``tpr``, ``auc``); nothing is returned.
        """
        # A single forward pass provides both the hard labels and the ROC scores
        y_probas = self.model.predict(X_test, verbose=0).ravel()
        y_pred = (y_probas > 0.5)

        # Accuracy and f1_score
        self.confusion_matrix = confusion_matrix(y_test, y_pred)
        self.accuracy = accuracy_score(y_test, y_pred)
        self.f1 = f1_score(y_test, y_pred)

        # ROC AUC
        fpr, tpr, _ = roc_curve(y_test, y_probas)
        self.auc = auc(fpr, tpr)

        self.fpr = fpr
        self.tpr = tpr

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        # Test against None explicitly so a genuine score of 0.0 is still shown
        return "Layers: {nlayers} {layers}, Optimizer: {optimizer}, Accuracy {accuracy}, AUC {auc}".format(
            nlayers=len(self.layers),
            layers=self.layers,
            optimizer=self.optimizer,
            accuracy=round(self.accuracy, 3) if self.accuracy is not None else None,
            auc=round(self.auc, 3) if self.auc is not None else None
        )

    def to_json(self):
        """ Returns dict with the scores, the optimizer and every layer's parameters """
        return {
            "score": {
                "accuracy": self.accuracy,
                "f1": self.f1,
                "auc": self.auc,
            },
            # Needed to rebuild the network from the saved history
            "optimizer": self.optimizer,
            "layers": [layer.to_json() for layer in self.layers],
        }

### Read dataset

In [6]:
# NOTE(review): absolute, user-specific path; consider a configurable DATA_DIR.
filename = "/home/mantas/data/JetHT2016.h5"

dset = None
with h5py.File(filename, "r") as h5_file:
    # Slice with [:] so the data is copied out before the file is closed
    dset = h5_file["JetHT2016"][:]

print(dset.shape)

(163684, 2813)


### Features and labels

In [7]:
# The first 2807 columns are the features; column 2812 is used as the label.
X, y = dset[:, :2807], dset[:, 2812]

In [8]:
# NOTE(review): the scaler is fitted on all rows before the train/test split
# below, so test-set statistics leak into the scaling; consider fitting on the
# training portion only.
scaler = StandardScaler(copy=False)  # copy=False reduce memory usage
X = scaler.fit_transform(X)

In [9]:
# Hold out 25% of the rows for scoring; the fixed random_state keeps the split
# identical between runs.
# NOTE(review): the split is not stratified although the classes are heavily
# imbalanced; consider passing stratify=y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

### Calculate class weights and class balance

In [10]:
# Per-class sample counts and 'balanced' weights (rarer class -> larger weight).
# NOTE(review): weights are derived from the full label vector y, not y_train.
classes, counts = np.unique(y, return_counts=True)

# `classes` and `y` are passed by keyword: newer scikit-learn releases reject
# them as positional arguments, and keywords also work on older versions.
weights = class_weight.compute_class_weight('balanced', classes=classes, y=y)

# Map integer class label -> weight; Network.train() hands this to Keras.
cw = {int(label): weight for label, weight in zip(classes, weights)}

for label, count in zip(classes, counts):
    print("Class {_class} Count {_count} Weight {_weight}".format(_class=int(label), _count=count, _weight=cw[int(label)]))
print(cw)

Class 0 Count 3041 Weight 26.912857612627427
Class 1 Count 160643 Weight 0.5094650871808918
{0: 26.912857612627427, 1: 0.5094650871808918}


### Initial population

In [11]:
# Every network that has ever been trained, across all generations.
population_history = []

# Hand-picked starting architectures: one list of (units, activation, dropout)
# tuples per network, all trained with the "adam" optimizer.
seed_architectures = [
    [(250, "elu", 0.5), (50, "elu", 0.25), (15, "elu", 0.25)],
    [(50, "elu", 0.5), (250, "elu", 0.25), (55, "elu", 0.5)],
    [(250, "elu", 0.25), (200, "elu", 0.0), (100, "elu", 0.25)],
]

population = []
for layer_specs in seed_architectures:
    seeded = Network(optimizer="adam")
    seeded.layers.extend(NetworkLayer(*spec) for spec in layer_specs)
    seeded.compile_model()
    population.append(seeded)

# Keep the individual names available, as before.
network1, network2, network3 = population

# Fill the rest of the population with randomly sampled architectures.
while len(population) < POPULATION_SIZE:

    network = Network(optimizer=random.choice(NN_PARAM_CHOICES["network"]["optimizer"]))

    for _ in range(N_LAYERS):
        # Draw one value per layer attribute, in NetworkLayer.attrs() order
        sampled = {
            attr: random.choice(NN_PARAM_CHOICES["layer"][attr])
            for attr in NetworkLayer.attrs()
        }
        network.layers.append(NetworkLayer(**sampled))

    network.compile_model()

    population.append(network)

In [12]:
# List every candidate's configuration (scores are still None before training).
for candidate in population:
    print(str(candidate))

Layers: 3 [(250 elu (0.5)), (50 elu (0.25)), (15 elu (0.25))], Optimizer: adam, Accuracy None, AUC None
Layers: 3 [(50 elu (0.5)), (250 elu (0.25)), (55 elu (0.5))], Optimizer: adam, Accuracy None, AUC None
Layers: 3 [(250 elu (0.25)), (200 elu (0.0)), (100 elu (0.25))], Optimizer: adam, Accuracy None, AUC None
Layers: 3 [(200 relu (0.0)), (100 elu (0.25)), (250 sigmoid (0.25))], Optimizer: rmsprop, Accuracy None, AUC None
Layers: 3 [(200 elu (0.25)), (200 sigmoid (0.5)), (250 relu (0.5))], Optimizer: adam, Accuracy None, AUC None
Layers: 3 [(250 relu (0.0)), (100 tanh (0.0)), (25 elu (0.0))], Optimizer: adam, Accuracy None, AUC None
Layers: 3 [(100 tanh (0.25)), (50 tanh (0.0)), (25 relu (0.5))], Optimizer: rmsprop, Accuracy None, AUC None
Layers: 3 [(25 elu (0.25)), (250 relu (0.25)), (200 sigmoid (0.25))], Optimizer: rmsprop, Accuracy None, AUC None
Layers: 3 [(250 tanh (0.5)), (50 tanh (0.25)), (25 relu (0.5))], Optimizer: adam, Accuracy None, AUC None
Layers: 3 [(100 elu (0.25)), 

### Crossover

In [13]:
def crossover(parent1, parent2, mutation_rate=0.1):
    """ Crossover parameters of two parents

    For every layer position, each attribute (units, activation, dropout) is
    inherited from one of the two parents chosen at random; the optimizer is
    chosen the same way. With probability ``mutation_rate`` a layer then has
    one randomly chosen attribute replaced by a random value from
    ``NN_PARAM_CHOICES["layer"]``.

    Args:
        parent1, parent2: networks with ``layers`` and ``optimizer``.
        mutation_rate: per-layer mutation probability in [0, 1].

    Returns:
        A new, not yet compiled child network, or ``None`` when the parents
        have a different number of layers.
    """
    nlayers = len(parent1.layers)
    if nlayers != len(parent2.layers):
        # Number of layers must match
        return None

    child = Network()

    child.optimizer = random.choice([parent1.optimizer, parent2.optimizer])

    for layer1, layer2 in zip(parent1.layers, parent2.layers):

        units = random.choice([layer1.units, layer2.units])
        activation = random.choice([layer1.activation, layer2.activation])
        dropout = random.choice([layer1.dropout, layer2.dropout])

        layer = NetworkLayer(units, activation, dropout)

        # random.random() is uniform on [0, 1), so this branch is taken with
        # probability mutation_rate (never for 0.0, always for 1.0).
        if random.random() < mutation_rate:
            mutation_key = random.choice(list(NN_PARAM_CHOICES["layer"].keys()))
            mutation_val = random.choice(NN_PARAM_CHOICES["layer"][mutation_key])
            setattr(layer, mutation_key, mutation_val)

        child.layers.append(layer)

    return child

### Train

In [14]:
def train(population):
    """Fit and evaluate every network in ``population``, printing progress.

    Uses the notebook-level ``X_train``/``y_train`` for fitting and
    ``X_test``/``y_test`` for scoring. Returns nothing; results are stored on
    the network objects themselves.
    """
    for position, candidate in enumerate(population):
        print("Training #{}".format(position))
        candidate.train(X_train, y_train)
        candidate.score(X_test, y_test)
        # Show the network again now that its scores are filled in
        print(candidate)

### Train, breed, mutate

In [15]:
# Evolution loop: train and score the population, rank it by AUC, then breed
# the next generation from the best-scoring parent pairs.
for generation in range(N_GENERATIONS):
    print("Generation {}".format(generation))

    # Train
    train(population)

    # Save all populations. Networks reused from an earlier generation are the
    # very same objects already stored in the history, so skip them by identity
    # to avoid duplicate entries.
    known_ids = {id(seen) for seen in population_history}
    population_history.extend(
        candidate for candidate in population if id(candidate) not in known_ids
    )

    # Sort networks by AUC, best first
    population = sorted(population, key=lambda x: x.auc, reverse=True)

    print("Best:")
    for network in population[:3]:
        print(network)

    # All possible combinations of parents
    parents = itertools.combinations(population, 2)

    # Sort combination of parents by sum of their auc scores
    parents = sorted(parents, key=lambda x: sum(n.auc for n in x), reverse=True)

    next_generation = []

    for (parent1, parent2) in parents:
        child = crossover(parent1, parent2, mutation_rate=MUTATION_RATE)

        # crossover() yields None for parents with differing layer counts
        if child is None:
            continue

        # Do not add same networks
        if child in next_generation:
            continue

        # Reuse the already trained network if this configuration was ever
        # seen before; otherwise build a fresh model for the new child.
        try:
            child = population_history[population_history.index(child)]
        except ValueError:
            child.compile_model()

        next_generation.append(child)

        if len(next_generation) >= POPULATION_SIZE:
            break

    population = next_generation
        

Generation 0
Training #0
Epoch 00014: early stopping
Layers: 3 [(250 elu (0.5)), (50 elu (0.25)), (15 elu (0.25))], Optimizer: adam, Accuracy 0.981, AUC 0.959
Training #1
Epoch 00015: early stopping
Layers: 3 [(50 elu (0.5)), (250 elu (0.25)), (55 elu (0.5))], Optimizer: adam, Accuracy 0.982, AUC 0.962
Training #2
Epoch 00010: early stopping
Layers: 3 [(250 elu (0.25)), (200 elu (0.0)), (100 elu (0.25))], Optimizer: adam, Accuracy 0.982, AUC 0.961
Training #3
Epoch 00021: early stopping
Layers: 3 [(200 relu (0.0)), (100 elu (0.25)), (250 sigmoid (0.25))], Optimizer: rmsprop, Accuracy 0.99, AUC 0.79
Training #4
Epoch 00009: early stopping
Layers: 3 [(200 elu (0.25)), (200 sigmoid (0.5)), (250 relu (0.5))], Optimizer: adam, Accuracy 0.912, AUC 0.963
Training #5
Epoch 00009: early stopping
Layers: 3 [(250 relu (0.0)), (100 tanh (0.0)), (25 elu (0.0))], Optimizer: adam, Accuracy 0.974, AUC 0.961
Training #6
Epoch 00013: early stopping
Layers: 3 [(100 tanh (0.25)), (50 tanh (0.0)), (25 relu

Epoch 00012: early stopping
Layers: 3 [(200 relu (0.25)), (100 relu (0.25)), (100 relu (0.0))], Optimizer: adam, Accuracy 0.964, AUC 0.968
Training #23
Epoch 00013: early stopping
Layers: 3 [(100 elu (0.5)), (50 elu (0.25)), (55 relu (0.5))], Optimizer: adam, Accuracy 0.976, AUC 0.968
Training #24
Epoch 00007: early stopping
Layers: 3 [(250 sigmoid (0.5)), (200 elu (0.5)), (200 relu (0.0))], Optimizer: adam, Accuracy 0.967, AUC 0.966
Training #25
Epoch 00017: early stopping
Layers: 3 [(50 elu (0.25)), (200 elu (0.25)), (100 elu (0.25))], Optimizer: adam, Accuracy 0.985, AUC 0.967
Training #26
Epoch 00011: early stopping
Layers: 3 [(100 sigmoid (0.0)), (100 relu (0.0)), (25 relu (0.25))], Optimizer: adam, Accuracy 0.892, AUC 0.956
Training #27
Epoch 00006: early stopping
Layers: 3 [(100 elu (0.5)), (250 tanh (0.5)), (100 relu (0.5))], Optimizer: rmsprop, Accuracy 0.99, AUC 0.904
Training #28
Epoch 00020: early stopping
Layers: 3 [(25 elu (0.25)), (50 elu (0.0)), (25 relu (0.5))], Optimi

Epoch 00016: early stopping
Layers: 3 [(50 sigmoid (0.25)), (25 relu (0.5)), (100 elu (0.25))], Optimizer: adam, Accuracy 0.974, AUC 0.965
Training #13
Epoch 00010: early stopping
Layers: 3 [(250 sigmoid (0.25)), (200 relu (0.5)), (25 tanh (0.5))], Optimizer: adam, Accuracy 0.951, AUC 0.965
Training #14
Epoch 00008: early stopping
Layers: 3 [(100 sigmoid (0.5)), (250 elu (0.25)), (25 relu (0.25))], Optimizer: adam, Accuracy 0.921, AUC 0.965
Training #15
Epoch 00014: early stopping
Layers: 3 [(250 sigmoid (0.25)), (250 tanh (0.5)), (250 elu (0.5))], Optimizer: adam, Accuracy 0.968, AUC 0.962
Training #16
Epoch 00009: early stopping
Layers: 3 [(100 elu (0.25)), (25 elu (0.25)), (200 sigmoid (0.25))], Optimizer: adam, Accuracy 0.963, AUC 0.965
Training #17
Epoch 00009: early stopping
Layers: 3 [(250 sigmoid (0.25)), (250 relu (0.5)), (25 relu (0.0))], Optimizer: adam, Accuracy 0.947, AUC 0.964
Training #18
Epoch 00011: early stopping
Layers: 3 [(250 elu (0.25)), (200 elu (0.25)), (250 rel

KeyboardInterrupt: 

In [16]:
# Rank every trained network by AUC (best first) and show the top 30.
population_history.sort(key=lambda candidate: candidate.auc, reverse=True)

top_networks = population_history[:30]
for network in top_networks:
    print(network)

Layers: 3 [(100 sigmoid (0.5)), (50 elu (0.5)), (50 relu (0.25))], Optimizer: adam, Accuracy 0.973, AUC 0.969
Layers: 3 [(100 sigmoid (0.25)), (250 tanh (0.25)), (25 relu (0.0))], Optimizer: adam, Accuracy 0.964, AUC 0.969
Layers: 3 [(100 sigmoid (0.25)), (250 relu (0.25)), (25 tanh (0.5))], Optimizer: adam, Accuracy 0.97, AUC 0.969
Layers: 3 [(100 sigmoid (0.5)), (50 elu (0.0)), (200 relu (0.0))], Optimizer: adam, Accuracy 0.973, AUC 0.968
Layers: 3 [(100 elu (0.5)), (200 sigmoid (0.5)), (25 relu (0.25))], Optimizer: adam, Accuracy 0.972, AUC 0.968
Layers: 3 [(200 relu (0.25)), (100 relu (0.25)), (100 relu (0.0))], Optimizer: adam, Accuracy 0.964, AUC 0.968
Layers: 3 [(250 sigmoid (0.25)), (50 tanh (0.5)), (200 elu (0.25))], Optimizer: adam, Accuracy 0.961, AUC 0.968
Layers: 3 [(100 elu (0.5)), (50 elu (0.25)), (55 relu (0.5))], Optimizer: adam, Accuracy 0.976, AUC 0.968
Layers: 3 [(50 relu (0.25)), (25 relu (0.0)), (25 elu (0.25))], Optimizer: adam, Accuracy 0.979, AUC 0.967
Layers: 

In [17]:
# Save history
with open("history.json", 'w') as o:
    json.dump([network.to_json() for network in population_history], o, indent=4)

In [18]:
%watermark -a "Mantas Stankevicius" -d -v -m -p numpy,scikit-learn,tensorflow,keras,h5py



Mantas Stankevicius 2018-11-06 

CPython 3.6.5
IPython 7.1.1

numpy 1.14.3
sklearn 0.19.1
tensorflow 1.8.0
keras 2.1.6
h5py 2.7.1

compiler   : GCC 7.3.0
system     : Linux
release    : 4.15.0-36-generic
machine    : x86_64
processor  : x86_64
CPU cores  : 4
interpreter: 64bit
