In [1]:
import numpy as np
import random
import pandas as pd
import sklearn.metrics
import sklearn.cross_validation as cv
from sklearn.cross_validation import KFold, train_test_split
from sklearn import ensemble
import time
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()

In [2]:
#### Libraries
# Standard library
import json
import random
import sys

# Third-party libraries
import numpy as np
from sklearn.metrics import roc_curve, auc, roc_auc_score


#### Define the quadratic and cross-entropy cost functions
###From here on a is a function of the weights and biases (w,b) and input x, a=a(x,w,b).
###p is the penalty that corresponds to an anistropic learning rate for the minority and 
##majority class. y is the target output. delta is the derivative of the cost function,  
##z^l = w^l a^{l-1} + b^l is the weighted input for each new layer of neurons, 
##i.e. a^l = sigmoid(z^l).
##lambda is the regularization parameter.
class QuadraticCost(object):
    
    @staticmethod
    def fn(a, y, p):
        if y == 1: return p*0.5*np.linalg.norm(a-y)**2
        else: return 0.5*np.linalg.norm(a-y)**2
        
# delta^l = grad_a^l(cost) * sigma'(z^l) is the local layer l's rate of cost change 
# that is propagated back to the input in the backpropagation algorithm 
    @staticmethod
    def delta(z, a, y, p):
        if y == 1: return p*(a-y)*sigmoid_prime(z)
        return (a-y)*sigmoid_prime(z)


class CrossEntropyCost(object):
    
    @staticmethod
    def fn(a, y, p):
        if y == 1: return p*np.sum(np.nan_to_num(-y*np.log(a)-(1-y)*np.log(1-a)))
        else: return np.sum(np.nan_to_num(-y*np.log(a)-(1-y)*np.log(1-a)))
        
##z is not used by this method but we include it in the parameters for the sake of cohesion.
    @staticmethod
    def delta(z, a, y, p):
        if y == 1: return p*(a-y)
        else: return (a-y)


#### THE NETWORK CLASS
class Network(object):

    def __init__(self, sizes, previous_w, previous_b, cost=QuadraticCost):
        self.num_layers = len(sizes)
        self.sizes = sizes # number of neurons in the respective layers of the network
        self.cost = cost
        #self.previous_w = previous_w
        #self.previous_b = previous_b
        if previous_w == [] or previous_b == []:
            print "Default weights"
            self.default_weight_initializer()
        else:
            print "Best previous weights"
            self.previous_best_weight(previous_w, previous_b)
        

    def default_weight_initializer(self):
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)/np.sqrt(x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def large_weight_initializer(self):
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]
        
#initialize the algorithm with the best weights from a previous run        
    def previous_best_weight(self, previous_w, previous_b):
        self.weights = previous_w
        self.biases = previous_b
        
#standard feedforward algorithm for the neural net. sigmoid is vectorized and returns a
#for each hidden layer.
    def feedforward(self, a):
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a
    
#stochastic gradient descent method uses a sequence of subsetss "minibatch" of the data 
#to get an estimate for the cost gradient. optional parameters will monitor the 
#set cost, accuracy for both training and evaluation sets.
    def SGD(self, training_data, epochs, mini_batch_size, eta, penalty,
            lmbda = 0.0,
            evaluation_data=None,
            monitor_evaluation_cost=False,
            monitor_evaluation_accuracy=False,
            monitor_training_cost=False,
            monitor_training_accuracy=False,
            monitor_roc=False,
            monitor_roc_test=False):
        
        if evaluation_data: n_data = len(evaluation_data)
        n = len(training_data)
        self.evaluation_cost, evaluation_accuracy = [], []
        self.training_cost, self.training_accuracy = [], []
        self.training_data, self.training_roc = [], []
        self.testing_roc = []
        for j in xrange(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in xrange(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_weights_bias(
                    mini_batch, eta, lmbda, len(training_data), penalty)
            #print "Epoch %s training complete" % j
            if monitor_training_cost:
                cost = self.total_cost(training_data, lmbda, penalty)
                self.training_cost.append(cost)
                #print "Cost on training data: {}".format(cost)
            if monitor_training_accuracy:
                accuracy = self.accuracy(training_data)
                self.training_accuracy.append(accuracy)
                print "Accuracy on training data: {} / {}".format(
                    accuracy, n)
            if monitor_roc:
                roc = self.rocauc(training_data)
                self.training_roc.append(roc)
                #print "Roc on training data: {}".format(roc)
            if monitor_roc_test:
                roc = self.rocauc(evaluation_data)
                self.testing_roc.append(roc)
                #print "Roc on testing data: {}".format(roc)
            if monitor_evaluation_cost:
                cost = self.total_cost(evaluation_data, lmbda, penalty)
                self.evaluation_cost.append(cost)
                #print "Cost on evaluation data: {}".format(cost)
            if monitor_evaluation_accuracy:
                accuracy = self.accuracy(evaluation_data)
                evaluation_accuracy.append(accuracy)
                #print "Accuracy on evaluation data: {} / {}".format( \
                #    self.accuracy(evaluation_data), n_data)
            #print
        return self.evaluation_cost, evaluation_accuracy, \
               self.training_cost, self.training_accuracy, \
               self.training_data, self.training_roc, \
               self.weights, self.biases

#this calls on the backpropagation method to update the weights and biases for each layer
#(note the first layer is the input and has no biases). at each step we send (w,b) to
#(w - eta*[nabla_w(cost) + (lambda/n)*w], b - eta*nabla_b(cost))
    def update_weights_bias(self, mini_batch, eta, lmbda, n, pen):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y, pen)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [(1-eta*(lmbda/n))*w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb
                        for b, nb in zip(self.biases, nabla_b)]
        
#backpropagation algorithm has an extra parameter penalty=pen which depends on the proportion 
#of the minority class 1. it boils down to using a faster learning rate on the 1s than on 
#the 0s.  the class importance of the 1s is implicitly included in the cost function, since
#these multiplicative importances show up in the gradient of the cost. backprop(x,y,pen)
#rturns a tuple of vectors (nabla_b,nabla_w) representing the gradient nabla_(b,w)C.
    def backprop(self, x, y, pen):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = x
        activations = [x] # list to store all the activations, layer by layer
        zs = [] # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        delta = (self.cost).delta(zs[-1], activations[-1], y, pen)
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # backward pass
        for l in xrange(2, self.num_layers):
            z = zs[-l] # we use the same last z vector to compute all nabla_b/w
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def accuracy(self, data):
        """Return the number of inputs in ``data`` for which the neural
        network outputs the correct result. The neural network's
        output is assumed to be the index of whichever neuron in the
        final layer has the highest activation.
        """
        results = [(round(self.feedforward(x)), y) for (x, y) in data]
        return sum(int(x == y) for (x, y) in results)
    
    def rocauc(self, data):
        pred = [self.feedforward(x)[0][0] for (x, y) in data]
        target = [y for (x, y) in data]
        return roc_auc_score(target, pred)
    
    
    def get_data(self, data):
        return self.data

    
    def predict(self, data):
        result = [self.feedforward(x)[0][0] for x in data]
        return result
    
    def train_cost(self, data):
        return self.training_cost
    
    def evaluate_cost(self, data):
        return self.evaluation_cost
    
    def train_roc(self, data):
        return self.training_roc
    
    def test_roc(self, data):
        return self.testing_roc
    
    def trainning_data(self):
        return self.training_data
    
    def train_acc(self, data):
        return self.training_accuracy

#function to monitor the total cost
    def total_cost(self, data, lmbda, penalty):
        cost = 0.0
        for x, y in data:
            a = self.feedforward(x)
            cost += self.cost.fn(a, y, penalty)/len(data)
        cost += 0.5*(lmbda/len(data))*sum(
            np.linalg.norm(w)**2 for w in self.weights)
        return cost

    def save(self, filename):
        """Save the neural network to the file ``filename``."""
        data = {"sizes": self.sizes,
                "weights": [w.tolist() for w in self.weights],
                "biases": [b.tolist() for b in self.biases],
                "cost": str(self.cost.__name__)}
        f = open(filename, "w")
        json.dump(data, f)
        f.close()
    

#### Loading a Network
def load(filename):
    f = open(filename, "r")
    data = json.load(f)
    f.close()
    cost = getattr(sys.modules[__name__], data["cost"])
    net = Network(data["sizes"], cost=cost)
    net.weights = [np.array(w) for w in data["weights"]]
    net.biases = [np.array(b) for b in data["biases"]]
    return net

def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))

In [3]:
#####################
# Read train and test
#####################
train = pd.read_csv("/Users/boulenge/Desktop/Projects/Project4 - ML/train.csv", sep=',',\
                        encoding='utf-8')
train = train.set_index("ID")
test = pd.read_csv("/Users/boulenge/Desktop/Projects/Project4 - ML/test.csv", sep=',',\
                        encoding='utf-8')
test = test.set_index("ID")

In [4]:
#############################
# Remove the similar features
#############################
Full_train = train.iloc[:, :369]
temp = Full_train
ind_same = {}
for i in range(len(temp.columns)):
    tp = temp.drop(temp.columns[i], inplace=False, axis=1)
    for j in range(i+1, len(tp.columns)):
        lst = reduce(lambda x, y: x + y, ind_same.values(), [])
        if all(temp.iloc[:, i] == tp.iloc[:, j]):
            if i not in ind_same.keys():
                if i not in lst:
                    if j >= i: ind_same[i] = [j+1]
                    else: ind_same[i] = [j]
            else:
                if j >= i: ind_same[i].append(j+1)
                else: ind_same[i].append(j)

print "Full_train shape before dropping similar features: " + str(Full_train.shape)
ind_drop = reduce(lambda x, y: x + y, ind_same.values(), [])                 
Full_train.drop(Full_train.columns[ind_drop], inplace=True, axis=1)
print "Full_train shape after: " + str(Full_train.shape)

Full_train shape before dropping similar features: (76020, 369)
Full_train shape after: (76020, 321)


In [5]:
#############################################
### Cross-Validation
#############################################
# Cross-Validation and evaluate_model
train_target = train.iloc[:, 369]
X_train, X_test, y_train, y_test = \
             cv.train_test_split(Full_train, train_target, test_size=0.25, random_state=0, \
                                 stratify = train_target)

def evaluate_model(clf):
    """Scores a model using log loss with the created train and test sets."""
    start = time.time()
    clf.fit(X_train, y_train)
    print "Train score:", sklearn.metrics.roc_auc_score(y_train, clf.predict_proba(X_train)[:,1])
    print "Test score:", sklearn.metrics.roc_auc_score(y_test, clf.predict_proba(X_test)[:,1])
    print "Total time:", time.time() - start

print "Training: " + str(X_train.shape) + str(y_train.shape)
print "Test: " + str(X_test.shape) + str(y_test.shape)

Training: (57015, 321)(57015,)
Test: (19005, 321)(19005,)


In [6]:
#############################################
### Normalize train and test
#############################################
# Normalize train (with no target)
Full_train = train.iloc[:, :369]
Full_train = min_max_scaler.fit_transform(Full_train)
Full_train = pd.DataFrame(Full_train, index = train.index)
#Full_train.to_csv("/Users/boulenge/Desktop/Projects/Project4 - ML/Full_train.csv")
train_target = train.iloc[:, 369]

# Normalize test
Full_test = test
Full_test = min_max_scaler.fit_transform(Full_test)
Full_test = pd.DataFrame(Full_test, index = test.index)
#Full_test.to_csv("/Users/boulenge/Desktop/Projects/Project4 - ML/Full_test.csv")

In [7]:
########################################################
##USING RANDOM FOREST TO WEED OUT ZERO VARIANCE FEATURES
########################################################
randomForest = ensemble.RandomForestClassifier(n_estimators=50, class_weight = 'balanced', \
                                               max_depth =10, oob_score=True)
randomForest.fit(X_train, y_train)

print str(np.sum(randomForest.feature_importances_==0)) + " useless features"

#remove unimportant features by index
feature_imprtance = zip(Full_train.columns, randomForest.feature_importances_)
dtype = [('feature', 'S10'), ('importance', 'float')]
feature_imprtance = np.array(feature_imprtance, dtype = dtype)
feature_sort = np.sort(feature_imprtance, order='importance')[::-1]
imp = np.sort([int(i) for (i, j) in feature_sort if j!=0])
useless = [int(i) for (i, j) in feature_sort if j==0]
#filtered_train = Full_train.iloc[:, imp]
#filtered_train.to_csv('/Users/boulenge/Desktop/Projects/Project4 - ML/filtered_train.csv')

# filtering out the useless features in Full_train, train_target and Full_test
Full_train = Full_train.iloc[:, imp]
Full_test = Full_test.iloc[:, imp]

86 useless features


In [8]:
#############################################
### Convert the training and target data
#############################################
trainarray = [np.reshape(x, (len(Full_train.columns), 1)) for x in Full_train.values]
target = train.iloc[:,369].values.T

# Convert the testing data
testarray = [np.reshape(x, (len(Full_test.columns), 1)) for x in Full_test.values]

# Full training data
training_data = zip(trainarray, target)

In [None]:
#############################################
### Simple Neural Net instance
#############################################

In [9]:
##################################################################
### GridSearch for tuning parameters: averaging multiple times ###
##################################################################

# organizing the results
df_res = pd.DataFrame({'penalty': [], 'lmbda': [], 'eta': [], \
                       'roc_train': [], 'cost_train': [], \
                       'roc_test': [], 'cost_test': [], 'run_id': [], \
                       'roc_test_avg': [], 'roc_train_avg': [], 'roc_test_avg_run': []})

# Reload train to get lines indices
train_line = pd.read_csv("/Users/boulenge/Desktop/Projects/Project4 - ML/train.csv", sep=',',\
                        encoding='utf-8')

# size of the Training sample
N_samp = 2000
# size of the Testing sample
N_samp_test = 2000

#Pen = np.arange(16, 20, 1)
#Lmbda = np.arange(6, 25, 4)
#Eta = [1e-4, 1e-3, 1e-2]
#Run_count = range(1, 5)

Pen = np.arange(16, 20, 1)
Lmbda = np.arange(8, 20, 2)
Eta = [1e-4, 1e-3, 1e-2]
Run_count = range(1, 5)


result, best_roc, best_cost = [], [], []
count = 1
Nb_step = len(Pen)*len(Lmbda)*len(Eta)*len(Run_count)

run_count = 1
best_weights = []
best_biases = []
roc_avg = 0

for run_c in Run_count:
    ind_samp_train = random.sample(range(len(Full_train)), N_samp)
    train_temp = Full_train.iloc[ind_samp_train, :]
    
    out_train = [i for i in range(len(Full_train)) if i not in ind_samp_train]
    ind_samp_test = random.sample(out_train, N_samp_test)
    test_temp = Full_train.iloc[ind_samp_test, :]
    
    # Neural input
    train_array = [np.reshape(x, (len(train_temp.columns), 1)) for x in train_temp.values]
    target_train = train.iloc[ind_samp_train, 369]
    training_data_r = zip(train_array, target_train)
    
    test_array = [np.reshape(x, (len(test_temp.columns), 1)) for x in test_temp.values]
    target_test = train.iloc[ind_samp_test, 369]
    testing_data_r = zip(test_array, target_test)
    
    for pen in Pen:
        for lmbda in Lmbda:
            for eta in Eta:
                net_r = Network([len(training_data_r[0][0]), 100, 10, 1], [], [])
                net_r.SGD(training_data=training_data_r, \
                    evaluation_data = testing_data_r, \
                    epochs=2, mini_batch_size=500, \
                    eta=eta, lmbda = lmbda, penalty = pen, \
                    monitor_evaluation_cost = True, \
                    monitor_evaluation_accuracy = False, \
                    monitor_training_cost = True, \
                    monitor_training_accuracy = False, \
                    monitor_roc = True, monitor_roc_test = True)
                    
                temp = pd.DataFrame({'penalty': [pen], \
                                         'lmbda': [lmbda], 'eta': [eta], \
                             'roc_train': [net_r.training_roc], \
                             'cost_train': [net_r.training_cost], \
                             'roc_test': [net_r.testing_roc], \
                             'cost_test': [net_r.evaluation_cost], 'run_id': [run_c]})
                df_res = pd.concat([df_res, temp], axis=0)
                    
                for nb in df_res.run_id:
                    avg = []
                    for val in df_res[df_res.run_id == nb]['roc_test']:
                        avg.append(np.mean(val))
                    df_res.loc[df_res.run_id == nb, 'roc_test_avg'] = avg
                    avg = []
                    for val in df_res[df_res.run_id == nb]['roc_train']:
                        avg.append(np.mean(val))
                    df_res.loc[df_res.run_id == nb, 'roc_train_avg'] = avg
                
                df_res.to_csv("df_res.csv")
                
                roc_current = net_r.testing_roc
                if len(best_weights) == 0:
                    best_weights = net_r.weights
                    best_biases = net_r.biases
                else:
                    if roc_current > roc_avg:
                        best_weights = net_r.weights
                        best_biases = net_r.biases 
                
                print "step completed: {} / {}".format(count, Nb_step)
                count += 1
                
    #df_res.loc[df_res.run_id == nb, 'roc_test_avg_run'] = roc_test_run[nb-1]
                    
roc_test_run = df_res.groupby('run_id')['roc_test_avg'].agg(np.mean).tolist()
for nb in Run_count:
    df_res.loc[df_res.run_id == nb, 'roc_test_avg_run'] = roc_test_run[nb-1]

df_res.to_csv("df_res.csv")
np.savetxt("previous_best_weights.csv", best_weights, delimiter=",")
np.savetxt("previous_best_biases.csv", best_biases, delimiter=",")

Default weights
step completed: 1 / 288
Default weights
step completed: 2 / 288
Default weights
step completed: 3 / 288
Default weights
step completed: 4 / 288
Default weights
step completed: 5 / 288
Default weights
step completed: 6 / 288
Default weights
step completed: 7 / 288
Default weights
step completed: 8 / 288
Default weights
step completed: 9 / 288
Default weights
step completed: 10 / 288
Default weights
step completed: 11 / 288
Default weights
step completed: 12 / 288
Default weights
step completed: 13 / 288
Default weights
step completed: 14 / 288
Default weights
step completed: 15 / 288
Default weights
step completed: 16 / 288
Default weights
step completed: 17 / 288
Default weights
step completed: 18 / 288
Default weights
step completed: 19 / 288
Default weights
step completed: 20 / 288
Default weights
step completed: 21 / 288
Default weights
step completed: 22 / 288
Default weights
step completed: 23 / 288
Default weights
step completed: 24 / 288
Default weights
step comp

TypeError: Mismatch between array dtype ('object') and format specifier ('%.18e')

In [12]:
#############################################
### Grid Search summary
#############################################
# Model where the best epoch occurred
best_item = [max(x) for x in df_res.roc_test ]
best_item = df_res[best_item == max(best_item)]
best_item

Unnamed: 0,cost_test,cost_train,eta,lmbda,penalty,roc_test,roc_test_avg,roc_test_avg_run,roc_train,roc_train_avg,run_id
0,"[0.195977352837, 0.195974512453]","[0.186149482041, 0.186145812965]",0.0001,10,16,"[0.751547533285, 0.751537527682]",0.751543,0.51955,"[0.626982858361, 0.626979052491]",0.626981,2


In [13]:
print str(len(best_item.roc_test.tolist()[0])) + " epochs"
ind_best = np.where(best_item.roc_test.tolist()[0] == max(best_item.roc_test.tolist()[0]))
ind_worst = np.where(best_item.roc_test.tolist()[0] == min(best_item.roc_test.tolist()[0]))
best_eta = best_item.eta.values[0]
best_lmbda = best_item.lmbda.values[0]
best_pen = best_item.penalty.values[0]
best_run_id = best_item.run_id.values[0]
print "Best roc test: " + str(max(best_item.roc_test.tolist()[0]))
print "Best roc test: {}, obtained after {} / {} epochs, for the model with eta = {}, \
lmbda = {}, penalty = {} on the {} running time".format(\
max(best_item.roc_test.tolist()[0]), ind_best, len(best_item.roc_test.tolist()[0]), \
best_eta, best_lmbda, best_pen, best_run_id)

2 epochs
Best roc test: 0.751547533285
Best roc test: 0.751547533285, obtained after (array([0]),) / 2 epochs, for the model with eta = 0.0001, lmbda = 10.0, penalty = 16.0 on the 2.0 running time


In [14]:
# Model with the best averaged roc test
best_avg = df_res[df_res.roc_test_avg == max(df_res.roc_test_avg)]
best_avg

Unnamed: 0,cost_test,cost_train,eta,lmbda,penalty,roc_test,roc_test_avg,roc_test_avg_run,roc_train,roc_train_avg,run_id
0,"[0.195977352837, 0.195974512453]","[0.186149482041, 0.186145812965]",0.0001,10,16,"[0.751547533285, 0.751537527682]",0.751543,0.51955,"[0.626982858361, 0.626979052491]",0.626981,2


In [15]:
# Model with the best averaged roc test among those for which roc_test_avg < roc_train_avg
best_cons = df_res[df_res.roc_test_avg <= df_res.roc_train_avg]
best_cons = best_cons[best_cons.roc_test_avg == max(best_cons.roc_test_avg)]
best_cons

Unnamed: 0,cost_test,cost_train,eta,lmbda,penalty,roc_test,roc_test_avg,roc_test_avg_run,roc_train,roc_train_avg,run_id
0,"[0.204901983917, 0.204885998622]","[0.21240566043, 0.212393686673]",0.001,10,18,"[0.717977213043, 0.717984051866]",0.717981,0.47383,"[0.721483744053, 0.721490028973]",0.721487,3


In [None]:
## best: eta = 0.0001, lmbda = 17.0, penalty = 16.0 : roc_test_avg_run = 0.466
## best among test < train: eta = 0.01, lmbda = 9.0, penalty = 18.0 : roc_test_avg_run = 0.466

## Bigger sample:
## best: eta = 0.01, lmbda = 22.0, penalty = 16.0 : roc_test_avg_run = 0.466
## best among test < train: eta = 0.01, lmbda = 9.0, penalty = 18.0 : roc_test_avg_run = 0.466

In [17]:
#############################################
### Train the best model
#############################################
net_b = Network([len(training_data[0][0]), 100, 10, 1], previous_best_weights, \
                previous_best_biases)
net_b.SGD(training_data=training_data, \
          evaluation_data = None, \
                    epochs=50, mini_batch_size=500, \
                    eta=1e-1, lmbda = 10, penalty = 18, \
                    monitor_evaluation_cost = False, \
                    monitor_evaluation_accuracy = False, \
                    monitor_training_cost = True, \
                    monitor_training_accuracy = False, \
                    monitor_roc = True, monitor_roc_test = False)
print net_b.training_roc

Best previous weights
[0.68149845677146048, 0.69111586446078221, 0.69771095309966524, 0.71547341004713882, 0.71775129813257166, 0.71889381425277221, 0.72022644275685954, 0.72119097425401368, 0.72190298787593643, 0.72206691413157387, 0.72235894114170673, 0.72266618761518076, 0.72299064790500778, 0.72341035221112004, 0.72353757189870271, 0.72376464355975867, 0.72401727299126573, 0.72433191405216402, 0.72454992688829356, 0.72491685824597774, 0.72511789176159913, 0.72536215219576261, 0.72561335188561249, 0.72569128874253841, 0.72599256809358648, 0.72659921339803069, 0.72746910191679359, 0.72695242788710912, 0.72741489922670821, 0.72905518855615115, 0.7295032304329746, 0.73097400430969672, 0.73272067962305232, 0.73448617607379618, 0.73627039121228677, 0.73764253795791079, 0.73965092429428669, 0.74077910657137691, 0.74138574732250362, 0.74256869234771661, 0.7441538683235982, 0.74469965398794824, 0.74598721375661348, 0.7467479205545926, 0.74831473072482413, 0.74912107769955627, 0.750421912665

In [10]:
previous_best_weights = best_weights
previous_best_biases = best_biases