In [1]:
import numpy as np
from sklearn import metrics   #Additional scklearn functions
from sklearn.model_selection import GridSearchCV   #Performing grid search
from sklearn.neural_network import MLPClassifier

### Data Splitting Function
- scikit-learn's `train_test_split` shuffles the rows by default; in this case the original row order must be kept, so the data is split by position instead.

In [2]:
def data_split(x, y, ratio):
    """Split features and labels by position, without shuffling.

    The cut index is ``round(len(y) * ratio)``; rows before it form the
    first part, rows from it onward form the second part.

    Parameters
    ----------
    x : 2-D array
        Feature matrix, sliced along its first axis.
    y : array
        Labels; ``y.shape[0]`` determines the number of rows.
    ratio : float
        Fraction of rows that goes into the first part.

    Returns
    -------
    tuple
        ``(x_first, x_second, y_first, y_second)``.
    """
    n_rows = y.shape[0]
    cut = int(round(n_rows * ratio))

    x_first, x_second = x[:cut, :], x[cut:, :]
    y_first, y_second = y[:cut], y[cut:]

    return x_first, x_second, y_first, y_second

### Performance Metrics Function 
- The equivalent NumPy and scikit-learn metric functions could be used here instead.

In [3]:
def performance_metrics(pred_labels, true_labels):
    """Print confusion-matrix counts, accuracy, precision, recall and AUC.

    Labels are treated as binary: 1 is the positive class, 0 the negative.

    Parameters
    ----------
    pred_labels : array-like
        Predicted labels (0/1).
    true_labels : array-like
        Ground-truth labels (0/1), one per prediction.

    Returns
    -------
    None
        The metrics are printed, not returned.

    Notes
    -----
    AUC is computed from whatever is passed as ``pred_labels``. When those
    are hard 0/1 predictions rather than scores, the value reflects a single
    operating point, not a ranking of the samples.
    """
    # Flatten to 1-D arrays. A plain list would turn `== 1` into a scalar
    # comparison (all counts silently 0), and mixing shapes (n,) and (n, 1)
    # would broadcast the comparisons to (n, n).
    pred_labels = np.asarray(pred_labels).ravel()
    true_labels = np.asarray(true_labels).ravel()

    def _ratio(numerator, denominator):
        # Every zero denominator below implies a zero numerator, so the
        # result is 0/0; report it as NaN explicitly rather than through a
        # NumPy RuntimeWarning.
        if not denominator:
            return float('nan')
        return numerator / float(denominator)

    # Positive Cases
    PC = np.sum(true_labels == 1)

    # Negative Cases
    NC = np.sum(true_labels == 0)

    # True Positive
    TP = np.sum(np.logical_and(pred_labels == 1, true_labels == 1))

    # True Negative
    TN = np.sum(np.logical_and(pred_labels == 0, true_labels == 0))

    # False Positive
    FP = np.sum(np.logical_and(pred_labels == 1, true_labels == 0))

    # False Negative
    FN = np.sum(np.logical_and(pred_labels == 0, true_labels == 1))

    # Accuracy
    Accuracy = _ratio(TP + TN, TP + FP + TN + FN)

    # Precision
    Precision = _ratio(TP, TP + FP)

    # Recall
    Recall = _ratio(TP, TP + FN)

    # AUC
    fpr, tpr, thresholds = metrics.roc_curve(true_labels, pred_labels)
    AUC = metrics.auc(fpr, tpr)

    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print('Positive Cases: %d \t True Positive: %d \t False Positive: %d' % (PC, TP, FP))
    print('Negative Cases: %d \t True Negative: %d \t False Negative: %d' % (NC, TN, FN))
    print('Accuracy: %.3f \t Precision: %.3f \t Recall: %.3f \t AUC: %.3f' % (Accuracy, Precision, Recall, AUC))

### Loading the Feature and Label Data

In [4]:
ratio = 0.85  # fraction of rows (taken from the start) used for training; the rest is the test set

# The feature array is transposed after loading so that its first axis is the
# one data_split slices on.
# NOTE(review): presumably stored on disk as (features, samples) - confirm.
x = np.load('data/features_pca.npy').T  # loading the features data
y = np.load('data/labels.npy')  # loading the label data

# data_split cuts x and y at the same row index derived from y alone, so a
# length mismatch would produce splits of inconsistent length. Fail early.
if x.shape[0] != y.shape[0]:
    raise ValueError('features have %d rows but labels have %d'
                     % (x.shape[0], y.shape[0]))

x_train, x_test, y_train, y_test = data_split(x, y, ratio)  # splitting the data


### Setting the model parameters and training

In [5]:
# Multi-layer perceptron trained with the Adam optimizer.
mlp = MLPClassifier(
    hidden_layer_sizes=(200, 50),  # two hidden layers: 200 units, then 50
    activation='relu',  # Rectified Linear Unit, to mitigate vanishing gradients
    solver='adam',  # stochastic gradient-based optimizer with per-parameter step sizes
    alpha=0.0001,  # L2 regularization strength
    batch_size='auto',  # leave the minibatch size to sklearn
    learning_rate_init=0.001,  # initial step size
    max_iter=200,  # upper bound on the number of training epochs
    shuffle=False,  # do not reshuffle samples between epochs; row order is kept throughout this notebook
    random_state=42,  # fixed seed so weight initialisation (and the results) are reproducible
    tol=0.0001,  # stop once the loss stops improving by at least this much
    verbose=True,  # print the loss at every iteration
    warm_start=False,
    early_stopping=False,  # validation_fraction=0.1,
    # Adam-specific settings
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    # The settings below are only used when solver='sgd'; they are ignored
    # with solver='adam' and are kept only for completeness.
    learning_rate='adaptive',
    power_t=0.5,
    momentum=0.9,
    nesterovs_momentum=False,
)

In [6]:
# Train on the training rows, then generate predictions for the held-out rows.
# fit() returns the fitted estimator itself, so the two calls can be chained.
y_pred = mlp.fit(x_train, y_train).predict(x_test)

Iteration 1, loss = 0.67260890
Iteration 2, loss = 0.60732580
Iteration 3, loss = 0.60316139
Iteration 4, loss = 0.60270208
Iteration 5, loss = 0.60220475
Iteration 6, loss = 0.60211982
Iteration 7, loss = 0.60158031
Iteration 8, loss = 0.60117278
Iteration 9, loss = 0.60083384
Iteration 10, loss = 0.60030463
Iteration 11, loss = 0.59980824
Iteration 12, loss = 0.59986214
Iteration 13, loss = 0.59901691
Iteration 14, loss = 0.59821113
Iteration 15, loss = 0.59745938
Iteration 16, loss = 0.59644758
Iteration 17, loss = 0.59619850
Iteration 18, loss = 0.59540210
Iteration 19, loss = 0.59492642
Iteration 20, loss = 0.59385275
Iteration 21, loss = 0.59312972
Iteration 22, loss = 0.59283077
Iteration 23, loss = 0.59157873
Iteration 24, loss = 0.59089941
Iteration 25, loss = 0.59018418
Iteration 26, loss = 0.58946580
Iteration 27, loss = 0.58840621
Iteration 28, loss = 0.58841781
Iteration 29, loss = 0.58735775
Iteration 30, loss = 0.58652965
Iteration 31, loss = 0.58559704
Iteration 32, los

### Performance Metrics

In [7]:
# Report confusion counts and summary scores for the test-set predictions.
performance_metrics(pred_labels=y_pred, true_labels=y_test)

Positive Cases: 6571 	 True Positive: 4670 	 False Positive: 2512
Negative Cases: 5351 	 True Negative: 2839 	 False Negative: 1901
Accuracy: 0.630 	 Precision: 0.650 	 Recall: 0.711 	 AUC: 0.621
