In [1]:
import pandas as pd 
import numpy as np  
import random
from sklearn.model_selection import train_test_split  
import matplotlib.pyplot as plt
import tensorflow as tf 
from sklearn.metrics import accuracy_score

# Installing Theano
# pip install --upgrade --no-deps git+git://github.com/Theano/Theano.git

# Installing Tensorflow
# Install Tensorflow from the website: https://www.tensorflow.org/versions/r0.12/get_started/os_setup.html

# Installing Keras
# pip install --upgrade keras
import warnings
warnings.filterwarnings('ignore')


In [2]:
%store -r pca_train_x
%store -r pca_test_x

%store -r train_x
%store -r test_x
%store -r train_y
%store -r test_y

%store -r train_x_two_features
%store -r test_x_two_features
%store -r train_y_two_features
%store -r test_y_two_features

%store -r pca_train_x
%store -r pca_test_x

In [3]:
# START: OWN CODE
# Learning Parameters
rate   =   0.0010  # training rate
epochs =  200     # number of full training cycles 
banch_size  = 68   # number of data points to train per batch

In [4]:
# Network Parameters
n_hidden_1 = 512  # number of nodes in hidden layer 1
n_hidden_2 = 512  # number of nodes in hidden layer 2
n_input    = train_x.shape[-1]  # 20
n_classes  = 2

In [5]:
X = tf.placeholder('float', [None,n_input])
Y = tf.placeholder('float', [None,n_classes])

In [6]:
weights = {
    'w1' : tf.Variable(tf.random_normal([n_input,    n_hidden_1])),
    'w2' : tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes ]))
}

biases = {
    'b1' : tf.Variable(tf.random_normal([n_hidden_1])),
    'b2' : tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes ]))
}

In [7]:
def neural_network(X, weights, biases):  
    # Hidden Layer 1
    layer1 = tf.matmul(X, weights['w1'])
    layer1 = tf.add(layer1, biases['b1'])
    layer1 = tf.nn.softmax(layer1)
    
    # Hidden Layer 2
    layer2 = tf.matmul(layer1, weights['w2'])
    layer2 = tf.add(layer2, biases['b2'])
    layer2 = tf.nn.softmax(layer2)
    
    # Output Layer
    output = tf.matmul(layer2, weights['out'])
    output = tf.add(output, biases['out'])
    output = tf.nn.softmax(output)
    
    return output 

In [8]:
# In this section we define the optimizer and loss functions. 
# We also define our notion of correct and incorrect prediction and what we mean by accuracy.

def run_my_neural_network(train_x,train_y,test_x,test_y):
    train_y = train_y.reshape(len(train_y), 1)
    test_y = test_y.reshape(len(test_y), 1)

    # Classification: label(gender) = {male, female} → [1.0, 0.0], [0.0, 1.0]  
    labels_tmp = []  
    for label in train_y:  
        tmp = []  
        if label[0] == 0:  
            tmp = [0.0, 1.0]  
        else:   
            tmp = [1.0, 0.0]  
        labels_tmp.append(tmp)  
    train_y = np.array(labels_tmp) 
    
    
    
    labels_tmp = []  
    for label in test_y:  
        tmp = []  
        if label[0] == 0:  
            tmp = [0.0, 1.0]  
        else:   
            tmp = [1.0, 0.0]  
        labels_tmp.append(tmp)  
    test_y = np.array(labels_tmp) 
    
    model = neural_network(X, weights, biases)
    f_cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=model, labels=Y))
    f_optimizer = tf.train.AdamOptimizer(learning_rate=rate).minimize(f_cost)
    with tf.Session() as s:
        s.run(tf.global_variables_initializer())
        for epoch in range(epochs):
            cost_avg = 0.
            batch_total = len(train_x) // banch_size
            for banch in range(batch_total):
                voice_banch = train_x[banch*banch_size:(banch+1)*(banch_size)]  
                label_banch = train_y[banch*banch_size:(banch+1)*(banch_size)]        
                _, cost = s.run([f_optimizer,f_cost], feed_dict={X:voice_banch, Y: label_banch})        
                cost_avg += cost / batch_total
        
            print('Epoch {}: cost={:.4f}'.format(epoch+1, cost_avg))
    
        # testing
        
        # This gives us a list of booleans
        prediction = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))  
        # We cast to floating point numbers and then take the mean
        accuracy = tf.reduce_mean(tf.cast(prediction, dtype=tf.float32))  
        accuracy = s.run(accuracy, feed_dict={X: train_x, Y: train_y})  
        print('In-sample accuracy in Neural Network: %s'  % (accuracy)) 
        
        accuracy = tf.reduce_mean(tf.cast(prediction, dtype=tf.float32)) 
        accuracy = s.run(accuracy, feed_dict={X: test_x, Y: test_y})  
        print('Out-of-sample accuracy in Neural Network: %s'  % (accuracy)) 

# END: OWN CODE

In [9]:
run_my_neural_network(train_x,train_y,test_x,test_y)

Epoch 1: cost=0.8012
Epoch 2: cost=0.7955
Epoch 3: cost=0.7880
Epoch 4: cost=0.7778
Epoch 5: cost=0.7637
Epoch 6: cost=0.7444
Epoch 7: cost=0.7183
Epoch 8: cost=0.6840
Epoch 9: cost=0.6441
Epoch 10: cost=0.6043
Epoch 11: cost=0.5678
Epoch 12: cost=0.5359
Epoch 13: cost=0.5085
Epoch 14: cost=0.4854
Epoch 15: cost=0.4660
Epoch 16: cost=0.4496
Epoch 17: cost=0.4357
Epoch 18: cost=0.4240
Epoch 19: cost=0.4139
Epoch 20: cost=0.4053
Epoch 21: cost=0.3978
Epoch 22: cost=0.3912
Epoch 23: cost=0.3855
Epoch 24: cost=0.3803
Epoch 25: cost=0.3758
Epoch 26: cost=0.3717
Epoch 27: cost=0.3681
Epoch 28: cost=0.3648
Epoch 29: cost=0.3618
Epoch 30: cost=0.3590
Epoch 31: cost=0.3565
Epoch 32: cost=0.3542
Epoch 33: cost=0.3521
Epoch 34: cost=0.3501
Epoch 35: cost=0.3483
Epoch 36: cost=0.3467
Epoch 37: cost=0.3451
Epoch 38: cost=0.3437
Epoch 39: cost=0.3424
Epoch 40: cost=0.3411
Epoch 41: cost=0.3400
Epoch 42: cost=0.3388
Epoch 43: cost=0.3378
Epoch 44: cost=0.3368
Epoch 45: cost=0.3359
Epoch 46: cost=0.33

In [10]:
# use Keras deep learning library with a TensorFlow backend

In [11]:
import keras
from keras.models import Sequential
from keras.layers import Dense
def run_neural_network(train_x,train_y,test_x,test_y):
    
    train_y = train_y.reshape(len(train_y), 1)
    test_y = test_y.reshape(len(test_y), 1)
    # Classification: label(gender) = {male, female} → [1.0, 0.0], [0.0, 1.0]  
    labels_tmp = []  
    for label in train_y:  
        tmp = []  
        if label[0] == 0:  
            tmp = [0.0, 1.0]  
        else:   
            tmp = [1.0, 0.0]  
        labels_tmp.append(tmp) 
    train_y = np.array(labels_tmp) 

    labels_tmp = []  
    for label in test_y:  
        tmp = []  
        if label[0] == 0:  
            tmp = [0.0, 1.0]  
        else:   
            tmp = [1.0, 0.0]  
        labels_tmp.append(tmp)  
    test_y = np.array(labels_tmp) 
    
    
    #Initialisng the ANN
    classifier = Sequential()
    #Input Layer and First Hidden Layer
    classifier.add(Dense(units = 512, activation = 'softmax', input_dim = train_x.shape[1]))
    #Adding the Second hidden layer
    classifier.add(Dense(units = 512, activation = 'softmax'))
    # Adding the output layer
    classifier.add(Dense(units = 2, activation = 'softmax'))
    #Compiling the ANN
    classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    #Fitting ANN to the training set
    classifier.fit(train_x, train_y, batch_size = 68, epochs = 200, verbose = 0)
    pred_train_y = classifier.predict(train_x)
    pred_train_y = (pred_train_y > 0.5)
    labels_tmp = []  
    for label in pred_train_y:  
        tmp = []  
        if label[0] == True:  
            tmp = [1,0]  
        else:   
            tmp = [0,1]  
        labels_tmp.append(tmp)  
    new_result = np.array(labels_tmp) 
    print('In-sample accuracy in Neural Network using Keras package (with all features):%s' % (accuracy_score(train_y, new_result)))
   
    # Predicting the Test set results
    pred_y = classifier.predict(test_x)
    pred_y = (pred_y > 0.5)
    labels_tmp = []  
    for label in pred_y:  
        tmp = []  
        if label[0] == True:  
            tmp = [1,0]  
        else:   
            tmp = [0,1]  
        labels_tmp.append(tmp)  
    new_result = np.array(labels_tmp) 
    print('Out-of-sample accuracy in Neural Network using Keras package (with 2 features):%s' % (accuracy_score(test_y, new_result)))
    

Using TensorFlow backend.


In [12]:
run_neural_network(train_x,train_y,test_x,test_y)

In-sample accuracy in Neural Network using Keras package (with all features):0.997895622896
Out-of-sample accuracy in Neural Network using Keras package (with 2 features):0.974747474747
