In [1]:
## Loading MNIST dataset from keras
import keras
from sklearn.preprocessing import LabelBinarizer
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.metrics import roc_auc_score, accuracy_score


Using TensorFlow backend.


In [2]:
def load_dataset(flatten=False):
    (X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()
    # normalize
    X_train = X_train/255
    X_test = X_test/255
    # we reserve the last 10000 training examples for validation
    X_train, X_val = X_train[:-10000], X_train[-10000:]
    y_train, y_val = y_train[:-10000], y_train[-10000:]
    if flatten:
        X_train = X_train.reshape([X_train.shape[0], -1])
        X_val = X_val.reshape([X_val.shape[0], -1])
        X_test = X_test.reshape([X_test.shape[0], -1])
    return X_train, y_train, X_val, y_val, X_test, y_test


In [3]:
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
## Printing dimensions
#print(X_train.shape, y_train.shape)
## Visualizing the first digit
#plt.imshow(X_train[0], cmap="Greys");


In [4]:
## Changing dimension of input images from N*28*28 to  N*784
X_train = X_train.reshape((X_train.shape[0],X_train.shape[1]*X_train.shape[2]))
X_test = X_test.reshape((X_test.shape[0],X_test.shape[1]*X_test.shape[2]))

In [6]:
print('Train dimension:');
print(X_train.shape)
print('Test dimension:');
print(X_test.shape)
## Changing labels to one-hot encoded vector
lb = LabelBinarizer()
y_train = lb.fit_transform(y_train)
y_test = lb.transform(y_test)
print('Train labels dimension:');
print(y_train.shape)
print('Test labels dimension:');
print(y_test.shape)
y_train[0]

Train dimension:
(50000, 784)
Test dimension:
(10000, 784)
Train labels dimension:
(50000, 10)
Test labels dimension:
(10000, 10)


array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int64)

In [7]:
## Defining various initialization parameters for 784-512-256-10 MLP model
num_classes = y_train.shape[1]
num_features = X_train.shape[1]
num_output = y_train.shape[1]
num_layers_0 = 512
num_layers_1 = 256
starter_learning_rate = 0.001
regularizer_rate = 0.1

In [8]:
# Placeholders for the input data
input_X = tf.placeholder('float32',shape =(None,num_features),name="input_X")
input_y = tf.placeholder('float32',shape = (None,num_classes),name='input_Y')
## for dropout layer
keep_prob = tf.placeholder(tf.float32)

In [9]:
def first_layer(neurons_input,neurons_output,inputs, dropout,activation_func):
    
    weights.append(tf.Variable(tf.random_normal([neurons_input,neurons_output], stddev=(1/tf.sqrt(float(neurons_input))))))
    bias.append(tf.Variable(tf.random_normal([neurons_output])))
    
    if(activation_func=='relu'):
        visible_layer =tf.nn.relu(tf.matmul(inputs,weights[-1])+bias[-1])
    return visible_layer
    

In [10]:
def hdd_layer(neurons_input=0, neurons_output=0, dropout=False, previous_layer=0, activation_func=None) :
    weights.append(tf.Variable(tf.random_normal([neurons_input,neurons_output], stddev=(1/tf.sqrt(float(neurons_input))))))
    bias.append(tf.Variable(tf.random_normal([neurons_output])))

    if(activation_func=='relu'):
        hidden_output =tf.nn.relu(tf.matmul(previous_layer,weights[-1])+bias[-1])
    
    if(dropout):
        hidden_output= (tf.nn.dropout(hidden_output, keep_prob))
    return hidden_output


In [83]:
def last_layer(activation_func,neurons_input,neurons_output,previous_layer):
    weights.append(tf.Variable(tf.random_normal([neurons_input,neurons_output], name="W", 
                                                stddev=(1/tf.sqrt(float(neurons_input)))
                                                )))
    bias.append(tf.Variable(tf.random_normal([neurons_output]), name="B"))
    
    if(activation_func=='sigmoid'):
        predicted_y = tf.sigmoid(tf.matmul(previous_layer,weights[-1]) + bias[-1])
    elif(activation_func=='softmax'):
        predicted_y = tf.nn.softmax(tf.matmul(previous_layer,weights[-1]) + bias[-1])
    elif(activation_func=='leaky_relu'):
        predicted_y=tf.nn.leaky_relu(tf.matmul(previous_layer,weights[-1]) + bias[-1])
    elif(activation_func=='relu'):
        predicted_y=tf.nn.relu(tf.matmul(previous_layer,weights[-1]) + bias[-1])
    elif(activation_func=='tanh'):
        predicted_y=tf.nn.tanh(tf.matmul(previous_layer,weights[-1]) + bias[-1])
    return predicted_y


In [90]:
## Defining the loss function
#tf.nn.softmax_cross_entropy_with_logits_v2(logits=,labels=input_y) 
#loss= -tf.reduce_sum(input_y* tf.log(predicted_y), axis=1)

control = tf.constant([0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001])
def perda():
   
    xent = -input_y*tf.log(tf.add(tensor,tf.abs(predicted_y))) - (1-input_y)*tf.log(tf.add(control,tf.abs((1-predicted_y)))) # Cross-entropy
    cost = tf.reduce_mean(xent) 
    
    return cost

In [91]:
#learning_rate = tf.train.exponential_decay(starter_learning_rate, 0, 5, 0.85, staircase=True)
#optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    ## Adam optimzer for finding the right weight
## Variable learning rate
def otimizador(loss):  
    optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(loss, var_list=[weights,bias])
    return optimizer

In [92]:
## Here we call the functions to define layers, loss and optimezer
weights=[]
bias =[]
layers_outupt=first_layer(neurons_input=num_features,neurons_output =512,dropout=False,
                          inputs=input_X, activation_func='relu')

layers_outupt=hdd_layer(neurons_input=512,neurons_output=256,dropout=False,
                        previous_layer=layers_outupt,activation_func='relu')

layers_outupt=hdd_layer(neurons_input=256,neurons_output =128,
                        dropout=True,previous_layer=layers_outupt,activation_func='relu')

predicted_y= last_layer(neurons_input=128,neurons_output=num_classes,
                        previous_layer=layers_outupt,activation_func='softmax')

loss=perda()
optimizer=otimizador(loss)


In [93]:
## Metrics definition
correct_prediction = tf.equal(tf.argmax(y_train,1), tf.argmax(predicted_y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


In [94]:
## Training parameters
batch_size = 128
epochs=20
dropout_prob = 0.2
training_accuracy = []
training_loss = []
testing_accuracy = []

# Initialization
init = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(init)

    for epoch in range(epochs):    
        arr = np.arange(X_train.shape[0])
        np.random.shuffle(arr)
        for index in range(0,X_train.shape[0],batch_size):
            session.run(optimizer, {input_X: X_train[arr[index:index+batch_size]],
                              input_y: y_train[arr[index:index+batch_size]],
                             keep_prob:dropout_prob})
        training_accuracy.append(session.run(accuracy, feed_dict= {input_X:X_train, 
                                                             input_y: y_train,keep_prob:1}))
        training_loss.append(session.run(loss, {input_X: X_train, 
                                          input_y: y_train,keep_prob:1}))

        ## Evaluation of model
        testing_accuracy.append(accuracy_score(y_test.argmax(1), 
                            session.run(predicted_y, {input_X: X_test,keep_prob:1}).argmax(1)))
        print("Epoch:{0}, Train loss: {1:.2f} Train acc: {2:.3f}, Test acc:{3:.3f}".format(epoch,
                                                                    training_loss[epoch],
                                                                    training_accuracy[epoch],
                                                                   testing_accuracy[epoch]))


Epoch:0, Train loss: nan Train acc: 0.099, Test acc:0.098


KeyboardInterrupt: 