### Multinomial Logistic Regression with Tensorflow Iris data

In [1]:
import json
import matplotlib
import warnings
import pandas as pd
import numpy as np
import math
import tensorflow as tf
from matplotlib import pyplot as plt
from IPython.core.pylabtools import figsize
from mpl_toolkits.mplot3d import Axes3D
from sklearn.metrics import roc_auc_score

warnings.simplefilter("ignore")
root = r'C:\\Users\\admin\\Desktop\\Python_Prog\\PyMC3\\Styles\\bmh_matplotlibrc.json'
s = json.load(open(root))
matplotlib.rcParams.update(s)
% matplotlib inline

In [2]:
headers = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
iris = pd.read_csv("C:\\Users\\admin\\Desktop\\Deep Learning\\data\\iris.csv", names = headers)
iris.tail()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
149,5.9,3.0,5.1,1.8,Iris-virginica


In [3]:
iris.sepal_length =(( iris.sepal_length  - iris.sepal_length.mean())/ iris.sepal_length.std()).astype(float)
iris.sepal_width =(( iris.sepal_width  - iris.sepal_width.mean())/ iris.sepal_width.std()).astype(float)
iris.petal_length =(( iris.petal_length - iris.petal_length.mean())/ iris.petal_length.std()).astype(float)
iris.sepal_length =(( iris.petal_width  - iris.petal_width.mean())/ iris.petal_width.std()).astype(float)

In [4]:
#split our data into train, validation and test sets
#before split always remenber to chech if the class are well balance
shuffled_index = np.random.permutation(iris.index)
shuffled_iris = iris.loc[shuffled_index]

In [5]:
shuffled_iris.species = (shuffled_iris.species == 'Iris-versicolor').values.astype(int)
shuffled_iris.loc[:, ('not_species')] = shuffled_iris['species'] == 0
shuffled_iris.loc[:, ('not_species')] = shuffled_iris['not_species'].astype(int)

In [6]:
#tensor are a genric version of vector and matrix
#A list is  1D tensor
#A list of list is a matrix, a matrix is a 2D tensor
#A list of list of list is then a 3D tensor

X = shuffled_iris.loc[:, ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].as_matrix()
y = shuffled_iris.loc[:, ['species', 'not_species']].as_matrix()

In [7]:
trainX, trainY = X[:50, :], y[:50, :]
validX, validY = X[50:100], y[50:100]
testX, testY = X[100:], y[100:]

In [8]:
def accuracy(predictions, labels):
    return 100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))/len(predictions)

In [9]:
#SGD
batch_size = 40
Lambda = 1e-5
graph = tf.Graph()
with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape = (batch_size, X.shape[1]))
    tf_train_labels = tf.placeholder(tf.float32, shape = (batch_size, y.shape[1]))
    tf_validation_dataset = tf.placeholder(tf.float32, shape = (validX.shape))
    tf_test_dataset = tf.placeholder(tf.float32, shape = (testX.shape))
    
    #Variables
    weights = tf.Variable(tf.truncated_normal([X.shape[1], y.shape[1]]))
    biases = tf.Variable(tf.zeros([y.shape[1]]))
    
    #Training
    logits = tf.matmul(tf_train_dataset, weights) + biases
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels = tf_train_labels, logits = logits)) \
        + (Lambda/2*batch_size)*(tf.nn.l2_loss(weights))
    
    #Optimization
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    
    #Predctions
    train_predictions = tf.nn.softmax(logits)
    valid_predictions = tf.nn.softmax(tf.matmul(tf_validation_dataset, weights) + biases)
    test_predictions = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

In [10]:
num_steps = 3001
with tf.Session(graph = graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')
    for step in np.arange(num_steps):
        offset = (step * batch_size) % (trainY.shape[0] - batch_size)
        batch_data = trainX[offset : (offset + batch_size), :]
        batch_labels = trainY[offset : (offset + batch_size), :]
        feed_dict = {tf_train_dataset:batch_data, tf_train_labels:batch_labels,
                                    tf_validation_dataset:validX, tf_test_dataset:testX}
        _, l, predictions = session.run([optimizer, loss, train_predictions], feed_dict = feed_dict)
        if step%500 == 0:
            print('Loss at step {0} : {1}'.format(step, l))
            print('Training accuracy : %.1f'%accuracy(predictions, batch_labels))
            print('Validation accuracy : %.1f'%accuracy(valid_predictions.eval(
                        feed_dict = {tf_validation_dataset:validX}), validY))
    print('Test accuracy : %.1f'%accuracy(test_predictions.eval(
                feed_dict = {tf_test_dataset:testX}), testY))

Initialized
Loss at step 0 : 1.3457576036453247
Training accuracy : 40.0
Validation accuracy : 38.0
Loss at step 500 : 0.42070624232292175
Training accuracy : 82.5
Validation accuracy : 78.0
Loss at step 1000 : 0.4203040599822998
Training accuracy : 82.5
Validation accuracy : 76.0
Loss at step 1500 : 0.42024701833724976
Training accuracy : 82.5
Validation accuracy : 76.0
Loss at step 2000 : 0.42021632194519043
Training accuracy : 82.5
Validation accuracy : 76.0
Loss at step 2500 : 0.4201897084712982
Training accuracy : 82.5
Validation accuracy : 76.0
Loss at step 3000 : 0.4201652705669403
Training accuracy : 82.5
Validation accuracy : 76.0
Test accuracy : 74.0


72.0 to 78.0 after increasing the size of the batch from 30 to 40
but it seems our model is overfittinga bit.

In [11]:
_graph = tf.Graph()
hidden_units = 1024
batch_size = 40
Lambda = 1e-5
with _graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape = (batch_size, X.shape[1]))
    tf_train_labels = tf.placeholder(tf.float32, shape = (batch_size, y.shape[1]))
    tf_validation_dataset = tf.placeholder(tf.float32, shape=(validX.shape))
    tf_test_dataset = tf.placeholder(tf.float32, shape = (testX.shape))
    
    #variables
    weights1 = tf.Variable(tf.truncated_normal([X.shape[1], hidden_units]))
    biases1 = tf.Variable(tf.zeros([hidden_units]))
    weights2 = tf.Variable(tf.truncated_normal([hidden_units, y.shape[1]]))
    biases2 = tf.Variable(tf.zeros([y.shape[1]]))
    
    #training
    hidden1 = tf.nn.relu(tf.matmul(tf_train_dataset, weights1) + biases1)
    logits = tf.matmul(hidden1, weights2) + biases2
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = tf_train_labels, logits = logits))\
    + (Lambda/(2*batch_size)) * (tf.nn.l2_loss(weights1) + tf.nn.l2_loss(weights2))
    
    #Optimization
    global_step = tf.Variable(0, trainable = False)
    learning_rate = tf.train.exponential_decay(0.7, global_step, 100000, 0.96, staircase = True)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step = global_step)
    
    #Predictions
    train_predictions = tf.nn.softmax(logits)
    valid_hidden = tf.nn.relu(tf.matmul(tf_validation_dataset, weights1) + biases1)
    valid_predictions = tf.nn.softmax(tf.matmul(valid_hidden, weights2) + biases2)
    test_hidden = tf.nn.relu(tf.matmul(tf_test_dataset, weights1) + biases1)
    test_predictions = tf.nn.softmax(tf.matmul(test_hidden, weights2)+ biases2)

In [12]:
num_steps = 3001
with tf.Session(graph = _graph) as session:
    tf.global_variables_initializer().run()
    print('Initiliazed')
    for step in np.arange(num_steps):
        offset = (step * batch_size)%(trainY.shape[0] - batch_size)
        batch_data = trainX[offset:(offset + batch_size), :]
        batch_labels = trainY[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset:batch_data, tf_train_labels:batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_predictions], feed_dict = feed_dict)
        if step % 500 == 0:
            print('Minibatch error at step {0} : {1}'.format(step, l))
            print('Minibatch accuracy  : %.1f%%'%accuracy(predictions, batch_labels))
            print('Validation accuracy : %.1f%%'%accuracy(valid_predictions.eval(
                        feed_dict = {tf_validation_dataset:validX}), validY))
    print('Test accuracy : %.1f%%'%accuracy(test_predictions.eval(feed_dict = {tf_test_dataset:testX}), testY))

Initiliazed
Minibatch error at step 0 : 21.402929306030273
Minibatch accuracy  : 40.0%
Validation accuracy : 66.0%
Minibatch error at step 500 : 0.00043296514195390046
Minibatch accuracy  : 100.0%
Validation accuracy : 96.0%
Minibatch error at step 1000 : 0.0004061236686538905
Minibatch accuracy  : 100.0%
Validation accuracy : 96.0%
Minibatch error at step 1500 : 0.00038712029345333576
Minibatch accuracy  : 100.0%
Validation accuracy : 96.0%
Minibatch error at step 2000 : 0.00037301916745491326
Minibatch accuracy  : 100.0%
Validation accuracy : 96.0%
Minibatch error at step 2500 : 0.000361993967089802
Minibatch accuracy  : 100.0%
Validation accuracy : 96.0%
Minibatch error at step 3000 : 0.0003529555397108197
Minibatch accuracy  : 100.0%
Validation accuracy : 96.0%
Test accuracy : 92.0%


In [13]:
# I achieved 94% accuracy with a 3 layer Neural Network without regularization
# 98% with regularization nice work but the ai is to achieve 1 accuracy
# let's add dropout before the readout layer : Dropout seems to reduce overfitting a lot but my accuracy went
# down 92%, we will go without dropout fro iris data
#Let's try learning rate decay : seems the model is not overfiting at all !
# 92% on the validation set and 92% on the test set. the model peform better on the test set !!!! 
#(always implement learning rate decay seems effcient to reduce overfitting)