In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import linear_model
import time
import tensorflow as tf
import sklearn.preprocessing
%matplotlib inline

# 1 - Load Fashion MNIST with pandas

In [2]:
# Fashion-MNIST CSV exports (relative paths).
TRAIN_FILE = '../fashionmnist/fashion-mnist_train.csv'
TEST_FILE = '../fashionmnist/fashion-mnist_test.csv'

# Read the training split first, then the test split, and preview the
# first rows of the training frame as the cell output.
train_data, test_data = (pd.read_csv(csv_path) for csv_path in (TRAIN_FILE, TEST_FILE))
train_data.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Column 0 of the frame is the label; every remaining column is one pixel.
pixel_columns = train_data.iloc[:, 1:]
label_column = train_data.iloc[:, 0]

X_train_all = np.array(pixel_columns)
print('pixel value max=', np.max(X_train_all))

# Divide by 255 (the maximum printed above) so the largest pixel maps to 1.0.
X_train_all = X_train_all / 255
m_train_all, num_px = X_train_all.shape
num_class = 10

# Labels are kept as a uint8 column vector of shape (m_train_all, 1).
y_train_all = np.array(label_column, dtype='uint8').reshape(-1, 1)
print('total number of training examples =', m_train_all, ', number of pixels =', num_px)

pixel value max= 255
total number of training examples = 60000 , number of pixels = 784


In [4]:
# Number of (shuffled) training examples to keep. All 60000 are used here for
# mini-batch training; a smaller subset such as 5000 is the alternative
# suggested for full-batch training.
m_train = 60000

# The source rows are reportedly already in random order, but shuffle anyway
# so the cell does not depend on that.
permutation = list(np.random.permutation(m_train_all))
shuffled_X = X_train_all[permutation, :]
shuffled_y = y_train_all[permutation, :]
X_train = shuffled_X[:m_train, :]

# Test pixels get the same /255 scaling as the training pixels.
X_test = np.array(test_data.iloc[:, 1:]) / 255

# One-hot encode the integer labels over the fixed class range 0..num_class-1.
label_binarizer = sklearn.preprocessing.LabelBinarizer()
label_binarizer.fit(range(num_class))
y_train = label_binarizer.transform(shuffled_y[:m_train, :])
y_test = label_binarizer.transform(np.array(test_data.iloc[:, 0], dtype='uint8'))

print('X_train shape', X_train.shape, 'y_train shape', y_train.shape)
print('number of training examples used:', X_train.shape[0])
print('X_test shape:', X_test.shape, 'y_test shape:', y_test.shape)

X_train shape (60000, 784) y_train shape (60000, 10)
number of training examples used: 60000
X_test shape: (10000, 784) y_test shape: (10000, 10)


# 2 - Neural Networks 

In [5]:
def initialization(dim):
    """Create the input/label placeholders and the per-layer variables.

    Args:
        dim: sequence of layer widths. ``dim[0]`` is the input width and the
            last entry is the output width.

    Returns:
        A tuple ``(X, Y, paras)``: ``X`` and ``Y`` are float32 placeholders of
        shape ``[None, dim[0]]`` and ``[None, dim[-1]]``; ``paras`` maps
        ``'W<l>'`` to a Xavier-initialised (seed 1) variable of shape
        ``[dim[l-1], dim[l]]`` and ``'b<l>'`` to a zero-initialised variable
        of shape ``[1, dim[l]]`` for ``l = 1 .. len(dim) - 1``.
    """
    X = tf.placeholder(tf.float32, shape = [None, dim[0]])
    Y = tf.placeholder(tf.float32, shape = [None, dim[-1]])

    paras = {}
    # Walk adjacent (fan_in, fan_out) width pairs; layers are numbered from 1.
    for layer, (fan_in, fan_out) in enumerate(zip(dim[:-1], dim[1:]), 1):
        weight_key = 'W{}'.format(layer)
        bias_key = 'b{}'.format(layer)
        weight_init = tf.contrib.layers.xavier_initializer(seed = 1)
        paras[weight_key] = tf.get_variable(weight_key, [fan_in, fan_out], initializer = weight_init)
        paras[bias_key] = tf.get_variable(bias_key, [1, fan_out], initializer = tf.zeros_initializer())
    return X, Y, paras

def forward_cost(paras, X, Y, lambd, keep_prob):
    """Build the forward pass and the L2-regularised softmax cross-entropy cost.

    The network depth is taken from ``paras`` (``len(paras) // 2`` layers), so
    the forward pass always covers exactly the layers that are regularised.
    Every layer except the last is affine -> ReLU -> dropout; the last layer
    is affine only and its output is returned as the logits. For a
    three-layer ``paras`` the ops are created in the same order as the
    previous hard-coded W1/W2/W3 implementation.

    Args:
        paras: dict holding ``'W1'..'WL'`` and ``'b1'..'bL'``.
        X: input tensor multiplied by ``W1``.
        Y: labels passed to ``tf.nn.softmax_cross_entropy_with_logits``.
        lambd: multiplier applied to ``tf.nn.l2_loss`` of every weight matrix.
        keep_prob: keep probability passed to ``tf.nn.dropout`` after each
            hidden layer.

    Returns:
        A tuple ``(logits, cost)``: the output of the last affine layer and
        the mean over examples of (cross-entropy + regularisation term).

    Raises:
        ValueError: if ``paras`` does not contain at least one layer.
        KeyError: if a ``'W<l>'`` / ``'b<l>'`` entry is missing.
    """
    num_layers = len(paras) // 2
    if num_layers < 1:
        raise ValueError('paras must contain at least one layer (W1, b1)')

    # Hidden layers 1 .. L-1: affine -> ReLU -> dropout.
    activation = X
    for l in range(1, num_layers):
        Z = tf.add(tf.matmul(activation, paras['W' + str(l)]), paras['b' + str(l)])
        activation = tf.nn.dropout(tf.nn.relu(Z), keep_prob)

    # Output layer L: affine only; softmax is applied inside the loss op.
    logits = tf.add(
        tf.matmul(activation, paras['W' + str(num_layers)]),
        paras['b' + str(num_layers)],
    )

    # L2 penalty over every weight matrix (biases are not regularised).
    reg = 0
    for l in range(1, num_layers + 1):
        reg += lambd * tf.nn.l2_loss(paras['W' + str(l)])

    # reg is a scalar added to each per-example loss before averaging, which
    # equals mean(cross-entropy) + reg.
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = Y, logits = logits) + reg)
    return logits, cost

def create_mini_batches(X, y, mini_batch_size):
    """Shuffle ``X`` and ``y`` in unison and split them into mini-batches.

    One permutation of the row indices is drawn from NumPy's global random
    state and applied to both arrays, so example/label pairs stay aligned.
    The shuffled rows are then cut into consecutive slices of
    ``mini_batch_size`` rows; the last slice is shorter when the number of
    rows is not a multiple of ``mini_batch_size``.

    Args:
        X: NumPy array whose first axis indexes examples (any rank >= 1).
        y: NumPy array with the same number of rows as ``X`` (1-D label
            vectors are accepted as well as 2-D arrays).
        mini_batch_size: positive integer number of rows per batch.

    Returns:
        A list of ``(mini_batch_X, mini_batch_y)`` tuples; empty when ``X``
        has no rows.

    Raises:
        ValueError: if ``X`` and ``y`` disagree on the number of rows, or if
            ``mini_batch_size`` is not positive.
    """
    m = X.shape[0]
    if y.shape[0] != m:
        raise ValueError(
            'X and y must have the same number of rows, got %d and %d' % (m, y.shape[0])
        )
    if mini_batch_size <= 0:
        raise ValueError('mini_batch_size must be positive, got %r' % (mini_batch_size,))

    # Index with the permutation array directly: converting it to a Python
    # list first only adds a costly round-trip on every call.
    permutation = np.random.permutation(m)
    shuffled_X = X[permutation]
    shuffled_y = y[permutation]

    # Stepping by the batch size yields the complete batches and, when m is
    # not a multiple of the batch size, one final shorter batch.
    return [
        (shuffled_X[start:start + mini_batch_size], shuffled_y[start:start + mini_batch_size])
        for start in range(0, m, mini_batch_size)
    ]

In [6]:
# The interactive session installs itself as the default session.
sess = tf.InteractiveSession()

# Layer widths: input pixels -> 40 -> 20 -> one logit per class.
dim = [num_px, 40, 20, num_class]
lambd = 0.00001                       # L2 regularisation multiplier
keep_prob = tf.placeholder('float')   # dropout keep probability, fed per run

# Build the graph: parameters, logits/cost, optimiser step and accuracy metric.
X, Y, paras = initialization(dim)
Z3, cost = forward_cost(paras, X, Y, lambd, keep_prob)
optimizer = tf.train.AdamOptimizer(learning_rate = 0.01).minimize(cost)
correct_prediction = tf.equal(tf.argmax(Z3, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
init = tf.global_variables_initializer()
sess.run(init)

for i in range(500):
    # Fresh shuffle and split for every epoch.
    mini_batches = create_mini_batches(X_train, y_train, mini_batch_size=256)

    for mini_batch in mini_batches:
        X_mini_batch, y_mini_batch = mini_batch
        sess.run(optimizer, feed_dict={X: X_mini_batch, Y: y_mini_batch, keep_prob: 0.9})

    # Report only on every 50th epoch.
    if i % 50 != 0:
        continue

    print('epoch', i)
    # Accuracy is measured with dropout switched off (keep_prob = 1).
    # 'train batch accuracy' uses only the last mini-batch of this epoch.
    evaluation_sets = (
        ('train batch accuracy', X_mini_batch, y_mini_batch),
        ('train accuracy', X_train, y_train),
        ('test accuracy', X_test, y_test),
    )
    for report_label, features, targets in evaluation_sets:
        print(report_label, sess.run(accuracy, feed_dict={X: features, Y: targets, keep_prob: 1}))






epoch 0
train batch accuracy 0.8125
train accuracy 0.8392
test accuracy 0.834
epoch 50
train batch accuracy 0.875
train accuracy 0.887233
test accuracy 0.8674
epoch 100
train batch accuracy 0.875
train accuracy 0.890067
test accuracy 0.8664
epoch 150
train batch accuracy 0.864583
train accuracy 0.88115
test accuracy 0.8573
epoch 200
train batch accuracy 0.895833
train accuracy 0.893133
test accuracy 0.8674
epoch 250
train batch accuracy 0.927083
train accuracy 0.895283
test accuracy 0.8661
epoch 300
train batch accuracy 0.927083
train accuracy 0.891533
test accuracy 0.8661
epoch 350
train batch accuracy 0.895833
train accuracy 0.892817
test accuracy 0.8663
epoch 400
train batch accuracy 0.927083
train accuracy 0.887383
test accuracy 0.8633
epoch 450
train batch accuracy 0.9375
train accuracy 0.897567
test accuracy 0.8683


## It reaches 86.8% test accuracy quickly! Next step: a CNN?