In [17]:
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from fashion_mnist_master.utils import mnist_reader
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.datasets import fashion_mnist as fmn
import time

# Problem
Build a fully connected neural network in Keras to classify the images in Fashion-MNIST using one hidden layer of 5 neurons and a softmax output layer.  So the structure is 

    x_1             a_1
    x_2             a_2
    x_3             a_3
    .               a_4                softmax
    .               a_5
    .
    .
    x_784
    
where every $x_i$ is connected to every $a_j$, and every $a_j$ is connected to the softmax output layer. Each $x_i$ corresponds to one pixel in an input image and each $a_j$ is a "hidden" neuron. (Note that the Fashion-MNIST dataset is also available in Keras as `keras.datasets.fashion_mnist`.)

In [26]:
# Load dataset.
# Both splits are read from the same local directory; 'train' and 't10k' are
# the split names passed to mnist_reader.load_mnist, which yields a
# (features, labels) pair for each split.
FASHION_DIR = 'fashion_mnist_master/data/fashion'
(x_train, y_train), (x_test, y_test) = (
    mnist_reader.load_mnist(FASHION_DIR, kind=split_name)
    for split_name in ('train', 't10k')
)

1. Build and train the network using ReLU as the activation function for the hidden layer.
2. Build and train the network again using sigmoid as the activation function for the hidden layer. 

In [27]:
# Standardise the pixel features.
# The scaler is fitted on the training set ONLY; the test set is transformed
# with those same training statistics. Re-fitting on the test set (as
# fit_transform would do) puts train and test on different scales and leaks
# test-set information into preprocessing.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# One-hot encode the integer class labels into 10 columns, matching the
# 10-unit softmax output and the categorical_crossentropy loss used below.
# NOTE: this cell overwrites x_train/x_test/y_train/y_test in place, so it is
# not safe to run twice in a row -- re-run the data-loading cell first.
y_train = keras.utils.to_categorical(y_train, num_classes=10)
y_test = keras.utils.to_categorical(y_test, num_classes=10)



In [42]:
# ReLU network: one hidden layer of 5 units feeding a 10-unit softmax.
# The timer starts before the model is built, so relu_time covers
# construction, compilation and fitting together.
start = time.time()

n_samples, n_features = x_train.shape
relu = Sequential([
    Dense(5, activation='relu', input_dim=n_features),
    Dense(10, activation='softmax'),
])
relu.compile(optimizer='adam',
             loss='categorical_crossentropy',
             metrics=['accuracy'])

# batch_size equals the number of training rows, i.e. full-batch training:
# each of the 40 epochs performs a single weight update.
relu.fit(x_train, y_train,
         batch_size=n_samples,
         epochs=40,
         verbose=0)

relu_time = time.time() - start

In [44]:
# Sigmoid network: identical architecture and training setup to the ReLU
# model, with only the hidden-layer activation changed.
# The timer starts before the model is built, so sigmoid_time covers
# construction, compilation and fitting together.
start = time.time()

n_samples, n_features = x_train.shape
sigmoid = Sequential([
    Dense(5, activation='sigmoid', input_dim=n_features),
    Dense(10, activation='softmax'),
])
sigmoid.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

# batch_size equals the number of training rows, i.e. full-batch training:
# each of the 40 epochs performs a single weight update.
sigmoid.fit(x_train, y_train,
            batch_size=n_samples,
            epochs=40,
            verbose=0)

sigmoid_time = time.time() - start

In [48]:
# Score both models on the held-out test set.
# Each model was compiled with metrics=['accuracy'], so index 1 of the
# evaluate() result is the accuracy (index 0 is the loss).
relu_scores = relu.evaluate(x_test, y_test, verbose=0)
sigmoid_scores = sigmoid.evaluate(x_test, y_test, verbose=0)
relu_result, sigmoid_result = relu_scores[1], sigmoid_scores[1]

In [51]:
# Tab-separated summary table: one row per model with its test accuracy and
# the wall-clock seconds measured in the corresponding training cell.
print("\tACCURACY\tTIME")
summary_rows = (
    ("ReLU", relu_result, relu_time),
    ("Sigmoid", sigmoid_result, sigmoid_time),
)
for model_label, test_accuracy, elapsed_seconds in summary_rows:
    print("{}\t{}\t\t{}".format(model_label, test_accuracy, elapsed_seconds))

	ACCURACY	TIME
ReLU	0.4025		15.131405591964722
Sigmoid	0.1792		15.115326166152954


Compare the training time and the accuracy of your two neural networks to each other and to the training time and accuracy of the XGBoost classifier you built previously. 

XGBoost Results:<br><br>
Best parameters: <br> 
gamma: 0.6666666666666666 <br>
learning_rate: 0.7 <br><br>
Best Score: 0.8898

I didn't record how long XGBoost took to train, but I know it took far longer than these neural nets.