In [118]:
# Read Fashion MNIST dataset

import util_mnist_reader
X_train, y_train = util_mnist_reader.load_mnist('data/fashion', kind='train')
X_test, y_test = util_mnist_reader.load_mnist('data/fashion', kind='t10k')

# Your code goes here . . .
import matplotlib.pyplot as plt #for image visualization as of now

In [119]:
# Checking shapes of the arrays
print X_train.shape, y_train.shape, X_test.shape, y_test.shape

(60000, 784) (60000,) (10000, 784) (10000,)


In [120]:
y_train = y_train.reshape(y_train.shape[0], 1)
y_test = y_test.reshape(y_test.shape[0], 1)
print X_train.shape, y_train.shape, X_test.shape, y_test.shape

(60000, 784) (60000, 1) (10000, 784) (10000, 1)


In [121]:
# Visualization of the images
#X_train = X_train.reshape([-1, 28, 28, 1])
#plt.imshow(1-X_train[0][:, :, 0], cmap='cool')

In [122]:
# Normalization of Data
X_train = X_train.astype(float) / 255
X_test = X_test.astype(float) / 255
#print X_train[0]

In [123]:
import tensorflow as tf

In [124]:
#defining sigmoid function and sigmoid derivative
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(z):
    return sigmoid(z) * (1 - sigmoid(z))

def softmax(z):
    return (np.exp(z.T) / np.sum(np.exp(z), axis=1)).T
    #return (np.exp(z.T) / np.sum(np.exp(z), axis=0)).T

In [125]:
# Hyperparameters
epochs = 50
input_size, nodes, output_size = 784, 60, 10 # for weights, hidden_size is nodes on hidden layer
learning_rate = 0.1 # learning rate

In [126]:
#initialize weights
import numpy as np
w_hidden = np.random.randn(60,784) * 0.01
w_output = np.random.randn(10,60) * 0.01

In [127]:
def L2_reg(lambda_, w1, w2):
    return (lambda_ / 2.0) * (np.sum(w1 ** 2) + np.sum(w2 ** 2))


def L1_reg(lambda_, w1, w2):
    return (lambda_ / 2.0) * (np.abs(w1).sum() + np.abs(w2).sum())


def cross_entropy(outputs, y_target):
    return -np.sum(np.log(outputs) * y_target, axis=1)

In [128]:
# to convert y into (60000,10)
def target_matrix (y):
    matrix = np.zeros((len(y), 10))
    for i, val in enumerate(y):
        matrix[i, val] = 1
    return matrix
#print y_train.shape
#print y_train
print target_matrix (y_train).shape

def mle(y, axis=1):
    return np.argmax(y, axis)
#print mle(one_hot(y_train,10), axis=1)

(60000, 10)


In [129]:
class NNClassifier:

    def __init__(self, n_classes, n_features, n_hidden_units=30,
                 l1=0.0, l2=0.0, epochs=500, learning_rate=0.01,
                 n_batches=1, random_seed=None):

        if random_seed:
            np.random.seed(random_seed)
        self.n_classes = n_classes
        self.n_features = n_features
        self.n_hidden_units = n_hidden_units
        global w_hidden, w_output
        self.w1, self.w2 = w_hidden, w_output
        self.l1 = l1
        self.l2 = l2
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.n_batches = n_batches

    def forward(self, X):
        net_input = X.copy()
        net_hidden = np.dot(w_hidden, net_input.T)
        act_hidden = sigmoid(net_hidden)
        net_out = np.dot(w_output, act_hidden)
        act_out = softmax(net_out)
        return net_input, net_hidden, act_hidden, net_out, act_out

    def backward(self, net_input, net_hidden, act_hidden, act_out, y):
        sigma3 = act_out - y
        sigma2 = np.dot(w_output.T, sigma3) * sigmoid_derivative(net_hidden)
        grad1 = np.dot(sigma2, net_input)
        grad2 = np.dot(sigma3, act_hidden.T)
        return grad1, grad2
    
    def error(self, y, output):
        L1_term = L1_reg(self.l1, w_hidden, w_output)
        L2_term = L2_reg(self.l2, w_hidden, w_output)
        error = cross_entropy(output, y) + L1_term + L2_term
        return 0.5 * np.mean(error)

    def backprop_step(self, X, y):
        net_input, net_hidden, act_hidden, net_out, act_out = self.forward(X)
        y = y.T

        grad1, grad2 = self.backward(net_input, net_hidden, act_hidden, act_out, y)

        # regularize
        grad1 += (w_hidden * (self.l1 + self.l2))
        grad2 += (w_output * (self.l1 + self.l2))

        error = self.error(y, act_out)
        
        return error, grad1, grad2
    
    def predict(self, X):
        Xt = X.copy()
        net_input, net_hidden, act_hidden, net_out, act_out = self.forward(Xt)
        return mle(net_out.T)
    
    def predict_proba(self, X):
        Xt = X.copy()
        net_input, net_hidden, act_hidden, net_out, act_out = self.forward(Xt)
        return softmax(act_out.T)

    def fit(self, X, y):
        error_ = []
        X_data, y_data = X.copy(), y.copy()
        y_data_enc = target_array(y_data, 10)
                
        X_mbs = np.array_split(X_data, 1)
        y_mbs = np.array_split(y_data_enc, 1)
        
        global w_hidden, w_output
        
        for i in range(epochs):
            
            epoch_errors = []

            for Xi, yi in zip(X_mbs, y_mbs):
                
                # update weights
                error, grad1, grad2 = self.backprop_step(Xi, yi)
                epoch_errors.append(error)
                w_hidden -= (learning_rate * grad1)
                w_output -= (learning_rate * grad2)
            error_.append(np.mean(epoch_errors))
        return self
    
    def score(X, y):
        y_hat = self.predict(X)
        return np.sum(y == y_hat, axis=0) / float(X.shape[0])

In [130]:
nn = NNClassifier(
    n_classes=10, 
    n_features=784,
    n_hidden_units=50,
    l2=0.5,
    l1=0.0,
    epochs=300,
    learning_rate=0.001,
    n_batches=25,
    random_seed=42
).fit(X_train, y_train);

  if __name__ == '__main__':


In [131]:
print 'Train Accuracy:', nn.score(X_train, y_train) * 100
print 'Test Accuracy:' ,nn.score(X_test, y_test) * 100

Train Accuracy:

TypeError: score() takes exactly 2 arguments (3 given)

In [2]:
import numpy as np
print np.random.uniform(-1.0, 1.0, size=(60, 784))

[[-0.01345512 -0.9369294  -0.82336186 ... -0.3664047  -0.68358932
   0.58245671]
 [-0.89142849  0.4308243   0.22172297 ...  0.01561464 -0.17034049
  -0.07581876]
 [-0.59691113 -0.20518226 -0.81814229 ...  0.10707359 -0.51353389
  -0.15976466]
 ...
 [-0.27032991 -0.007482   -0.0901324  ... -0.11335995 -0.47626236
  -0.2305832 ]
 [-0.94092665  0.05081654  0.97845798 ...  0.31065769 -0.62696234
   0.64605745]
 [-0.56552509 -0.20342645  0.40787213 ...  0.47501014  0.99702749
   0.83957881]]


In [None]:
import numpy as np
b1 = np.zeros((1, 6))
b2 = np.zeros(1, 6)
print b1.shape, b2.shape