In [1]:
%matplotlib inline

import numpy as np
from sklearn.datasets import load_boston
from sklearn.utils import shuffle, resample
from miniflow import *
import matplotlib.pyplot as plt
import gzip
import pickle
import os

In [2]:
# Gzipped MNIST IDX archives; the paths are relative to the notebook's
# working directory.
train_img_file, train_label_file = (
    'mnist_data/train-images-idx3-ubyte.gz',
    'mnist_data/train-labels-idx1-ubyte.gz',
)
test_img_file, test_label_file = (
    'mnist_data/t10k-images-idx3-ubyte.gz',
    'mnist_data/t10k-labels-idx1-ubyte.gz',
)

# Destination of the pickle cache written by the notebook.
save_file = 'mnist_data/mnist.pkl'

In [3]:
def read_image(fname):
    """Load a gzipped MNIST image archive as a 2-D ``uint8`` array.

    The first 16 bytes of the decompressed stream (the IDX image header) are
    skipped; the remaining bytes are viewed as rows of 784 pixel values.

    Args:
        fname: Path to the ``.gz`` image archive.

    Returns:
        ``uint8`` array of shape ``(n_images, 784)``.
    """
    with gzip.open(fname, 'rb') as archive:
        raw = archive.read()

    # offset=16 drops the header; -1 lets numpy infer the number of images.
    pixels = np.frombuffer(raw, dtype=np.uint8, offset=16)
    return pixels.reshape(-1, 784)

def read_label(fname):
    """Load a gzipped MNIST label archive as a 1-D ``uint8`` array.

    The first 8 bytes of the decompressed stream (the IDX label header) are
    skipped; every remaining byte is returned as one label.

    Args:
        fname: Path to the ``.gz`` label archive.

    Returns:
        ``uint8`` array of shape ``(n_labels,)``.
    """
    with gzip.open(fname, 'rb') as archive:
        raw = archive.read()

    # offset=8 drops the header bytes that precede the labels.
    return np.frombuffer(raw, dtype=np.uint8, offset=8)

def create_pkl(save_file, data):
    """Pickle ``data`` to ``save_file`` unless that file already exists.

    An existing file is never overwritten, so a stale cache has to be deleted
    by hand before it can be regenerated.

    Args:
        save_file: Destination path of the pickle file.
        data: Any picklable object.

    Returns:
        None.
    """
    if not os.path.exists(save_file):
        with open(save_file, 'wb') as out:
            pickle.dump(data, out)

In [4]:
# Parse the four archives (in this order) into one dict keyed by split/kind.
mnist_data = {
    'train_img': read_image(train_img_file),
    'train_label': read_label(train_label_file),
    'test_img': read_image(test_img_file),
    'test_label': read_label(test_label_file),
}

# Written only if save_file does not exist yet; an existing cache is kept.
create_pkl(save_file, mnist_data)

In [5]:
# Load data
# NOTE: pickle.load can execute code embedded in the file, so only load a
# cache file that you created yourself.
with open(save_file, 'rb') as f:
    data = pickle.load(f)

# Pull the four arrays out of the cached dict.
X_train = data['train_img']
y_train = data['train_label']
X_test = data['test_img']
y_test = data['test_label']

In [6]:
def normalized(data):
    """Return ``data`` cast to ``float32`` and divided by 255.

    Args:
        data: Numeric numpy array (e.g. ``uint8`` pixel intensities).

    Returns:
        A new ``float32`` array; the input array is not modified.
    """
    as_float = data.astype(np.float32)
    return as_float / 255.

In [7]:
# Normalized data
# Both image sets are rescaled in one step. The names are rebound, so running
# this cell a second time divides by 255 again.
X_train, X_test = normalized(X_train), normalized(X_test)

In [8]:
def one_hot(data, n):
    """One-hot encode an array of integer class ids.

    Args:
        data: Numpy array of integer labels; its last axis length gives the
            number of rows in the result.
        n: Number of classes, i.e. the number of columns in the result.

    Returns:
        Float array of shape ``(data.shape[-1], n)`` holding a 1 at
        ``[row, data[row]]`` and 0 everywhere else.
    """
    n_rows = data.shape[-1]
    encoded = np.zeros((n_rows, n))
    # Fancy indexing sets exactly one column per row.
    row_ids = np.arange(n_rows)
    encoded[row_ids, data] = 1
    return encoded

In [9]:
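# One-hot encoding is currently disabled, so the labels stay integer class ids
# of shape (N,). Re-enable the two lines below only if the loss node expects
# one-hot targets (TODO confirm against miniflow's SoftmaxCrossEntropy).
#y_train = one_hot(y_train, 10)
#y_test = one_hot(y_test, 10)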

In [10]:
# Split data
# The first 90% of the training rows stay in the training set; the remaining
# 10% become the validation set. X_train/y_train are rebound here, so running
# this cell again would shrink the training set a second time.
t = int(len(X_train) * 0.9)
X_validation, y_validation = X_train[t:], y_train[t:]
X_train, y_train = X_train[:t], y_train[:t]

print("Train: ", X_train.shape, y_train.shape)
print("Test: ", X_test.shape, y_test.shape)
print("Validation: ", X_validation.shape, y_validation.shape)

('Train: ', (54000, 784), (54000,))
('Test: ', (10000, 784), (10000,))
('Validation: ', (6000, 784), (6000,))


In [11]:
# Layer widths: the input width is the number of columns of X_train, the
# output width is the number of digit classes.
n_input, n_output = X_train.shape[1], 10
# Sizes of the first and second hidden layers.
n_hidden1, n_hidden2 = 64, 128

In [12]:
# Initial parameters are drawn from a dedicated, seeded generator so that a
# Restart & Run All begins training from the same weights every time. The
# original used the unseeded global RNG, which made every run start from a
# different point. A private RandomState leaves the global numpy RNG alone.
init_rng = np.random.RandomState(0)

# Standard-normal weights and biases for each of the three Linear layers.
W1_ = init_rng.randn(n_input, n_hidden1)
b1_ = init_rng.randn(n_hidden1)

W2_ = init_rng.randn(n_hidden1, n_hidden2)
b2_ = init_rng.randn(n_hidden2)

W3_ = init_rng.randn(n_hidden2, n_output)
b3_ = init_rng.randn(n_output)

In [13]:
# Placeholder nodes for the mini-batch inputs and targets.
X = Input(name='X')
y = Input(name='y')

# Placeholder nodes for the parameters of each of the three Linear layers.
W1 = Input(name='W1')
b1 = Input(name='b1')
W2 = Input(name='W2')
b2 = Input(name='b2')
W3 = Input(name='W3')
b3 = Input(name='b3')

In [14]:
# Wire up the network graph: three Linear nodes with a Sigmoid after each of
# the first two. The last Linear node (l3) is passed, together with the target
# node y, to the loss node.
# NOTE(review): Linear/Sigmoid/SoftmaxCrossEntropy come from miniflow's star
# import. Presumably Linear computes inputs·W + b and the loss applies the
# softmax to l3 itself, so l3 holds raw scores — confirm against miniflow.
l1 = Linear(X, W1, b1, name='l1')
s1 = Sigmoid(l1, name='s1')
l2 = Linear(s1, W2, b2, name='l2')
s2 = Sigmoid(l2, name='s2')
l3 = Linear(s2, W3, b3, name='l3')
# `cost` is the node whose value the training loop reads as graph[-1].value
# (assuming it is last in topological order — TODO confirm).
cost = SoftmaxCrossEntropy(y, l3, name='loss')

In [15]:
# Map every Input node to its initial value. X and y start out bound to the
# full training arrays; the training loop rebinds them for each mini-batch.
feed_dict = dict([
    (X, X_train),
    (y, y_train),
    (W1, W1_),
    (b1, b1_),
    (W2, W2_),
    (b2, b2_),
    (W3, W3_),
    (b3, b3_),
])

In [16]:
# Hyperparameter
# epochs: number of outer training passes
# lr: step size handed to sgd_update
# batch_size: number of samples drawn per training step
epochs, lr, batch_size = 5000, 0.001, 100

In [17]:
# Parameter nodes updated by sgd_update, and the per-epoch loss history.
trainables = [W1, b1, W2, b2, W3, b3]
losses = dict(train=[], validation=[])

# Number of training samples and of mini-batch steps per epoch (floor).
m = len(X_train)
steps_per_epoch = m // batch_size

# Node list in evaluation order, as returned by miniflow.
graph = topological_sort(feed_dict)

In [None]:
#Training
# A dedicated, seeded generator makes the sequence of mini-batches repeatable
# across runs. resample accepts a RandomState instance and advances it on
# every call, so each step still draws a different batch.
batch_rng = np.random.RandomState(0)

for i in range(epochs):
    train_loss = 0
    for j in range(steps_per_epoch):
        # Draw batch_size (image, label) pairs. resample samples with
        # replacement by default, so an "epoch" here is steps_per_epoch random
        # batches rather than a strict sweep over the training set.
        X_batch, y_batch = resample(X_train, y_train, n_samples=batch_size,
                                    random_state=batch_rng)
        X.value, y.value = X_batch, y_batch

        forward_and_backward(graph)
        sgd_update(trainables, lr)
        # graph[-1] is the last node in the sorted graph, read here as the loss.
        train_loss += graph[-1].value

    # Mean mini-batch loss over this epoch.
    train_loss = train_loss / steps_per_epoch
    losses['train'].append(train_loss)

    # Validation: bind the held-out split to the input nodes and evaluate.
    # NOTE(review): training=False presumably skips the backward pass —
    # confirm against miniflow.forward_and_backward.
    X.value, y.value = X_validation, y_validation
    forward_and_backward(graph, training=False)
    val_loss = graph[-1].value
    losses['validation'].append(val_loss)

    # Progress is logged once per epoch.
    print("Epoch: {}, Train Loss: {:.3f}, Validation Loss: {:.3f}".format(i+1, train_loss, val_loss))

# Plot the per-epoch loss curves. Epochs are numbered from 1 so the x axis
# matches the "Epoch: N" lines printed during training.
fig, ax = plt.subplots()
x = np.arange(1, len(losses['train']) + 1)
ax.plot(x, losses['train'], label='Train')
ax.plot(x, losses['validation'], label='Validation')
# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training vs. validation loss')
ax.legend()

Epoch: 1, Train Loss: 6.143, Validation Loss: 4.813
Epoch: 2, Train Loss: 4.369, Validation Loss: 4.040
Epoch: 3, Train Loss: 3.897, Validation Loss: 3.695
Epoch: 4, Train Loss: 3.598, Validation Loss: 3.411
Epoch: 5, Train Loss: 3.345, Validation Loss: 3.167
Epoch: 6, Train Loss: 3.125, Validation Loss: 2.954
Epoch: 7, Train Loss: 2.920, Validation Loss: 2.769
Epoch: 8, Train Loss: 2.762, Validation Loss: 2.609
Epoch: 9, Train Loss: 2.607, Validation Loss: 2.468
Epoch: 10, Train Loss: 2.496, Validation Loss: 2.345
Epoch: 11, Train Loss: 2.385, Validation Loss: 2.237
Epoch: 12, Train Loss: 2.275, Validation Loss: 2.140
Epoch: 13, Train Loss: 2.198, Validation Loss: 2.054
Epoch: 14, Train Loss: 2.120, Validation Loss: 1.978
Epoch: 15, Train Loss: 2.062, Validation Loss: 1.909
Epoch: 16, Train Loss: 1.987, Validation Loss: 1.846
Epoch: 17, Train Loss: 1.928, Validation Loss: 1.789
Epoch: 18, Train Loss: 1.876, Validation Loss: 1.737
Epoch: 19, Train Loss: 1.812, Validation Loss: 1.689
Ep

Epoch: 155, Train Loss: 0.732, Validation Loss: 0.644
Epoch: 156, Train Loss: 0.734, Validation Loss: 0.643
Epoch: 157, Train Loss: 0.726, Validation Loss: 0.641
Epoch: 158, Train Loss: 0.725, Validation Loss: 0.639
Epoch: 159, Train Loss: 0.731, Validation Loss: 0.638
Epoch: 160, Train Loss: 0.729, Validation Loss: 0.636
Epoch: 161, Train Loss: 0.732, Validation Loss: 0.634
Epoch: 162, Train Loss: 0.731, Validation Loss: 0.633
Epoch: 163, Train Loss: 0.724, Validation Loss: 0.631
Epoch: 164, Train Loss: 0.731, Validation Loss: 0.630
Epoch: 165, Train Loss: 0.721, Validation Loss: 0.628
Epoch: 166, Train Loss: 0.709, Validation Loss: 0.627
Epoch: 167, Train Loss: 0.714, Validation Loss: 0.625
Epoch: 168, Train Loss: 0.712, Validation Loss: 0.623
Epoch: 169, Train Loss: 0.709, Validation Loss: 0.622
Epoch: 170, Train Loss: 0.700, Validation Loss: 0.621
Epoch: 171, Train Loss: 0.712, Validation Loss: 0.619
Epoch: 172, Train Loss: 0.703, Validation Loss: 0.618
Epoch: 173, Train Loss: 0.71

In [None]:
# Test
# Evaluate on the test set by re-pointing the input nodes of the graph that
# was just trained, the same way the validation pass in the training loop
# does. The original re-ran topological_sort(feed_dict) here, which feeds
# W1_..b3_ from feed_dict back into the graph. That is only correct if
# sgd_update mutated those arrays in place (not visible from this notebook);
# otherwise the test loss would be computed with the initial weights.
feed_dict[X] = X_test  # kept in sync in case the graph is re-sorted later
feed_dict[y] = y_test

X.value, y.value = X_test, y_test
forward_and_backward(graph, training=False)
loss = graph[-1].value
print("Test loss: {:.3f}".format(loss))