In [1]:
import urllib.request
import os
import pickle
import gzip
import numpy as np

In [2]:
def load_data():
    with gzip.open("../data/mnist.pkl.gz", 'rb') as f:
        train_data, val_data, test_data = pickle.load(f, encoding="latin1")
    train_inputs = [np.reshape(x, (28 * 28, 1)) for x in train_data[0]] + \
                   [np.reshape(x, (28 * 28, 1)) for x in val_data[0]]
    train_labels = [vectorized_result(y) for y in train_data[1]] + \
                   [vectorized_result(y) for y in val_data[1]]
   # valid_inputs = [np.reshape(x, (28*28, 1)) for x in val_data[0]]
   # valid_labels = [vectorized_result(y) for y in val_data[1]]
    test_inputs = [np.reshape(x, (28*28, 1)) for x in test_data[0]]
    test_labels = [vectorized_result(y) for y in test_data[1]]
    return (train_inputs, train_labels), (test_inputs, test_labels)  # (valid_inputs, valid_labels),

def vectorized_result(i):
    label = np.zeros((10, 1))
    label[i] = 1.0
    return label

In [4]:
(x1, y1), (t1, h1) = load_data()

In [7]:
x1[0].shape

(784, 1)

In [26]:
t1[0].shape

(784, 1)

In [9]:
url_base = 'http://yann.lecun.com/exdb/mnist/'
key_files = ['train-images-idx3-ubyte.gz',
            'train-labels-idx1-ubyte.gz',
            't10k-images-idx3-ubyte.gz',
            't10k-labels-idx1-ubyte.gz']

save_file = os.path.join("mnist_download/", "mnist.pkl")

train_num = 60000
test_num = 10000
img_dim = (1, 28, 28)
img_size = 784

In [10]:
save_file

'mnist_download/mnist.pkl'

In [11]:
with gzip.open('mnist_download/train-images-idx3-ubyte.gz', 'rb') as f:
    data = np.frombuffer(f.read(), np.uint8, offset=16)

In [12]:
data.shape

(47040000,)

In [14]:
data.shape[0] / 784

60000.0

In [21]:
data_p = []
for i in range(0, len(data), 784):
    data_p.append(np.reshape(data[i: i+784], (784, 1)))

In [23]:
data_p[0].shape

(784, 1)

In [28]:
with gzip.open('mnist_download/train-labels-idx1-ubyte.gz', 'rb') as f:
    data = np.frombuffer(f.read(), np.uint8, offset=8)

In [32]:
y1[:2]

[array([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.]]), array([[1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]])]

In [30]:
data[:10]

array([5, 0, 4, 1, 9, 2, 1, 3, 1, 4], dtype=uint8)

In [31]:
len(data)

60000

In [34]:
ys = []
for i in range(len(data)):
    T = np.zeros((10, 1))
    T[data[i]] = 1.0
    ys.append(T)

In [35]:
ys[:2]

[array([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.]]), array([[1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]])]

## Cifar10

In [1]:
import pickle, sys, random
import numpy as np

In [3]:
with open('cifar10/data_batch_1', 'rb') as f:
    aa = pickle.load(f, encoding='bytes')

In [4]:
type(aa)

dict

In [13]:
aa.keys()

dict_keys([b'labels', b'data', b'batch_label', b'filenames'])

In [57]:
data, labels = [], []
for i in range(1, 6):
    with open('cifar10/data_batch_%d' % i, 'rb') as f:
        whole = pickle.load(f, encoding='bytes')
        data.extend(whole[b'data'])
        labels.extend(whole[b'labels'])

In [58]:
test_data, test_labels = [], []
with open('cifar10/test_batch', 'rb') as f:
    whole = pickle.load(f, encoding='bytes')
    test_data = whole[b'data']
    test_labels = np.array(whole[b'labels'])

In [59]:
X_train = [np.reshape(row, (3072, 1)) for row in data]
X_test = [np.reshape(row, (3072, 1)) for row in test_data]
two_dim = np.eye(10)[np.array(labels, dtype=np.int32)]
two_dim_test = np.eye(10)[np.array(test_labels, dtype=np.int32)]
y_train = [row.reshape(10, 1) for row in two_dim]
y_test = [row.reshape(10, 1) for row in two_dim_test]

In [66]:
class Network:
    def __init__(self, sizes=[100, 100], activation="relu", dropout_rate=0.0):
        """
        :param sizes: list of layers
        :param activations: activation_functions
        """
        self.sizes = sizes
        self.num_layers = len(sizes)
        self.weights = [np.random.randn(back_layer, forward_layer) * np.sqrt(2.0 / forward_layer) \
                        for forward_layer, back_layer in zip(sizes[:-1], sizes[1:])]
        self.biases = [np.random.randn(back_layer, 1) for back_layer in sizes[1:]]
        self.dropout_rate = dropout_rate

        # TODO  activation_functions = {'sigmoid': sigmoid, 'relu': relu} tanh
        if activation.lower() == "sigmoid":
            self.activation = Network.sigmoid
            self.activation_derivative = Network.sigmoid_derivative
        elif activation.lower() == "relu":
            self.activation = Network.relu
            self.activation_derivative = Network.relu_derivative

    def predict(self, a):
        for w, b in zip(self.weights[:-1], self.biases[:-1]):
            a = self.activation(np.dot(w, a) + b)
            a *= (1.0 - self.dropout_rate)  ######### test dropout
        a = np.dot(self.weights[-1], a) + self.biases[-1]
        return a

    def backprop(self, x, y):
        gradient_w = [np.zeros(w.shape) for w in self.weights]
        gradient_b = [np.zeros(b.shape) for b in self.biases]

        # forward pass #
        a = x
        a_hold = [x]
        z_hold = []
        for w, b in zip(self.weights[:-1], self.biases[:-1]):
            z = np.dot(w, a) + b

            self.mask = np.random.rand(*z.shape) > self.dropout_rate
            z *= self.mask
        #    z /= (1 - self.dropout_rate)

            a = self.activation(z)
            z_hold.append(z)
            a_hold.append(a)
        final_layer = np.dot(self.weights[-1], a) + self.biases[-1]
        z_hold.append(final_layer)
        a_hold.append(Network.softmax(final_layer))

        # backward pass#
        delta = Network.softmax_derivative(a_hold[-1], y)
        gradient_w[-1] = np.dot(delta, a_hold[-2].T)
        gradient_b[-1] = delta

        for l in range(2, self.num_layers):
            delta = np.dot(self.weights[-l + 1].T, delta) * self.activation_derivative(z_hold[-l])
            gradient_w[-l] = np.dot(delta, a_hold[-l - 1].T)
            gradient_b[-l] = delta

        return gradient_w, gradient_b


    @staticmethod
    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def sigmoid_derivative(z):
        return Network.sigmoid(z) * (1 - Network.sigmoid(z))

    @staticmethod
    def relu(z):
        return np.maximum(z, 0)

    @staticmethod
    def relu_derivative(z):
        mask = (z <= 0)
        dout = np.ones(z.shape)
        dout[mask] = 0.0
        return dout

    @staticmethod
    def softmax(z):
        z = z - np.max(z)
        return np.exp(z) / np.sum(np.exp(z))

    @staticmethod
    def softmax_batch(z):
        z = z.T
        z = z - np.max(z, axis=0)
        t = np.exp(z) / np.sum(np.exp(z), axis=0)
        return t.T

    @staticmethod
    def softmax_derivative(a, b):
        return a - b

In [67]:
def train_DNN(X_train, y_train, num_epochs, learning_rate, network, X_val=None, y_val=None):
    gradient_w = [np.zeros(w.shape) for w in network.weights]
    gradient_b = [np.zeros(b.shape) for b in network.biases]
    
    for epoch in range(num_epochs):
        index = np.arange(len(X_train))
        random.shuffle(index)
     #   random_mask = np.random.choice(len(X_train), len(X_train), replace=False)
        for idx in index:
            add_w, add_b = network.backprop(X_train[idx], y_train[idx])
            network.weights = [weight - learning_rate * gw for weight, gw in zip(network.weights, add_w)]
            network.biases = [bias - learning_rate * gb for bias, gb in zip(network.biases, add_b)]

         #   gradient_w = [gw + aw for gw, aw in zip(gradient_w, add_w)]
         #   gradient_b = [gb + ab for gb, ab in zip(gradient_b, add_b)]
         #   network.weights = [weight - learning_rate * gw for weight, gw in zip(network.weights, gradient_w)]
        #    network.biases = [bias - learning_rate * gb for bias, gb in zip(network.biases, gradient_b)]

        if X_val:
            print("Epoch {0}, training_accuracy: {1},\t validation accuracy: {2}".
                  format(epoch + 1, evaluate(X_train, y_train, network), evaluate(X_val, y_val, network)))
        else:
            print("Epoch {0}, training_accuracy: {1}".
                  format(epoch + 1, evaluate(X_train, y_train, network)))

def evaluate(X_val, y_val, network):
    y_pred = [np.argmax(network.predict(x)) for x in X_val]
    return np.mean([int(y_p == np.argmax(y)) for y_p, y in zip(y_pred, y_val)])

In [None]:
dnn = Network(sizes=[3072, 50, 10], activation="relu", dropout_rate=0.5)
train_DNN(X_train, y_train, 30, 0.01, dnn, X_test, y_test)

Epoch 1, training_accuracy: 0.1,	 validation accuracy: 0.1
Epoch 2, training_accuracy: 0.1,	 validation accuracy: 0.1
