In [1]:
import numpy as np
from torchvision.datasets import MNIST
def download_mnist(is_train: bool):
    """Fetch one MNIST split through torchvision as flattened pixel vectors.

    Args:
        is_train: True selects the training split, False the test split.

    Returns:
        A ``(images, labels)`` pair of Python lists. Each image is the 1-D
        NumPy array produced by the transform; each label is whatever the
        dataset yields alongside it.
    """
    def to_flat_array(picture):
        # Convert the item handed to the transform into a 1-D NumPy array.
        return np.array(picture).flatten()

    split = MNIST(root='./data', train=is_train, download=True, transform=to_flat_array)

    # Walk the dataset once, then separate the (image, label) pairs.
    samples = list(split)
    images = [pixels for pixels, _ in samples]
    labels = [digit for _, digit in samples]
    return images, labels
# Materialise both MNIST splits as (images, labels) lists.
train_X, train_Y = download_mnist(is_train=True)
test_X, test_Y = download_mnist(is_train=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 52.6MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 1.75MB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 14.2MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 2.76MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [2]:
# Stack each split into a single array and divide every value by 255.0.
# NOTE: the names are rebound to the scaled data, so re-running this cell
# divides by 255 a second time.
train_X = np.divide(np.array(train_X), 255.0)
test_X = np.divide(np.array(test_X), 255.0)

In [3]:
def one_hot_encoding(labels, num_classes=10):
  """Convert integer class labels into a one-hot matrix.

  Args:
    labels: sequence (list or 1-D array) of integer class indices.
    num_classes: number of columns in the result. Defaults to 10, the value
      that used to be hard-coded here.

  Returns:
    Float array of shape (len(labels), num_classes) holding a single 1 in
    each row at the column given by that row's label, zeros elsewhere.

  Raises:
    IndexError: if a label is not a valid column index for num_classes.
  """
  label_idx = np.asarray(labels, dtype=np.intp)
  one_hot_encode = np.zeros((len(labels), num_classes))
  # A single fancy-indexed assignment sets (row, label) for every row at once,
  # replacing the per-row Python loop.
  one_hot_encode[np.arange(len(labels)), label_idx] = 1
  return one_hot_encode

# Replace both label lists with their one-hot matrices.
# NOTE: the names are rebound, so this cell must run exactly once per load.
train_Y, test_Y = map(one_hot_encoding, (train_Y, test_Y))

In [4]:
def init_param(input, output, hidden, rng=None):
  """Create initial weights and biases for a one-hidden-layer network.

  Weights are drawn uniformly from [-limit, limit) with
  limit = sqrt(6 / (fan_in + fan_out)) (Glorot/Xavier uniform); biases start
  at zero.

  Args:
    input: number of input features.
    output: number of output units.
    hidden: number of hidden units.
    rng: optional random source exposing ``uniform(low, high, size)``, e.g.
      ``np.random.default_rng(seed)``. When None, the global ``np.random``
      state is used, exactly as before, so existing callers are unaffected.

  Returns:
    Tuple ``(W_hidden, W_output, b_hidden, b_output)`` with shapes
    (input, hidden), (hidden, output), (1, hidden) and (1, output).
  """
  # Passing an explicit generator makes the initialisation reproducible
  # without touching global random state.
  sampler = np.random if rng is None else rng

  limit_hidden = np.sqrt(6 / (input + hidden))
  W_hidden = sampler.uniform(-limit_hidden, limit_hidden, (input, hidden))

  limit_output = np.sqrt(6 / (output + hidden))
  W_output = sampler.uniform(-limit_output, limit_output, (hidden, output))

  b_hidden = np.zeros((1, hidden))
  b_output = np.zeros((1, output))
  return W_hidden, W_output, b_hidden, b_output

In [5]:
def relu(x):
  """Rectified linear unit: element-wise maximum of 0 and x."""
  floor = 0
  return np.maximum(floor, x)

def relu_deriv(x):
  """Derivative of ReLU: 1.0 where x is strictly positive, 0.0 elsewhere."""
  is_positive = x > 0
  return is_positive.astype(float)

def softmax(x):
  """Row-wise softmax of a 2-D array.

  Each row's maximum is subtracted before exponentiating so that large
  values do not overflow; the result of each row sums to 1.
  """
  row_max = np.max(x, axis = 1, keepdims = True)
  exponentials = np.exp(x - row_max)
  row_totals = np.sum(exponentials, axis = 1, keepdims = True)
  return exponentials / row_totals

In [6]:
def cross_entropy_cost(Y,y_output, W_hidden, W_output, lambdaL1):
  """Mean cross-entropy over the batch plus an L1 weight penalty.

  Args:
    Y: one-hot targets; the first dimension is the number of samples.
    y_output: predicted probabilities with the same shape as Y.
    W_hidden: hidden-layer weight matrix (penalised).
    W_output: output-layer weight matrix (penalised).
    lambdaL1: L1 regularisation strength.

  Returns:
    Scalar cost: -sum(Y * log(y_output)) / m
    + lambdaL1 * (sum|W_hidden| + sum|W_output|) / m, where m = Y.shape[0].
  """
  m = Y.shape[0]
  # A probability can underflow to exactly 0; log(0) is -inf and 0 * -inf is
  # NaN, which would turn the whole cost into NaN. Clamp away from zero first.
  safe_output = np.clip(y_output, 1e-12, None)
  cross_entropy = -np.sum(Y * np.log(safe_output)) / m

  l1 = (np.sum(np.abs(W_hidden)) + np.sum(np.abs(W_output))) * lambdaL1 / m
  cost = cross_entropy + l1
  return cost

In [7]:
def forward_propagation(X, W_hidden, W_output, b_hidden, b_output):
  """Run the network forward and return the softmax outputs.

  The hidden layer is an affine map followed by ReLU; the output layer is an
  affine map followed by softmax.
  """
  hidden_activation = relu(np.dot(X, W_hidden) + b_hidden)
  logits = np.dot(hidden_activation, W_output) + b_output
  return softmax(logits)

In [8]:
def backpropagation(X, Y, W_hidden, W_output, b_hidden, b_output, learning_rate, lambdaL1):
  """Perform one gradient-descent step on a batch and return the new parameters.

  Runs its own forward pass, computes gradients of the softmax cross-entropy
  averaged over the batch, adds the L1 sub-gradient (lambdaL1 / batch size)
  * sign(W) to both weight gradients, and applies a plain gradient-descent
  update. The inputs are not modified; updated copies are returned.

  Returns:
    Tuple ``(W_hidden, W_output, b_hidden, b_output)`` after the update.
  """
  n_samples = X.shape[0]
  l1_scale = lambdaL1 / n_samples

  # Forward pass; the hidden pre-activation is kept for the ReLU derivative.
  hidden_pre = np.dot(X, W_hidden) + b_hidden
  hidden_act = relu(hidden_pre)
  probs = softmax(np.dot(hidden_act, W_output) + b_output)

  # Output layer: error signal is (prediction - target).
  delta_out = probs - Y
  grad_W_output = np.dot(hidden_act.T, delta_out) / n_samples
  grad_b_output = np.sum(delta_out, axis=0, keepdims=True) / n_samples
  grad_W_output = grad_W_output + l1_scale * np.sign(W_output)

  # Hidden layer: push the error back through the (not yet updated) output weights.
  delta_hidden = np.dot(delta_out, W_output.T) * relu_deriv(hidden_pre)
  grad_W_hidden = np.dot(X.T, delta_hidden) / n_samples
  grad_b_hidden = np.sum(delta_hidden, axis=0, keepdims=True) / n_samples
  grad_W_hidden = grad_W_hidden + l1_scale * np.sign(W_hidden)

  return (
    W_hidden - learning_rate * grad_W_hidden,
    W_output - learning_rate * grad_W_output,
    b_hidden - learning_rate * grad_b_hidden,
    b_output - learning_rate * grad_b_output,
  )


In [10]:
def train(train_X, train_Y, W_hidden, W_output, b_hidden, b_output, epochs=100, batch_size=100, learning_rate=0.01, lambdaL1=0.01):
  """Train the network with mini-batch gradient descent.

  Each epoch reshuffles the samples, walks them in consecutive batches of
  ``batch_size`` (the last batch may be smaller), records the cost of every
  batch before updating the parameters on it, and prints the mean batch cost.

  Args:
    train_X: array of training inputs, one sample per row.
    train_Y: one-hot targets aligned with train_X.
    W_hidden, W_output, b_hidden, b_output: initial parameters.
    epochs: number of passes over the data.
    batch_size: number of samples per update.
    learning_rate: gradient-descent step size.
    lambdaL1: L1 regularisation strength.

  Returns:
    Tuple ``(W_hidden, W_output, b_hidden, b_output)`` after training.
  """
  num_samples = train_X.shape[0]

  for epoch in range(epochs):
    # Distinct names for the shuffle order and the batch offset; the original
    # reused one variable for both.
    order = np.random.permutation(num_samples)
    train_X_random = train_X[order]
    train_Y_random = train_Y[order]

    epoch_cost = 0
    num_batches = 0
    for start in range(0, num_samples, batch_size):
      batch_X = train_X_random[start : start + batch_size]
      batch_Y = train_Y_random[start : start + batch_size]

      y_output = forward_propagation(batch_X,W_hidden, W_output, b_hidden, b_output)

      batch_cost = cross_entropy_cost(batch_Y, y_output, W_hidden, W_output, lambdaL1)
      epoch_cost += batch_cost
      num_batches += 1

      W_hidden,W_output, b_hidden, b_output = backpropagation(batch_X, batch_Y, W_hidden,  W_output, b_hidden, b_output, learning_rate, lambdaL1)

    # Average over the batches actually processed. Dividing by
    # num_samples / batch_size is only correct when the sample count is an
    # exact multiple of batch_size.
    epoch_cost = epoch_cost / max(num_batches, 1)
    print(f"Epoch: {epoch+1} Cost:{epoch_cost} ")

  return W_hidden, W_output, b_hidden, b_output

In [9]:
def accuracy(test_X, test_Y, W_hidden, W_output, b_hidden, b_output):
  """Return the percentage of samples whose predicted class matches the target.

  The predicted class is the arg-max of the network output for each row; the
  true class is the arg-max of the corresponding one-hot row of test_Y.
  """
  probabilities = forward_propagation(test_X, W_hidden, W_output, b_hidden, b_output)

  predicted_class = np.argmax(probabilities, axis=1)
  actual_class = np.argmax(test_Y, axis=1)

  hit_rate = np.mean(predicted_class == actual_class)
  return hit_rate * 100

In [11]:
# Build a 784 -> 100 -> 10 network, train it, then score it on the test split.
W_hidden, W_output, b_hidden, b_output = init_param(input=784, output=10, hidden=100)
trained_W_hidden, trained_W_output, trained_b_hidden, trained_b_output = train(
    train_X, train_Y,
    W_hidden, W_output, b_hidden, b_output,
    epochs=100, batch_size=100, learning_rate=0.01, lambdaL1=0.01,
)

test_acc = accuracy(test_X, test_Y, trained_W_hidden, trained_W_output, trained_b_hidden, trained_b_output)
print(f"Test accuracy: {test_acc:.2f}% ")

Epoch: 1 Cost:1.4621186182559152 
Epoch: 2 Cost:0.8481272472719671 
Epoch: 3 Cost:0.7464820228252254 
Epoch: 4 Cost:0.6996455573161321 
Epoch: 5 Cost:0.6697219673419479 
Epoch: 6 Cost:0.6473410541388831 
Epoch: 7 Cost:0.6289973691306571 
Epoch: 8 Cost:0.6133055800262701 
Epoch: 9 Cost:0.5993622210971268 
Epoch: 10 Cost:0.58665507620268 
Epoch: 11 Cost:0.5749691468881688 
Epoch: 12 Cost:0.5638325170551578 
Epoch: 13 Cost:0.5536765270124048 
Epoch: 14 Cost:0.5439410291358128 
Epoch: 15 Cost:0.5345911317551358 
Epoch: 16 Cost:0.525618731315379 
Epoch: 17 Cost:0.5171362249367314 
Epoch: 18 Cost:0.5088953032184427 
Epoch: 19 Cost:0.5009791419429787 
Epoch: 20 Cost:0.4932791975787879 
Epoch: 21 Cost:0.48605353085189373 
Epoch: 22 Cost:0.4788425407616518 
Epoch: 23 Cost:0.47191474247435505 
Epoch: 24 Cost:0.4652562493572265 
Epoch: 25 Cost:0.4587453865755533 
Epoch: 26 Cost:0.45251994211139696 
Epoch: 27 Cost:0.4463388086846712 
Epoch: 28 Cost:0.4402669630351203 
Epoch: 29 Cost:0.434552778550