In [6]:
import numpy as np
from torchvision.datasets import MNIST
def download_mnist(is_train: bool):
    dataset = MNIST(root='./data',
            transform=lambda x: np.array(x).flatten(),
            download=True,
            train=is_train)
    mnist_data = []
    mnist_labels = []
    for image, label in dataset:
        mnist_data.append(image)
        mnist_labels.append(label)
    return mnist_data, mnist_labels
train_X, train_Y = download_mnist(True)
test_X, test_Y = download_mnist(False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 39139658.88it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 549759.66it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1441033.17it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4602688.76it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



Normalize the data and convert the labels to one-hot-encoding

In [7]:
train_X = np.array(train_X) / 255.0
test_X = np.array(test_X) / 255.0

In [8]:
def one_hot_encoding(labels):
  # labels = labels.astype(int)
  one_hot_encode = np.zeros((len(labels), 10))
  for i in range (len(labels)):
    one_hot_encode[i, labels[i]] = 1
  return one_hot_encode

train_Y = one_hot_encoding(train_Y)
test_Y = one_hot_encoding(test_Y)

In [9]:
def init_param(input, output):
  W = np.random.rand(input, output) * 0.01
  b = np.zeros((output,))
  return W, b

#print(init_param(784, 10))


Softmax, Cross Entrpy Loss, Gradient Descent

In [10]:
def softmax(x):
  exp_x = np.exp(x - np.max(x, axis = 1, keepdims = True))
  return exp_x / np.sum(exp_x, axis = 1, keepdims = True)

In [17]:
def cross_entropy(true_y, prediction_y):
  m = true_y.shape[0]
  log_p = -np.log(prediction_y[range(m), true_y.argmax(axis=1)] + 1e-9)
  loss = np.sum(log_p) / m

  return loss


In [13]:
def gradient_descent(X, true_y, prediction_y, W, b, learning_rate):
  error = true_y - prediction_y
  W += learning_rate * (np.dot(X.T, error))
  b += learning_rate * np.sum(error, axis = 0)
  return W, b

Prediction, Accuracy

In [12]:
def predict(X, W, b):
  z = np.dot(X, W) + b
  prediction_y = softmax(z)
  #return prediction_y
  return np.argmax(prediction_y, axis = 1)

In [14]:
def accuracy(X, Y, W, b):
  prediction_y = predict(X, W, b)
  true_y = np.argmax(Y, axis = 1)

  acc = np.mean(prediction_y == true_y)
  return acc

Training

In [15]:
def train(train_X, train_Y, W, b, epochs=100, batch_size=100, learning_rate=0.01):

  for epoch in range(epochs):
    i = np.random.permutation(train_X.shape[0])
    train_X_random = train_X[i]
    train_Y_random = train_Y[i]

    for i in range(0, train_X.shape[0], batch_size):
      batch_X = train_X_random[i : i + batch_size]
      batch_Y = train_Y_random[i : i + batch_size]

      #prediction_y = predict(batch_X, W, b)
      z = np.dot(batch_X, W) + b
      prediction_y = softmax(z)

      loss = cross_entropy(batch_Y, prediction_y)

      W, b = gradient_descent(batch_X, batch_Y, prediction_y, W, b, learning_rate)

    print(f"Epoch: {epoch+1} Loss: {loss}")

  return W, b


In [16]:
W, b = init_param(784,10)
trained_W, trained_b = train(train_X, train_Y, W, b, 100, 100, 0.01)

test_acc = accuracy(test_X, test_Y, trained_W, trained_b)
print(f"Test accuracy: {test_acc * 100:.2f}% ")

Epoch: 1 Loss: 0.30929166590945134
Epoch: 2 Loss: 0.4597783190294895
Epoch: 3 Loss: 0.26618713888362816
Epoch: 4 Loss: 0.4406843995642879
Epoch: 5 Loss: 0.3664150648154956
Epoch: 6 Loss: 0.30041437496228957
Epoch: 7 Loss: 0.2566326378408545
Epoch: 8 Loss: 0.15781139236378464
Epoch: 9 Loss: 0.31266051480564
Epoch: 10 Loss: 0.2583511212249546
Epoch: 11 Loss: 0.29609675155815046
Epoch: 12 Loss: 0.30092225017425384
Epoch: 13 Loss: 0.20406806047831388
Epoch: 14 Loss: 0.3120026151793684
Epoch: 15 Loss: 0.15348898703323127
Epoch: 16 Loss: 0.2753844101263681
Epoch: 17 Loss: 0.3996708442133541
Epoch: 18 Loss: 0.20697995448979878
Epoch: 19 Loss: 0.274803574839431
Epoch: 20 Loss: 0.2621752748185944
Epoch: 21 Loss: 0.25518442215339965
Epoch: 22 Loss: 0.2636244600556344
Epoch: 23 Loss: 0.4078324404160319
Epoch: 24 Loss: 0.40117248704114417
Epoch: 25 Loss: 0.2052941509180527
Epoch: 26 Loss: 0.17554440620898737
Epoch: 27 Loss: 0.27337562538306626
Epoch: 28 Loss: 0.23288719044506748
Epoch: 29 Loss: 0.