In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from sklearn.datasets import fetch_openml
# Fetch the 'mnist_784' dataset (version 1) through scikit-learn's OpenML loader.
# cache=True asks scikit-learn to keep the download on disk for later runs.
mnist = fetch_openml('mnist_784', version=1, cache=True)
# Last expression of the cell: show which fields the returned object exposes.
mnist.keys()

In [None]:
# Pull the feature matrix and the label vector out of the fetched dataset.
# NOTE(review): depending on the scikit-learn version these may be pandas
# objects rather than NumPy arrays — confirm before using array-only APIs.
_X, _y = mnist["data"], mnist["target"]
# Cast the labels to uint8 so the numeric comparisons and modulo applied to
# them further down work (presumably the raw targets are digit strings — verify).
_y = _y.astype(np.uint8)

In [None]:
# Positional split with no shuffling: the first 60,000 samples become the
# training set and everything after them becomes the test set.
X_train, X_test, y_train, y_test = _X[:60000], _X[60000:], _y[:60000], _y[60000:]

In [None]:
# Scale the features by 1/255 (presumably mapping 8-bit pixel values to [0, 1]).
# The scaled sets are derived from the raw `_X` instead of from X_train / X_test
# themselves, so re-running this cell is idempotent and never divides twice.
# The slice boundary must match the train/test split (first 60,000 rows = train).
X_train = _X[:60000] / 255
X_test = _X[60000:] / 255

In [None]:
# Build a two-column 0/1 target matrix for each split:
#   column 0 -> the digit label is >= 7 ("large")
#   column 1 -> the digit label is odd
y_train_large = y_train >= 7
y_train_odd = (y_train % 2) == 1
y_train_multilabel = np.column_stack((y_train_large, y_train_odd)).astype(np.uint8)

# Same construction for the test labels.
y_test_large = y_test >= 7
y_test_odd = (y_test % 2) == 1
y_test_multilabel = np.column_stack((y_test_large, y_test_odd)).astype(np.uint8)

In [None]:
def sigmoid(x):
    """Numerically stable element-wise logistic function ``1 / (1 + exp(-x))``.

    The naive formula evaluates ``exp(-x)``, which overflows for large negative
    ``x`` (emitting a RuntimeWarning, or raising under a strict ``np.errstate``).
    This version only ever exponentiates a non-positive number, so it cannot
    overflow, while returning the same values.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s). Integer and boolean inputs are promoted to float.

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray with the shape of ``x``.
    """
    x = np.asarray(x)
    if x.dtype.kind in "biu":
        # Integer/bool input: promote so the arithmetic below is done in floats.
        x = x.astype(float)
    # exp(-|x|) always lies in (0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) — algebraically identical.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves n-d arrays as-is.
    return result[()]

In [None]:
def predict(X, W):
    """Return hard 0/1 predictions for the model ``sigmoid(X @ W)``.

    The linear scores ``X @ W`` are squashed with ``sigmoid`` and then rounded
    with ``np.round``. NumPy rounds halves to the nearest even value, so a
    probability of exactly 0.5 becomes 0.

    Parameters
    ----------
    X : array
        Input matrix; its column count must match the row count of ``W``.
    W : array
        Weight matrix.

    Returns
    -------
    Float array of 0.0 / 1.0 values with the shape of ``X @ W``.
    """
    scores = X @ W
    probabilities = sigmoid(scores)
    return np.round(probabilities)

In [None]:
def compute_cost(X, T, W):
    """Binary cross-entropy of ``sigmoid(X @ W)`` against ``T``.

    The per-entry losses are summed over every sample and every label column
    and then divided by the number of samples ``N`` (not by ``N * K``).

    Parameters
    ----------
    X : array, shape (N, M)
        Input matrix.
    T : array, shape (N, K)
        0/1 targets, one column per label.
    W : array, shape (M, K)
        Weight matrix.

    Returns
    -------
    numpy.ndarray of shape (1, 1)
        The cost, kept as a 1x1 array so callers can index it as ``cost[0][0]``.
    """
    # Small offset that keeps log() finite when a probability saturates at 0 or 1.
    epsilon = 1e-5
    # Work in floats so ``1 - T`` cannot wrap around for unsigned integer targets.
    T = np.asarray(T, dtype=float)
    N = T.shape[0]
    # Run the forward pass once and reuse it for both cross-entropy terms.
    P = sigmoid(X @ W)
    log_likelihood = np.sum(
        T * np.log(P + epsilon) + (1 - T) * np.log(1 - P + epsilon)
    )
    return np.full((1, 1), -(1 / N) * log_likelihood)

In [None]:
def batch_gd(X, T, W, learning_rate, iterations, batch_size):
    """Mini-batch gradient descent for the sigmoid model ``sigmoid(X @ W)``.

    The samples are shuffled once with NumPy's global RNG and then consumed in
    consecutive, non-overlapping windows of ``batch_size`` rows. When a window
    runs past the end of the data it wraps around to the start, so every batch
    has exactly ``batch_size`` rows (this also holds when ``batch_size > N``).

    Parameters
    ----------
    X : ndarray, shape (N, M)
        Input matrix.
    T : ndarray, shape (N, K)
        0/1 targets.
    W : ndarray, shape (M, K)
        Initial weights; not modified in place.
    learning_rate : float
        Step size of each update.
    iterations : int
        Number of mini-batch updates to perform.
    batch_size : int
        Rows per mini-batch; must be at least 1.

    Returns
    -------
    (cost_history, W) : tuple
        ``cost_history`` has shape (iterations, 1) and holds the cost of each
        batch evaluated right after its update; ``W`` is the final weight matrix.

    Raises
    ------
    ValueError
        If there are no samples or ``batch_size`` is smaller than 1.

    Side effects
    ------------
    Prints the batch cost on every 10th iteration.
    """
    N = len(T)
    if N == 0:
        raise ValueError("batch_gd needs at least one training sample")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    cost_history = np.zeros((iterations,1))
    shuffled_indices = np.random.permutation(N)
    X_shuffled = X[shuffled_indices]
    T_shuffled = T[shuffled_indices]
    offsets = np.arange(batch_size)

    for i in range(iterations):
        # Advance by a whole batch per step. Stepping by a single sample would make
        # consecutive batches overlap almost completely and leave most data unused.
        start = (i * batch_size) % N
        # If the batch crosses the epoch boundary, fill it up from the front of
        # the shuffled data; modular indexing does this for any batch size.
        batch_rows = (start + offsets) % N
        X_batch = X_shuffled[batch_rows]
        T_batch = T_shuffled[batch_rows]
        W = W - (learning_rate/batch_size) * (X_batch.T @ (sigmoid(X_batch @ W) - T_batch))
        cost_history[i] = compute_cost(X_batch, T_batch, W)
        if i % 10 == 0:
            print(cost_history[i][0])
    return (cost_history, W)

In [None]:
# batch_gd shuffles the samples with NumPy's global RNG; seed it so that a
# Restart & Run All reproduces the same cost curve and weights.
np.random.seed(42)

# Design matrix: prepend a column of ones to the training features.
X = np.hstack((np.ones((np.size(X_train, 0),1)),X_train))
T = y_train_multilabel

K = np.size(T, 1)  # number of target columns
M = np.size(X, 1)  # number of input columns, including the column of ones
W = np.zeros((M,K))  # start from all-zero weights

iterations = 2000
learning_rate = 0.01
batch_size = 256

# Cost on the full training set before any update.
initial_cost = compute_cost(X, T, W)

print("Initial Cost is: {} \n".format(initial_cost[0][0]))

(cost_history, W_optimal) = batch_gd(X, T, W, learning_rate, iterations, batch_size)

In [None]:
## Accuracy
# Prepend the column of ones to the test features, mirroring the training setup.
X_ = np.hstack((np.ones((np.size(X_test, 0),1)),X_test))
y_pred = predict(X_, W_optimal)
# Per-label accuracy: the fraction of matching predictions in each target column.
# np.mean over axis 0 gives the same numbers as summing the rows with the builtin
# sum() and dividing by the row count, without looping over rows in Python.
score = np.mean(y_pred == y_test_multilabel, axis=0)

print(score)