In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True)
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [3]:
_X, _y = mnist["data"], mnist["target"]
_y = _y.astype(np.uint8)

In [4]:
X_train, X_test, y_train, y_test = _X[:60000], _X[60000:], _y[:60000], _y[60000:]

In [5]:
X_train = X_train / 255
X_test = X_test / 255

In [6]:
y_train_large = (y_train >= 7) # 목표값이 7 이상인 경우
y_train_odd = (y_train % 2 == 1) # 목표가 홀수인 경우
#만약 7인 경우 두개의 레이블이 모두 1이 될 수 있다 -> multilabel!

y_train_multilabel = (np.c_[y_train_large, y_train_odd]).astype(np.uint8)

y_test_large = (y_test >= 7)
y_test_odd = (y_test % 2 == 1)
y_test_multilabel = (np.c_[y_test_large, y_test_odd]).astype(np.uint8)

In [7]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [8]:
def predict(X, W):
    return np.round(sigmoid(X @ W))

In [9]:
def compute_cost(X, T, W):
    epsilon = 1e-5
    N = len(T)
    K = np.size(T, 1)
    cost = - (1/N) * (np.ones((1,N)) @ (np.multiply(np.log(sigmoid(X @ W) + epsilon), T)) @ np.ones((K,1)) +
                      np.ones((1,N)) @ (np.multiply(np.log(1 - sigmoid(X @ W) + epsilon), (1 - T))) @ np.ones((K,1)))
    return cost

In [10]:
def batch_gd(X, T, W, learning_rate, iterations, batch_size):
    N = len(T)
    cost_history = np.zeros((iterations,1))
    shuffled_indices = np.random.permutation(N)
    X_shuffled = X[shuffled_indices]
    T_shuffled = T[shuffled_indices]

    for i in range(iterations):
        j = i % N
        X_batch = X_shuffled[j:j+batch_size]
        T_batch = T_shuffled[j:j+batch_size]
        # batch가 epoch 경계를 넘어가는 경우, 앞 부분으로 채워줌
        if X_batch.shape[0] < batch_size:
            X_batch = np.vstack((X_batch, X_shuffled[:(batch_size - X_batch.shape[0])]))
            T_batch = np.vstack((T_batch, T_shuffled[:(batch_size - T_batch.shape[0])]))
        W = W - (learning_rate/batch_size) * (X_batch.T @ (sigmoid(X_batch @ W) - T_batch))
        cost_history[i] = compute_cost(X_batch, T_batch, W)
        if i % 10 == 0:
            print(cost_history[i][0])
    return (cost_history, W)

In [11]:
X = np.hstack((np.ones((np.size(X_train, 0),1)),X_train))
T = y_train_multilabel

K = np.size(T, 1)
M = np.size(X, 1)
W = np.zeros((M,K))

iterations = 2000
learning_rate = 0.01

initial_cost = compute_cost(X, T, W)

print("Initial Cost is: {} \n".format(initial_cost[0][0]))

(cost_history, W_optimal) = batch_gd(X, T, W, learning_rate, iterations, 256)

Initial Cost is: 1.386254361519801 

1.3672094752553152
1.2729324608981354
1.2149409079494098
1.1634788672521683
1.1302879812623792
1.101222513849257
1.0613157202742673
1.0214995416145407
0.9890170679995822
0.9593487061817222
0.9265516042021402
0.90842405841321
0.8891809964796475
0.8893924042074142
0.8638468400602584
0.8416841409411498
0.8371215350671656
0.8237069552008781
0.8148634370551142
0.8089756148995629
0.803416918907773
0.7983371599718219
0.788394743199465
0.7699195398258656
0.7561293810963721
0.7388143905511029
0.7206240775929735
0.7087969115050697
0.7162411533936897
0.716880036343329
0.7115248377731034
0.7066551750925809
0.707111626497333
0.7070656373628549
0.7058099871305075
0.7016260020251317
0.6886698704895329
0.7000726300531518
0.7263100898170085
0.7575952018171506
0.7562178606310477
0.757369700492815
0.7519412314150076
0.7332797251254096
0.7401959452432396
0.728405275412533
0.7228959220445785
0.7214538207570009
0.714993747076993
0.7161495233616376
0.7189684699433744
0.71

In [12]:
## Accuracy
X_ = np.hstack((np.ones((np.size(X_test, 0),1)),X_test))
y_pred = predict(X_, W_optimal)
score = sum(y_pred == y_test_multilabel)/ len(y_test_multilabel)

print(score)

[0.8873 0.863 ]
