In [18]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
import seaborn as sns

In [19]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True)
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [20]:
X, y = mnist["data"], mnist["target"]

In [21]:
y = y.astype(np.uint8)

In [22]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()

In [23]:
enc.fit(y[:,np.newaxis])

  enc.fit(y[:,np.newaxis])


OneHotEncoder()

In [24]:
Y = enc.transform(y[:,np.newaxis]).toarray()

  Y = enc.transform(y[:,np.newaxis]).toarray()


In [25]:
X_train, X_test, y_train, y_test = X[:60000], X[60000:], Y[:60000], Y[60000:]

In [26]:
X_train = X_train / 255
X_test = X_test / 255

In [27]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [28]:
def softmax(X, W):
    K = np.size(W, 1)
    A = np.exp(X @ W)
    B = np.diag(1 / (np.reshape(A @ np.ones((K,1)), -1)))
    Y = B @ A
    return Y

In [29]:
def compute_cost(X, T, W):
    epsilon = 1e-5
    N = len(T)
    K = np.size(T, 1)
    cost = - (1/N) * np.ones((1,N)) @ (np.multiply(np.log(softmax(X, W) + epsilon), T)) @ np.ones((K,1)) 
    return cost

In [30]:
def predict(X, W):
    return np.argmax((X @ W), axis=1)

In [31]:
def batch_gd(X, T, W, learning_rate, iterations, batch_size, lamb):
    N = len(T)
    cost_history = np.zeros((iterations,1))
    shuffled_indices = np.random.permutation(N)
    X_shuffled = X[shuffled_indices]
    T_shuffled = T[shuffled_indices]

    for i in range(iterations):
        j = i % N
        X_batch = X_shuffled[j:j+batch_size]
        T_batch = T_shuffled[j:j+batch_size]
        # batch가 epoch 경계를 넘어가는 경우, 앞 부분으로 채워줌
        if X_batch.shape[0] < batch_size:
            X_batch = np.vstack((X_batch, X_shuffled[:(batch_size - X_batch.shape[0])]))
            T_batch = np.vstack((T_batch, T_shuffled[:(batch_size - T_batch.shape[0])]))
        W = W - (learning_rate/batch_size) * (X_batch.T @ (softmax(X_batch, W) - T_batch) + 2*lamb*W)
        cost_history[i] = compute_cost(X_batch, T_batch, W)
        # if i % 1000 == 0:
        #     print(cost_history[i][0])

    return (cost_history, W)

In [34]:
iterations = 50000
learning_rate = 0.01

# initial_cost = compute_cost(X, T, W)
# print("Initial Cost is: {} \n".format(initial_cost[0][0]))

lamb = 0.0001

while lamb<1:
    T = y_train
    K = np.size(T, 1)
    M = np.size(X, 1)
    W = np.zeros((M,K))
    X = np.hstack((np.ones((np.size(X_train, 0),1)),X_train))

    (cost_history, W_optimal) = batch_gd(X, T, W, learning_rate, iterations, 64, lamb)

    X_ = np.hstack((np.ones((np.size(X_test, 0),1)),X_test))
    T_ = y_test
    y_pred = predict(X_, W_optimal)
    score = float(sum(y_pred == np.argmax(T_, axis=1)))/ float(len(y_test))

    print("규제화항:", lamb,"\t 정확도:", score, "\t L2 norm:", int(np.ones((1,M)) @ W_optimal**2 @ np.ones((K,1))))
    lamb *= 2

규제화항: 0.0001 	 정확도: 0.9144 	 L2 norm: 123
규제화항: 0.0002 	 정확도: 0.912 	 L2 norm: 123
규제화항: 0.0004 	 정확도: 0.9178 	 L2 norm: 125
규제화항: 0.0008 	 정확도: 0.912 	 L2 norm: 122
규제화항: 0.0016 	 정확도: 0.9179 	 L2 norm: 122
규제화항: 0.0032 	 정확도: 0.9117 	 L2 norm: 121
규제화항: 0.0064 	 정확도: 0.9131 	 L2 norm: 115
규제화항: 0.0128 	 정확도: 0.9074 	 L2 norm: 108
규제화항: 0.0256 	 정확도: 0.9087 	 L2 norm: 96
규제화항: 0.0512 	 정확도: 0.9096 	 L2 norm: 77
규제화항: 0.1024 	 정확도: 0.9112 	 L2 norm: 58
규제화항: 0.2048 	 정확도: 0.8954 	 L2 norm: 39
규제화항: 0.4096 	 정확도: 0.8947 	 L2 norm: 26
규제화항: 0.8192 	 정확도: 0.8776 	 L2 norm: 17


In [22]:
## Accuracy
X_ = np.hstack((np.ones((np.size(X_test, 0),1)),X_test))
T_ = y_test
y_pred = predict(X_, W_optimal)
score = float(sum(y_pred == np.argmax(T_, axis=1)))/ float(len(y_test))

print(score)

0.9179
