In [75]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
import seaborn as sns

In [76]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True)
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [77]:
X, y = mnist["data"], mnist["target"]

In [78]:
print(X.shape)
print(X)

(70000, 784)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [79]:
y

array(['5', '0', '4', ..., '4', '5', '6'], dtype=object)

In [80]:
y = y.astype(np.uint8) # 숫자로 변환
y

array([5, 0, 4, ..., 4, 5, 6], dtype=uint8)

In [81]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()

In [82]:
enc.fit(y[:,np.newaxis])

OneHotEncoder()

In [83]:
Y = enc.transform(y[:,np.newaxis]).toarray()

In [84]:
X_train, X_test, y_train, y_test = X[:60000], X[60000:], Y[:60000], Y[60000:]

In [85]:
X_train = X_train / 255
X_test = X_test / 255

In [86]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [87]:
def softmax(X, W):
    K = np.size(W, 1)
    A = np.exp(X @ W)
    B = np.diag(1 / (np.reshape(A @ np.ones((K,1)), -1)))
    Y = B @ A
    return Y

In [104]:
def compute_cost(X, T, W, lambda_):
    epsilon = 1e-5
    N = len(T)
    K = np.size(T, 1)
    cost = - (1/N) * np.ones((1,N)) @ (np.multiply(np.log(softmax(X, W) + epsilon), T)) @ np.ones((K,1)) + (lambda_) * (np.linalg.norm(W, ord=2)**2)
    return cost

In [95]:
def predict(X, W):
    return np.argmax((X @ W), axis=1)

In [106]:
def batch_gd(X, T, W, learning_rate, iterations, batch_size, lambda_):
    N = len(T)
    cost_history = np.zeros((iterations,1))
    shuffled_indices = np.random.permutation(N)
    X_shuffled = X[shuffled_indices]
    T_shuffled = T[shuffled_indices]
    best_cost = 1e6

    for i in range(iterations):
        j = i % N
        X_batch = X_shuffled[j:j+batch_size]
        T_batch = T_shuffled[j:j+batch_size]
        # batch가 epoch 경계를 넘어가는 경우, 앞 부분으로 채워줌
        if X_batch.shape[0] < batch_size:
            X_batch = np.vstack((X_batch, X_shuffled[:(batch_size - X_batch.shape[0])]))
            T_batch = np.vstack((T_batch, T_shuffled[:(batch_size - T_batch.shape[0])]))
        W = W - (learning_rate/batch_size) * (X_batch.T @ (softmax(X_batch, W) - T_batch) + 2*(lambda_)*W)
        cost_history[i] = compute_cost(X_batch, T_batch, W, lambda_)
        if i % 1000 == 0:
            # print(cost_history[i][0])
            if cost_history[i][0] < best_cost:
                best_cost = cost_history[i][0]
    
    print("Lowest cost : ", best_cost)

    return (cost_history, W)

In [108]:
X = np.hstack((np.ones((np.size(X_train, 0),1)),X_train))
T = y_train

K = np.size(T, 1)
M = np.size(X, 1)
W = np.zeros((M,K))

iterations = 50000
learning_rate = 0.01
lambdas_ = [1e-4, 1e-3, 1e-2, 1e-1, 1, 1e1]
score_list = []

for lambda_ in lambdas_:
    initial_cost = compute_cost(X, T, W, lambda_)
    print("\n\nCase Lambda : ", lambda_)
    print("-------------------------------------------------")
    print("Initial Cost is: {} \n".format(initial_cost[0][0]))
    # if lowest_cost

    (cost_history, W_optimal) = batch_gd(X, T, W, learning_rate, iterations, 64, lambda_)

    ## Accuracy
    X_ = np.hstack((np.ones((np.size(X_test, 0),1)),X_test))
    T_ = y_test
    y_pred = predict(X_, W_optimal)
    score = float(sum(y_pred == np.argmax(T_, axis=1)))/ float(len(y_test))
    score_list.append(score)
    print(f"Accuracy is {score}, lambda is {lambda_}")

print(f"Result : Best Accuracy and Lambda is {max(score_list)}, {lambdas_[score_list.index(max(score_list))]}")

MemoryError: Unable to allocate 26.8 GiB for an array with shape (60000, 60000) and data type float64