In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
import seaborn as sns

In [2]:
# Fetch the 'mnist_784' dataset (version 1) from OpenML; cache=True asks
# scikit-learn to reuse a locally cached copy on later runs.
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True, parser='auto')
# Display the fields available on the returned object.
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [3]:
# Feature table and label column of the downloaded dataset.
X = mnist["data"]
y = mnist["target"]

In [4]:
# Cast the labels to unsigned 8-bit integers (the digits 0-9 fit in uint8).
y = y.astype(np.uint8)

In [5]:
# Encoder used below to turn the integer digit labels into one-hot rows.
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()

In [6]:
# Compare the 1-D label shape with the (n_samples, 1) column shape the encoder
# is given below, then list the distinct label values.
print(y.shape)
print(np.array(y).reshape(-1,1).shape)
y.unique()
# The following statement raised an error: Y = enc.transform(y[:,np.newaxis]).toarray()
# Error: ValueError: Multi-dimensional indexing (e.g. `obj[:, None]`) is no longer supported. Convert to a numpy array before indexing instead.
# Fixed by converting to np.array first and reshaping (the one later occurrence is changed the same way).

(70000,)
(70000, 1)


array([5, 0, 4, 1, 9, 2, 3, 6, 7, 8], dtype=uint8)

In [7]:
# Learn the set of label categories from the labels reshaped into a single column.
enc.fit(np.array(y).reshape(-1,1))

In [8]:
# One-hot encoder information.
# NOTE(review): n_features_in_ is the number of input columns seen by fit
# (1 here), not the number of categories, so the "category count" label is
# misleading; the categories themselves are in enc.categories_[0].
print("category count:",enc.n_features_in_)
print(enc.categories_)

category count: 1
[array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)]


In [9]:
# One-hot encode every label; .toarray() turns the encoder's result into a
# dense NumPy array (one row per sample, one column per category).
Y = enc.transform(np.array(y).reshape(-1,1)).toarray()

In [10]:
# The first 60000 rows form the training set; everything after is the test set.
X_train, y_train = X[:60000], Y[:60000]
X_test, y_test = X[60000:], Y[60000:]

In [11]:
# Scale the pixel values by 1/255 (presumably 8-bit intensities, giving [0, 1]).
# The scaled sets are derived from the raw download rather than from
# X_train / X_test themselves, so re-running this cell cannot divide by 255 a
# second time. The 60000-row boundary matches the train/test split.
X_train = mnist["data"][:60000] / 255
X_test = mnist["data"][60000:] / 255

In [12]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    Parameters
    ----------
    x : scalar or array-like supporting unary minus
        Input value(s).

    Returns
    -------
    Same kind of object NumPy ufuncs return for ``x`` (scalar in, scalar out;
    array in, array out) holding the element-wise sigmoid.
    """
    # log(1 + exp(-x)) == logaddexp(0, -x), which NumPy evaluates stably, so
    # large negative inputs no longer push exp() to inf.
    return np.exp(-np.logaddexp(0, -x))

In [13]:
def softmax(X, W):
    """Row-wise softmax of the linear scores ``X @ W``.

    Parameters
    ----------
    X : ndarray, shape (N, M)
        Input rows (one sample per row).
    W : ndarray, shape (M, K)
        Weight matrix, one column per class.

    Returns
    -------
    ndarray, shape (N, K)
        Class probabilities; every row sums to 1.
    """
    logits = X @ W
    # Shifting each row by its maximum does not change the softmax value but
    # keeps exp() finite; without it large scores become inf and then NaN.
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    exp_scores = np.exp(shifted)
    # Normalise by broadcasting the row sums instead of multiplying by an
    # N x N diagonal matrix, which needed O(N^2) memory.
    return exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)

In [14]:
def compute_cost_cross_entropy(X, T, W):
    """Mean cross-entropy between the one-hot targets T and softmax(X, W).

    A small constant (1e-5) is added to the probabilities before the log so
    that a zero probability does not produce -inf.

    Returns a (1, 1) array holding the cost.
    """
    eps = 1e-5
    n_samples = len(T)
    n_classes = np.size(T, 1)

    log_probs = np.log(softmax(X, W) + eps)
    picked = np.multiply(log_probs, T)

    # Row vector filled with -1/N: multiplying by it sums over the samples and
    # applies the "-(1/N)" factor in one step; the column of ones then sums
    # over the classes.
    neg_mean_row = np.full((1, n_samples), -(1 / n_samples))
    class_sum_col = np.ones((n_classes, 1))
    cost = neg_mean_row @ picked @ class_sum_col
    return cost

def compute_cost_L2(X, T, W, regularization_parameter):
    """Cross-entropy cost plus an L2 penalty on every entry of W.

    Parameters
    ----------
    X, T, W :
        Passed unchanged to ``compute_cost_cross_entropy``.
    regularization_parameter : float
        Multiplier applied to the sum of squared weights.

    Returns
    -------
    (1, 1) array: ``cross_entropy + regularization_parameter * sum(W ** 2)``.
    """
    l2_value = np.sum(np.square(W))
    return compute_cost_cross_entropy(X, T, W) + l2_value * regularization_parameter

    

In [15]:
def predict(X, W):
    """Return, for each row of X, the index of the largest linear score in X @ W."""
    scores = X @ W
    return np.argmax(scores, axis=1)

In [16]:
# When true, batch_gd prints the mini-batch cost every 1000 iterations.
PRINT_COST_HISTORY = True

def batch_gd(X, T, W, learning_rate, iterations, batch_size, regularization_parameter):
    """Mini-batch gradient descent for L2-regularised softmax regression.

    Parameters
    ----------
    X : ndarray, shape (N, M)
        Design matrix (indexed with an integer array, so it must be an ndarray).
    T : ndarray, shape (N, K)
        One-hot targets.
    W : ndarray, shape (M, K)
        Initial weights; not modified in place.
    learning_rate : float
        Step size.
    iterations : int
        Number of weight updates.
    batch_size : int
        Rows per mini-batch.
    regularization_parameter : float
        L2 penalty multiplier. It now enters the weight update as well as the
        reported cost; with a value of 0 the update is exactly the unpenalised one.

    Returns
    -------
    (cost_history, W) : cost_history is an (iterations, 1) array with the
    mini-batch cost after each update, W the final weights.
    """
    N = len(T)
    cost_history = np.zeros((iterations,1))

    # Shuffle once up front; batches are then read from the shuffled copies.
    shuffled_indices = np.random.permutation(N)
    X_shuffled = X[shuffled_indices]
    T_shuffled = T[shuffled_indices]

    offsets = np.arange(batch_size)

    for i in range(iterations):
        # NOTE(review): the window start advances by one row per iteration, so
        # consecutive batches overlap in all but one row. Confirm whether a
        # stride of batch_size was intended.
        start = i % N
        # Modular indexing wraps a batch that runs past the end of the data
        # back to the beginning.
        batch_rows = (start + offsets) % N
        X_batch = X_shuffled[batch_rows]
        T_batch = T_shuffled[batch_rows]

        data_gradient = X_batch.T @ (softmax(X_batch, W) - T_batch)
        # Gradient of regularization_parameter * sum(W**2), the penalty that
        # compute_cost_L2 adds to the cost.
        penalty_gradient = 2 * regularization_parameter * W
        W = W - (learning_rate/batch_size) * data_gradient - learning_rate * penalty_gradient

        cost_history[i] = compute_cost_L2(X_batch, T_batch, W, regularization_parameter)
        if PRINT_COST_HISTORY and i % 1000 == 0:
            print(cost_history[i][0])

    return (cost_history, W)

In [17]:
# Targets and design matrix; the leading column of ones gives W a bias row.
T = y_train
X = np.hstack([np.ones((X_train.shape[0], 1)), X_train])

# One weight per (input column, class) pair, starting from zero.
M, K = X.shape[1], T.shape[1]
W = np.zeros((M, K))

learning_rate = 0.01
iterations = 50000

regularization_parameter = 0

# Cost of the untrained model, reported before training starts.
initial_cost = compute_cost_L2(X, T, W, regularization_parameter)
ic_print = initial_cost[0][0]
print(f"regularization_parameter:{regularization_parameter}")
print("Initial Cost is: {} \n".format(ic_print))

(cost_history, W_optimal) = batch_gd(X, T, W, learning_rate, iterations, 64, regularization_parameter)

regularization_parameter:0
Initial Cost is: 2.3024850979937166 

2.28250991438971
0.4710356581830463
0.38824867597273244
0.3613708383433534
0.3829786269411066
0.2851873967768732
0.332860262427166
0.4382460334298224
0.22567759952061525
0.4889926725499211
0.32923348410157516
0.6261355475818335
0.44930220207550464
0.3072275073908876
0.2813485084037038
0.3408531338995406
0.36400512793289425
0.24221161800697605
0.26393077927735986
0.22538046370661538
0.237324552209295
0.3056563381454782
0.21368572207604297
0.24584978770950297
0.39217390579870737
0.4050276053588574
0.28370945250525204
0.22989979747723593
0.4944810280305776
0.3954339394997718
0.20601481723139076
0.2533584399929705
0.39789646100190834
0.3956742349897717
0.18593230306353675
0.41856055267967124
0.2639168890575392
0.26179289363861885
0.32741418334175654
0.3226756790701145
0.26606284771737215
0.12283871200511359
0.427796971287012
0.1563126576094106
0.33999499315026793
0.30594563033896144
0.22150951011166134
0.3025119043374426
0.22

In [18]:
## Accuracy
# Build the test design matrix the same way as for training (bias column first).
T_ = y_test
X_ = np.hstack([np.ones((X_test.shape[0], 1)), X_test])
y_pred = predict(X_, W_optimal)

# Fraction of test rows whose predicted class equals the one-hot target's class.
score = float(np.count_nonzero(y_pred == np.argmax(T_, axis=1))) / float(len(y_test))

print(score)

0.9109


In [19]:
#optimize regularization_parameter, with multithreading

# Turn off the periodic cost printout in batch_gd while the sweep runs.
PRINT_COST_HISTORY = False

# multiprocessing.dummy offers the multiprocessing API backed by threads, so
# this Pool is a thread pool.
import multiprocessing.dummy as mp
from multiprocessing.dummy import Pool
import threading

def test_work(regularization_parameter, iterations=50000, learning_rate=0.01, batch_size=64):
    """Train on the training set with one regularisation value and score it on the test set.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Parameters
    ----------
    regularization_parameter : float
        L2 penalty multiplier passed to ``compute_cost_L2`` and ``batch_gd``.
    iterations : int, optional
        Number of gradient steps (default 50000).
    learning_rate : float, optional
        Step size (default 0.01).
    batch_size : int, optional
        Rows per mini-batch (default 64).

    Returns
    -------
    (cost_history, W_optimal, score, return_str) : the training cost history,
    the trained weights, the test accuracy, and a one-line summary string.
    """
    # Training design matrix with a leading bias column, and zero initial weights.
    X = np.hstack((np.ones((np.size(X_train, 0),1)),X_train))
    T = y_train

    K = np.size(T, 1)
    M = np.size(X, 1)
    W = np.zeros((M,K))

    initial_cost = compute_cost_L2(X, T, W, regularization_parameter)
    ic_print = initial_cost[0][0]

    print(f"start {regularization_parameter}")
    (cost_history, W_optimal) = batch_gd(X, T, W, learning_rate, iterations, batch_size, regularization_parameter)

    ## Accuracy
    X_ = np.hstack((np.ones((np.size(X_test, 0),1)),X_test))
    T_ = y_test
    y_pred = predict(X_, W_optimal)
    score = float(np.count_nonzero(y_pred == np.argmax(T_, axis=1))) / float(len(y_test))
    return_str = f"regularization_parameter:{regularization_parameter},Initial Cost is: {ic_print},score:{score}"

    return (cost_history, W_optimal, score, return_str)





In [None]:
# Candidate regularisation values: 50 multiples of 0.2, starting at 0.
regs = [step * 0.2 for step in range(50)]

# Evaluate the candidates on two worker threads; map returns the results in
# the same order as regs.
with Pool(2) as p:
    result = p.map(test_work, regs)
    p.close()
    p.join()

# Print the summary string of every run.
for entry in result:
    print(entry[3])

start 1.4000000000000001
start 0.0
start 1.6
start 0.2
start 1.8
start 0.4
start 2.0
start 0.6000000000000001
start 2.2
start 0.8
start 2.4000000000000004
start 1.0
start 2.6
start 1.2000000000000002
start 2.8000000000000003
start 4.2
start 3.0
start 4.4
start 3.2
start 4.6000000000000005
start 3.4000000000000004
start 4.800000000000001
start 3.6
start 5.0
