In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
import seaborn as sns

In [3]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True)
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [4]:
X, y = mnist["data"], mnist["target"]

In [5]:
y = y.astype(np.uint8)

In [6]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()

In [7]:
enc.fit(y[:,np.newaxis])

  enc.fit(y[:,np.newaxis])


OneHotEncoder()

In [8]:
Y = enc.transform(y[:,np.newaxis]).toarray()

  Y = enc.transform(y[:,np.newaxis]).toarray()


In [9]:
X_train, X_test, y_train, y_test = X[:60000], X[60000:], Y[:60000], Y[60000:]

In [10]:
X_train = X_train / 255
X_test = X_test / 255

In [11]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [98]:
def softmax(X, W):
    K = np.size(W, 1)
    A = np.exp(X @ W)
    B = np.diag(1 / (np.reshape(A @ np.ones((K,1)), -1)))
    Y = B @ A
    return Y

In [99]:
def compute_cost(X, T, W):
    epsilon = 1e-5
    N = len(T)
    K = np.size(T, 1)
    cost = - (1/N) * np.ones((1,N)) @ (np.multiply(np.log(softmax(X, W) + epsilon), T)) @ np.ones((K,1))
    return cost

In [100]:
def predict(X, W):
    return np.argmax((X @ W), axis=1)

In [101]:
def batch_gd(X, T, W, learning_rate, iterations, batch_size):
    N = len(T)
    cost_history = np.zeros((iterations,1))
    shuffled_indices = np.random.permutation(N)
    X_shuffled = X[shuffled_indices]
    T_shuffled = T[shuffled_indices]

    for i in range(iterations):
        j = i % N
        X_batch = X_shuffled[j:j+batch_size]
        T_batch = T_shuffled[j:j+batch_size]
        # batch가 epoch 경계를 넘어가는 경우, 앞 부분으로 채워줌
        if X_batch.shape[0] < batch_size:
            X_batch = np.vstack((X_batch, X_shuffled[:(batch_size - X_batch.shape[0])]))
            T_batch = np.vstack((T_batch, T_shuffled[:(batch_size - T_batch.shape[0])]))
        W = W - (learning_rate/batch_size) * (X_batch.T @ (softmax(X_batch, W) - T_batch))
        cost_history[i] = compute_cost(X_batch, T_batch, W)
        if i % 1000 == 0:
            print(cost_history[i][0])

    return (cost_history, W)

In [102]:
X = np.hstack((np.ones((np.size(X_train, 0),1)),X_train))
T = y_train

K = np.size(T, 1)
M = np.size(X, 1)
W = np.zeros((M,K))

iterations = 50000
learning_rate = 0.01

initial_cost = compute_cost(X, T, W)

print("Initial Cost is: {} \n".format(initial_cost[0][0]))

(cost_history, W_optimal) = batch_gd(X, T, W, learning_rate, iterations, 64)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 60000 is different from 600000)

In [17]:
## Accuracy
X_ = np.hstack((np.ones((np.size(X_test, 0),1)),X_test))
T_ = y_test
y_pred = predict(X_, W_optimal)
score = float(sum(y_pred == np.argmax(T_, axis=1)))/ float(len(y_test))

print(score)

0.9137


In [18]:
initial_cost

array([[2.3024851]])

In [20]:
epsilon = 1e-5
N = len(T)
K = np.size(T, 1)
cost = - (1/N) * np.ones((1,N)) @ (np.multiply(np.log(softmax(X, W) + epsilon), T)) @ np.ones((K,1))

In [22]:
epsilon

1e-05

In [24]:
K

10

In [34]:
ss1=(1/N) * np.ones((1,N))
ss1.shape

(1, 60000)

In [50]:
ss =(np.multiply(np.log(softmax(X, W) + epsilon), T))
ss

array([[-0.       , -0.       , -0.       , ..., -0.       , -0.       ,
        -0.       ],
       [-2.3024851, -0.       , -0.       , ..., -0.       , -0.       ,
        -0.       ],
       [-0.       , -0.       , -0.       , ..., -0.       , -0.       ,
        -0.       ],
       ...,
       [-0.       , -0.       , -0.       , ..., -0.       , -0.       ,
        -0.       ],
       [-0.       , -0.       , -0.       , ..., -0.       , -0.       ,
        -0.       ],
       [-0.       , -0.       , -0.       , ..., -0.       , -2.3024851,
        -0.       ]])

In [31]:
ss.shape

(60000, 10)

In [35]:
ss2=ss1@ss

array([[-0.22729365, -0.25872258, -0.22863677, -0.2352756 , -0.2241853 ,
        -0.20802953, -0.22710178, -0.24041782, -0.22453067, -0.2282914 ]])

In [37]:
ss1 @ ss @ np.ones((K,1))

array([[-2.3024851]])

In [40]:
ss3= ss @ np.ones((K,1))

In [41]:
ss3.shape

(60000, 1)

In [42]:
ss3

array([[-2.3024851],
       [-2.3024851],
       [-2.3024851],
       ...,
       [-2.3024851],
       [-2.3024851],
       [-2.3024851]])

In [43]:
ss1

array([[1.66666667e-05, 1.66666667e-05, 1.66666667e-05, ...,
        1.66666667e-05, 1.66666667e-05, 1.66666667e-05]])

In [44]:
ss1 @ ss3

array([[-2.3024851]])

In [46]:
np.log(softmax(X, W) + epsilon)

array([[-2.3024851, -2.3024851, -2.3024851, ..., -2.3024851, -2.3024851,
        -2.3024851],
       [-2.3024851, -2.3024851, -2.3024851, ..., -2.3024851, -2.3024851,
        -2.3024851],
       [-2.3024851, -2.3024851, -2.3024851, ..., -2.3024851, -2.3024851,
        -2.3024851],
       ...,
       [-2.3024851, -2.3024851, -2.3024851, ..., -2.3024851, -2.3024851,
        -2.3024851],
       [-2.3024851, -2.3024851, -2.3024851, ..., -2.3024851, -2.3024851,
        -2.3024851],
       [-2.3024851, -2.3024851, -2.3024851, ..., -2.3024851, -2.3024851,
        -2.3024851]])

In [49]:
T

array([[0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.]])

In [53]:

K = np.size(W, 1)
A = np.exp(X @ W)

In [55]:
np.reshape(A @ np.ones((K,1)), -1)

array([10., 10., 10., ..., 10., 10., 10.])

In [74]:
X @ W

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [97]:
B = np.diag(1 / (np.reshape(A @ matrix_fixer, -1)))
B

array([[0.1       , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.1       , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.09788895, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.0969636 , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.09798229,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.09150598]])

In [83]:
from sklearn.linear_model import LogisticRegression
log_clf = LogisticRegression(random_state=0).fit(X_train, y[:60000] )

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [86]:

from sklearn.model_selection import cross_val_predict
y_train_pred = cross_val_predict(log_clf, X_train,y[:60000], cv=5)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [88]:

from sklearn.metrics import confusion_matrix
matrix = confusion_matrix(y[:60000],y_train_pred)

In [89]:
matrix

array([[5701,    1,   32,   12,   14,   62,   40,   13,   36,   12],
       [   1, 6555,   30,   21,    9,   28,    4,   16,   66,   12],
       [  33,   71, 5334,  105,   72,   29,   80,   75,  134,   25],
       [  21,   30,  146, 5454,    7,  214,   21,   58,  122,   58],
       [  16,   27,   35,   12, 5433,    8,   56,   22,   43,  190],
       [  60,   22,   51,  177,   53, 4746,   98,   23,  139,   52],
       [  34,   19,   51,    1,   49,   83, 5643,    7,   26,    5],
       [  14,   27,   78,   22,   53,   12,    3, 5838,   20,  198],
       [  35,  116,   78,  160,   40,  167,   50,   20, 5107,   78],
       [  24,   32,   20,   78,  156,   34,    3,  181,   50, 5371]])

In [93]:
need_fix= {
    3:[2,5],
    4:[9],
    5:[3,8],
    7:[9],
    8:[3,5],
    9:[4,7]
    }
matrix_fixer =np.ones((10, 10), np.float)
matrix_fixer

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [94]:
for y, num in need_fix.items():
    for x in num:
        matrix_fixer[y][x] = 1+matrix[y][x]/(sum(matrix[y])-matrix[y][y])


In [95]:
matrix_fixer

array([[1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.21565731, 1.        , 1.        ,
        1.31610044, 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.46454768],
       [1.        , 1.        , 1.        , 1.26222222, 1.        ,
        1.        , 1.        , 1.        , 1.20592593, 1.        ],
       [1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.       