In [64]:
import numpy as np
import pickle
from collections import OrderedDict
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from tensorflow import keras
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import Dense, Input, Activation, Dropout, BatchNormalization   
from keras.layers import Conv2D, MaxPool2D, Flatten
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer 
from keras.preprocessing.sequence import pad_sequences
from keras.layers import SimpleRNN, LSTM
from keras.layers import Embedding

import matplotlib.pyplot as plt
import koreanize_matplotlib
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import classification_report

In [None]:
# Print floats with up to 10 digits of precision. With suppress=False NumPy may
# fall back to scientific notation (e.g. 9.99e-01); change it to suppress=True
# to always get fixed-point output (e.g. 0.999).
np.set_printoptions(precision=10, suppress=False)

## 다 모아져 있는 것 (활성화 함수, 손실 함수, 계층 모음)

In [None]:
import numpy as np

class Relu:
    """ReLU activation layer: keeps positive inputs and zeroes everything else."""

    def __init__(self):
        # Boolean array marking the inputs that were <= 0 in the latest forward().
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` in which every element <= 0 is replaced by 0.

        The non-positive positions are stored in ``self.mask`` so backward()
        can block the gradient at exactly the same places.
        """
        non_positive = x <= 0
        self.mask = non_positive

        activated = x.copy()  # leave the caller's array untouched
        activated[non_positive] = 0
        return activated

    def backward(self, dout):
        """Return a copy of ``dout`` with the entries selected by ``self.mask`` set to 0."""
        grad = dout.copy()
        grad[self.mask] = 0
        return grad

class Sigmoid:
    """Sigmoid activation layer, ``1 / (1 + exp(-x))``."""

    def __init__(self):
        # Output of the latest forward(); backward() needs it for the derivative.
        self.out = None

    def forward(self, x):
        """Apply the sigmoid element-wise, cache the result and return it."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        """Scale ``dout`` by the sigmoid derivative ``(1 - out) * out`` of the cached output."""
        y = self.out
        return dout * (1.0 - y) * y

def cross_entropy_error(p, r):
    """Mean cross-entropy between predicted probabilities and targets.

    Args:
        p: Predicted probabilities. Either a single sample (1-D) or a batch
            whose first axis indexes the samples.
        r: Targets (e.g. one-hot rows) that broadcast against ``p``.

    Returns:
        ``-sum(r * log(p + delta))`` divided by the number of samples.
    """
    p = np.asarray(p)
    delta = 1e-7  # keeps log() finite when a predicted probability is exactly 0

    # A 1-D input is one sample. Using p.shape[0] there would divide the loss
    # by the number of classes instead of the number of samples.
    batch_size = p.shape[0] if p.ndim > 1 else 1

    total_loss = -np.sum(r * np.log(p + delta))
    return total_loss / batch_size  # average over the batch

def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Every element of ``x`` is shifted by +h and then -h in place, ``f(x)`` is
    evaluated at both points, and the element is restored afterwards.

    Args:
        f: Callable taking ``x`` and returning a scalar.
        x: Array of evaluation points (temporarily modified, then restored).

    Returns:
        Array shaped like ``x`` holding ``(f(x+h) - f(x-h)) / (2h)`` per element.
    """
    h = 1e-4
    grad = np.zeros_like(x)

    for position in np.ndindex(*x.shape):
        original = x[position]

        # f evaluated with this one coordinate pushed up ...
        x[position] = original + h
        f_plus = f(x)

        # ... and pushed down.
        x[position] = original - h
        f_minus = f(x)

        grad[position] = (f_plus - f_minus) / (2*h)
        x[position] = original  # put the original value back

    return grad

def softmax(x):
    """Numerically stable softmax along the last axis.

    Gives the same result as before for 1-D input (whole vector) and 2-D input
    (row-wise). Inputs with more dimensions are normalised along their last
    axis instead of silently returning ``None``.

    Args:
        x: Array-like of scores with at least one dimension.

    Returns:
        Array of the same shape as ``x`` whose entries along the last axis
        are non-negative and sum to 1.

    Raises:
        ValueError: If ``x`` is 0-dimensional.
    """
    x = np.asarray(x)
    if x.ndim == 0:
        raise ValueError("softmax expects an array with at least 1 dimension")

    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # exp() from overflowing on large scores.
    c = np.max(x, axis=-1, keepdims=True)
    exp_a = np.exp(x - c)
    sum_exp_a = np.sum(exp_a, axis=-1, keepdims=True)
    return exp_a / sum_exp_a

class SoftmaxWithLoss:
    """Output layer: softmax followed by the mean cross-entropy loss."""

    def __init__(self):
        self.loss = None  # loss value from the latest forward()
        self.y = None     # softmax output from the latest forward()
        self.t = None     # targets given to the latest forward()

    def forward(self, x, t):
        """Compute and return the loss of scores ``x`` against targets ``t``.

        Args:
            x: Raw scores, one sample (1-D) or a batch with samples on axis 0.
            t: Targets with the same shape as ``x`` (e.g. one-hot rows).

        Returns:
            The cross-entropy loss averaged over the samples.
        """
        self.t = t
        self.y = softmax(x)
        if self.y.ndim == 1:
            # A single sample is a batch of one. Passing it as a (1, classes)
            # array makes the loss an average over samples, not over classes.
            self.loss = cross_entropy_error(self.y.reshape(1, -1), np.reshape(t, (1, -1)))
        else:
            self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout = 1):
        """Return the gradient of the loss with respect to the scores.

        Args:
            dout: Upstream gradient applied to the loss (1 by default, which
                yields the plain gradient of the loss).

        Returns:
            ``dout * (y - t) / batch_size`` with the shape of the scores.
        """
        # Normalise by the same sample count that forward() averaged over.
        batch_size = self.y.shape[0] if self.y.ndim > 1 else 1
        dx = dout * (self.y - self.t) / batch_size
        return dx

class Affine:
    """Fully connected layer computing ``x @ w + b``."""

    def __init__(self, w, b):
        # The arrays are stored by reference, not copied.
        self.w = w
        self.b = b
        self.x = None   # input of the latest forward(), needed by backward()
        self.dw = None  # gradient w.r.t. w, filled in by backward()
        self.db = None  # gradient w.r.t. b, filled in by backward()

    def forward(self, x):
        """Cache ``x`` and return ``x @ w + b``."""
        self.x = x
        return np.matmul(x, self.w) + self.b

    def backward(self, dout):
        """Store the parameter gradients and return the gradient w.r.t. the input.

        ``dw`` is ``x.T @ dout``, ``db`` is ``dout`` summed over axis 0, and the
        returned value is ``dout @ w.T``.
        """
        grad_input = np.matmul(dout, self.w.T)
        self.dw = np.matmul(self.x.T, dout)
        self.db = np.sum(dout, axis = 0)
        return grad_input


In [None]:
from collections import OrderedDict

class TwoLayerNet:
    """Two-layer fully connected classifier: Affine -> ReLU -> Affine -> softmax loss.

    ``self.params`` holds the parameter arrays under the keys 'w1', 'b1', 'w2'
    and 'b2'. The Affine layers keep references to these same arrays, so an
    optimizer must update them in place (``params[key] -= ...``) for the
    layers to see the new values.
    """

    def __init__(self, I, H, O, weight_init_std = 1.0):
        """Create the parameters and the layer stack.

        Args:
            I: Number of input features.
            H: Number of hidden units.
            O: Number of output classes.
            weight_init_std: Factor applied to the standard-normal weight
                matrices. The default 1.0 keeps the previous initialisation;
                a smaller value gives smaller initial weights. Biases are
                drawn from a standard normal and are not scaled.
        """
        self.params = {}
        self.params['w1'] = weight_init_std * np.random.randn(I, H)
        self.params['b1'] = np.random.randn(H)
        self.params['w2'] = weight_init_std * np.random.randn(H, O)
        self.params['b2'] = np.random.randn(O)

        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['w1'], self.params['b1'])
        self.layers['Relu1'] = Relu()                       # change only this line to switch to Sigmoid
        self.layers['Affine2'] = Affine(self.params['w2'], self.params['b2'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer except the loss layer and return the scores."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the loss of the network's scores for ``x`` against targets ``t``."""
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows in ``x`` whose arg-max score matches ``t``.

        ``t`` may be 1-D class indices, otherwise its arg-max along axis 1 is used.
        """
        y = self.predict(x)
        y = np.argmax(y, axis = 1)
        if t.ndim != 1:
            t = np.argmax(t, axis = 1)
        accuracy = np.sum(y == t) / x.shape[0]
        return accuracy

    def gradient(self, x, t):
        """Compute parameter gradients by backpropagation.

        Returns:
            Dict with keys 'w1', 'b1', 'w2', 'b2' holding the gradients that
            the two Affine layers stored during the backward pass.
        """
        # The forward pass caches the intermediate values each layer needs.
        self.loss(x, t)

        # Backward pass: start at the loss layer, then walk the layers in reverse.
        dout = self.last_layer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        grads['w1'] = self.layers['Affine1'].dw
        grads['b1'] = self.layers['Affine1'].db
        grads['w2'] = self.layers['Affine2'].dw
        grads['b2'] = self.layers['Affine2'].db
        return grads

## 경사하강법

In [None]:
class SGD:
    """Plain stochastic gradient descent: ``param -= lr * grad``."""

    def __init__(self, lr = 0.1):
        self.lr = lr  # learning rate

    def update(self, params, grads):
        """Update every entry of ``params`` in place using the matching entry of ``grads``."""
        for name in params:
            params[name] -= self.lr * grads[name]

class Momentum:
    """SGD with momentum: ``v = momentum * v + grad`` then ``param -= lr * v``."""

    def __init__(self, lr = 0.01, momentum = 0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None  # per-parameter velocity, created on the first update()

    def update(self, params, grads):
        """Advance the velocities and update ``params`` in place."""
        if self.v is None:
            # Every velocity starts at zero.
            self.v = {name: np.zeros_like(value) for name, value in params.items()}

        for name in params:
            velocity = self.momentum * self.v[name] + grads[name]
            self.v[name] = velocity
            params[name] -= self.lr * velocity

class AdaGrad:
    """AdaGrad: scales each step by the root of the accumulated squared gradients."""

    def __init__(self, lr = 0.01, epsilon = 1e-8):
        self.lr = lr
        self.epsilon = epsilon  # added to the denominator to avoid division by zero
        self.h = None           # running sum of squared gradients, created on first update()

    def update(self, params, grads):
        """Accumulate squared gradients and update ``params`` in place."""
        if self.h is None:
            self.h = {name: np.zeros_like(value) for name, value in params.items()}

        for name in params:
            g = grads[name]
            self.h[name] += g * g
            params[name] -= self.lr * g / (np.sqrt(self.h[name]) + self.epsilon)

class Adam:
    """Adam optimizer with bias-corrected first and second moment estimates."""

    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.lr = lr
        self.beta1 = beta1      # decay rate of the first-moment average
        self.beta2 = beta2      # decay rate of the second-moment average
        self.epsilon = epsilon  # added to the denominator to avoid division by zero
        self.iter = 0           # number of update() calls so far
        self.m = None           # first-moment estimates, created on first update()
        self.v = None           # second-moment estimates, created on first update()

    def update(self, params, grads):
        """Refresh the moment estimates and update ``params`` in place."""
        self.iter += 1

        if self.m is None:
            self.m = {name: np.zeros_like(value) for name, value in params.items()}
            self.v = {name: np.zeros_like(value) for name, value in params.items()}

        # The bias-correction denominators depend only on the step count.
        m_correction = 1 - self.beta1**self.iter
        v_correction = 1 - self.beta2**self.iter

        for name in params:
            g = grads[name]
            self.m[name] = self.beta1 * self.m[name] + (1 - self.beta1) * g
            self.v[name] = self.beta2 * self.v[name] + (1 - self.beta2) * (g ** 2)

            m_hat = self.m[name] / m_correction
            v_hat = self.v[name] / v_correction

            params[name] -= self.lr * m_hat / (np.sqrt(v_hat) + self.epsilon)

In [None]:
# --- Training setup ---
train_size = len(train_scaled)
batch_size = 100
lr = 0.1                              # NOTE(review): not read anywhere in this cell; the optimizer keeps its own default

optimizer = AdaGrad()                 # swap in another optimizer class here to compare them
net = TwoLayerNet(784, 50, 10)

# Losses are logged on every iteration, accuracies on every 100th one.
train_losses, test_losses = [], []
train_acc, test_acc = [], []

for i in range(1, 1001):
    # Draw a random mini-batch of row indices.
    mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = train_scaled[mask], train_y[mask]

    # Backpropagate, then let the optimizer update the network's parameters.
    grad = net.gradient(x_batch, t_batch)
    params = net.params
    optimizer.update(params, grad)

    # Losses are measured after the update.
    loss = net.loss(x_batch, t_batch)
    test_loss = net.loss(test_scaled, test_y)
    train_losses.append(loss)
    test_losses.append(test_loss)

    if i % 100 != 0:
        continue

    # Checkpoint: accuracy on the full training and test sets.
    a = net.accuracy(train_scaled, train_y)
    b = net.accuracy(test_scaled, test_y)
    train_acc.append(a)
    test_acc.append(b)
    print(f"{i}회 학습 / 정확도(훈련) : {a:.3f}, 정확도(시험) : {b:.3f}")

## loss 시각화 

In [None]:
import matplotlib.pyplot as plt

# Plot the training and test losses that the training loop appended on every iteration.
fig, ax = plt.subplots()
ax.plot(train_losses, label = 'train')
ax.plot(test_losses, label = 'test')
# Label the axes and title so the figure can be read on its own.
ax.set_xlabel('iteration')
ax.set_ylabel('loss')
ax.set_title('Loss per training iteration')
ax.legend()
ax.grid()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Draw one curve per series stored in `history.history`:
# "loss" is the training loss, "val_loss" the validation ("pop quiz") loss.
# Each line carries its own label, so the legend is built from those labels.
for series_key, legend_name in (("loss", "train"), ("val_loss", "test")):
    plt.plot(history.history[series_key], label=legend_name)
plt.legend()
plt.grid()
plt.show()

In [None]:
# Evaluate the model on the test set; as the last expression of the cell, the result is displayed.
model.evaluate(test_scaled, test_y)
# Expected result: [loss value, accuracy] -- presumably the model was compiled with an
# accuracy metric; confirm against the compile() call, which is not shown here.