<a href="https://colab.research.google.com/github/sangyeons57/Colab/blob/main/MultiLayerANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 필요한 라이브러리 import
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
class Layer:
    """Abstract base for every network component.

    Subclasses are expected to override ``forward`` and ``backward``.
    ``params`` only needs overriding by layers that own trainable state.
    """

    def forward(self, input, training = True):
        """Compute the layer output for ``input``; must be overridden."""
        raise NotImplementedError()

    def backward(self, grad_out):
        """Propagate ``grad_out`` back through the layer; must be overridden."""
        raise NotImplementedError()

    def params(self):
        """Return the trainable parameters of the layer (none by default)."""
        return list()

In [None]:
class LinearLayer(Layer):
    """Fully connected layer computing ``input @ W`` plus an optional bias.

    Attributes:
        W: weight matrix of shape (input_count, output_count).
        b: bias row of shape (1, output_count), or None when bias is disabled.
        dW, db: gradients written by ``backward`` (None until then).
        input: the array most recently passed to ``forward``.
    """

    def __init__(self, input_count, output_count, bias=True):
        # Standard-normal draw scaled by sqrt(2 / fan_in).
        raw_weights = np.random.randn(input_count, output_count)
        self.W = raw_weights * np.sqrt(2.0 / (input_count))

        self.use_bias = bool(bias)
        self.b = np.zeros((1, output_count)) if self.use_bias else None

        # Filled in later by forward() / backward().
        self.dW = None
        self.db = None
        self.input = None

    def forward(self, input, training=True):
        """Cache ``input`` and return ``input @ W`` (plus ``b`` if enabled)."""
        self.input = input
        projected = input @ self.W
        if not self.use_bias:
            return projected
        projected += self.b
        return projected

    def backward(self, grad_out):
        """Store dW/db for the cached input and return the input gradient."""
        self.dW = self.input.T @ grad_out
        # The bias gradient is the column-wise sum over the batch axis.
        self.db = grad_out.sum(axis=0, keepdims=True) if self.use_bias else None
        return grad_out @ self.W.T

    def params(self):
        """Return ``[(W, dW)]`` followed by ``(b, db)`` when bias is enabled."""
        pairs = [(self.W, self.dW)]
        if self.use_bias:
            pairs += [(self.b, self.db)]
        return pairs


In [None]:
# Activation function
class ReLU(Layer):
    """Rectified linear unit: ``max(x, 0)`` applied element-wise."""

    def forward(self, x, training=True):
        """Return a copy of ``x`` with every non-positive entry set to 0.

        The boolean mask of clipped positions is kept on ``self.mask`` so
        that ``backward`` can block the gradient at the same positions.
        """
        # Mask marks the entries that are clipped (x <= 0); positive values
        # must pass through unchanged.
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, grad_out):
        """Return ``grad_out`` with the gradient zeroed at clipped positions."""
        # Work on a copy so the caller's gradient array is left untouched.
        dx = grad_out.copy()
        dx[self.mask] = 0
        return dx

In [None]:
# Layer Container
'''
*layers 여러개의 위치 인자를 전달 가능한 객체
'''
class Sequential(Layer):
    """Container that chains its child layers one after another.

    NOTE: this definition rebinds the name ``Sequential`` that the top of the
    file imports from ``tensorflow.keras.models``.
    """

    def __init__(self, *layers):
        # Any number of layers may be passed as positional arguments.
        self.layers = [*layers]

    def forward(self, input, training = True):
        """Feed ``input`` through every layer in order and return the result."""
        out = input
        for child in self.layers:
            out = child.forward(out, training=training)
        return out

    def backward(self, grad_out):
        """Push ``grad_out`` through the layers from last to first."""
        grad = grad_out
        for child in self.layers[::-1]:
            grad = child.backward(grad)
        return grad

    def params(self):
        """Concatenate the ``params()`` lists of all child layers, in order."""
        collected = []
        for child in self.layers:
            collected.extend(child.params())
        return collected

In [None]:
def softmax(x):
    """Return the row-wise softmax of a 2-D array without modifying ``x``.

    Args:
        x: array of shape (batch, classes); normalisation runs over axis 1.

    Returns:
        A new array of the same shape whose rows sum to 1.
    """
    # Subtract the row maximum for numerical stability. Build a new array
    # instead of using ``-=`` so the caller's logits are not overwritten.
    shifted = x - np.max(x, axis=1, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=1, keepdims=True)

def sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))``.

    Args:
        x: scalar or array of any shape.

    Returns:
        Array of the same shape as ``x`` with values in [0, 1].
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) never overflows; pick the algebraically equivalent form for
    # each sign so large-magnitude inputs stay finite.
    z = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))

# Loss value
class Loss:
    """Softmax cross-entropy loss with a matching backward pass."""

    def __init__(self):
        self.params = dict()
        self.grads = dict()

    def cross_entropy_forward(self, logits, y_true):
        """Return the batch-averaged cross-entropy of ``logits`` vs ``y_true``.

        The softmax probabilities and the targets are cached on the instance
        for ``cross_entropy_backward``. The training script in this file
        passes one-hot rows as ``y_true``.
        """
        self.probs = softmax(logits)
        self.y_true = y_true
        # The small constant keeps log() finite when a probability is 0.
        log_probs = np.log(self.probs + 1e-8)
        batch = logits.shape[0]
        return -np.sum(y_true * log_probs) / batch

    def cross_entropy_backward(self):
        """Return ``(probs - y_true) / batch`` from the cached forward values."""
        batch = self.probs.shape[0]
        return (self.probs - self.y_true) / batch

In [None]:
# Optimizer
class SGD:
    """Stochastic gradient descent with a decaying learning rate.

    After ``t`` calls to ``step`` the learning rate is
    ``init_lr / (1 + decay * t)``.
    """

    def __init__(self, lr = 0.01, decay=0.0):
        self.init_lr = self.lr = lr
        self.decay = decay
        # Step counter; the attribute name's spelling is kept as-is because
        # it is publicly visible on the instance.
        self.interations = 0

    def step(self, params):
        """Update every parameter in place from its paired gradient.

        Args:
            params: iterable of ``(parameter, gradient)`` pairs.
        """
        self.interations = self.interations + 1
        self.lr = self.init_lr / (1 + self.decay * self.interations)
        for weight, grad in params:
            # In-place so the layer that owns the array sees the update.
            weight -= self.lr * grad



In [None]:
class Model:
    """Bundle a network, a loss and an optimizer behind a train/predict API."""

    def __init__(self, net : Sequential, loss_fn: Loss, optimizer: SGD):
        self.net, self.loss_fn, self.optimizer = net, loss_fn, optimizer
        self.training = True

    def train(self):
        """Switch to training mode."""
        self.training = True

    def eval(self):
        """Switch to evaluation mode."""
        self.training = False

    def forward(self, x):
        """Run the network on ``x`` using the current training flag."""
        return self.net.forward(x, training=self.training)

    def train_step(self, x_batch, y_batch):
        """Run one optimisation step on a batch and return its loss."""
        self.train()
        # Forward pass, then the loss on the resulting logits.
        loss = self.loss_fn.cross_entropy_forward(self.forward(x_batch), y_batch)
        # Gradient of the loss, propagated back through the network.
        self.net.backward(self.loss_fn.cross_entropy_backward())
        # Apply the gradients stored by the layers.
        self.optimizer.step(self.net.params())
        return loss

    def predict(self, x):
        """Return the arg-max class index per row; leaves the model in eval mode."""
        self.eval()
        return np.argmax(self.forward(x), axis=1)

In [None]:
class Dropout(Layer):
    """Inverted dropout: zero random units and rescale the survivors.

    During training each unit is dropped with probability ``rate`` and the
    kept units are divided by ``1 - rate``, so the expected activation
    matches evaluation mode, where the input passes through unchanged.

    Args:
        rate: drop probability, must satisfy ``0 <= rate < 1``.

    Raises:
        ValueError: if ``rate`` is outside ``[0, 1)``.
    """

    def __init__(self, rate=0.5):
        if not 0.0 <= rate < 1.0:
            raise ValueError(f"rate must be in [0, 1), got {rate!r}")
        self.rate = rate
        self.mask = None

    def forward(self, input, training=True):
        """Apply dropout when ``training`` is true, otherwise return ``input``."""
        if not training:
            # Clear any mask from an earlier training pass so backward()
            # does not reuse it for this forward call.
            self.mask = None
            return input

        keep_prob = 1.0 - self.rate
        # Kept entries carry 1 / keep_prob (not keep_prob) so that
        # E[input * mask] == input.
        self.mask = (np.random.rand(*input.shape) > self.rate) / keep_prob
        return input * self.mask

    def backward(self, grad_out):
        """Scale ``grad_out`` by the mask of the most recent forward pass."""
        if self.mask is None:
            # Last forward ran in eval mode: the layer was the identity.
            return grad_out
        return grad_out * self.mask



In [None]:
# Batch normalization
# Stabilizes training
class BatchNorm(Layer):
    """Batch normalization over axis 0 with a learnable scale and shift.

    Args:
        dim: number of features (size of axis 1 of the input).
        momentum: weight given to the previous running statistics when they
            are updated during training.
        eps: constant added to the variance before the square root.
    """

    def __init__(self, dim, momentum = 0.9, eps=1e-5 ):
        self.gamma = np.ones((1, dim))
        self.beta = np.zeros((1, dim))
        self.momentum = momentum
        self.eps = eps

        # Statistics used in evaluation mode.
        self.running_mean = np.zeros((1, dim))
        self.running_var = np.ones((1, dim))

        # Gradients, written by backward(). The names must match the ones
        # read by params(), so params() is usable before the first backward().
        self.dgamma = None
        self.dbeta = None

        # Values cached by the training-mode forward pass for backward().
        self.x_centered = None
        self.x_norm = None
        self.std = None

    def forward(self, input, training=True):
        """Normalize ``input`` and apply ``gamma`` / ``beta``.

        In training mode the batch mean/variance are used and folded into
        the running statistics; otherwise the running statistics are used.
        """
        if training:
            mean = input.mean(axis=0, keepdims=True)
            var = input.var(axis=0, keepdims=True)

            self.x_centered = input - mean
            self.std = np.sqrt(var + self.eps)
            self.x_norm = self.x_centered / self.std
            out = self.gamma * self.x_norm + self.beta

            # Exponential moving average of the batch statistics.
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mean
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var
            return out

        input_norm = (input - self.running_mean) / np.sqrt(self.running_var + self.eps)
        return self.gamma * input_norm + self.beta

    def backward(self, grad_out):
        """Store dgamma/dbeta and return the gradient w.r.t. the input.

        Uses the values cached by the most recent training-mode forward pass.
        """
        m = grad_out.shape[0]

        # Parameter gradients
        self.dgamma = (grad_out * self.x_norm).sum(axis=0, keepdims=True)
        self.dbeta = grad_out.sum(axis=0, keepdims=True)

        # Gradient through scale and shift
        dx_norm = grad_out * self.gamma  # (N, D)

        # Intermediate partials for mean/variance
        dvar = (dx_norm * self.x_centered * -0.5 * (self.std ** -3)).sum(axis=0, keepdims=True)
        # The second term is ~0 because x_centered has zero column mean;
        # it is kept so the expression mirrors the full chain rule.
        dmu = (dx_norm * -1.0 / self.std).sum(axis=0, keepdims=True) + dvar * (-2.0 * self.x_centered.mean(axis=0, keepdims=True))

        # Final gradient w.r.t. input
        dx = dx_norm / self.std + dvar * 2.0 * self.x_centered / m + dmu / m
        return dx

    def params(self):
        """Return ``[(gamma, dgamma), (beta, dbeta)]``."""
        return [(self.gamma, self.dgamma), (self.beta, self.dbeta)]

In [None]:
from tensorflow.keras.datasets import mnist

# ─── 1. Load and preprocess the data ────────────────────────────
(train_X, train_y), (test_X, test_y) = mnist.load_data()
# (60000,28,28), (10000,28,28)

# Flatten the 28x28 images and scale pixel values to 0~1
train_X = train_X.reshape(-1, 784) / 255.0
test_X  = test_X.reshape(-1, 784) / 255.0

# One-hot encode the labels
num_classes = 10
train_Y = np.eye(num_classes)[train_y]
test_Y  = np.eye(num_classes)[test_y]

# ─── 2. Build the model ─────────────────────────────────────────
# Simpler baseline network, kept for reference:
# net = Sequential(
#     LinearLayer(784, 128),
#     ReLU(),
#     LinearLayer(128, num_classes)
# )

net = Sequential(
    LinearLayer(784, 256, bias=False),      # bias omitted: BatchNorm's beta takes its place
    BatchNorm(256, momentum=0.9),           # momentum of the running mean/variance
    ReLU(),
    Dropout(rate=0.1),

    LinearLayer(256, 128, bias=False),
    BatchNorm(128, momentum=0.9),
    ReLU(),
    Dropout(rate=0.1),

    LinearLayer(128, num_classes)
)

loss_fn = Loss()
optimizer = SGD(lr=0.5, decay=0.001)
model = Model(net, loss_fn, optimizer)

# ─── 3. Training loop (epochs & batches) ────────────────────────
batch_size = 128
epochs = 3
num_train = train_X.shape[0]
# Integer division: a trailing partial batch is skipped each epoch.
steps = num_train // batch_size

for epoch in range(epochs):
    # Shuffle the data
    idx = np.random.permutation(num_train)
    Xs, Ys = train_X[idx], train_Y[idx]

    epoch_loss = 0
    for i in range(steps):
        start = i * batch_size
        xb = Xs[start:start+batch_size]
        yb = Ys[start:start+batch_size]
        epoch_loss += model.train_step(xb, yb)
        # epoch_loss is a running sum, so report the mean over the steps
        # seen so far; this keeps the logged value comparable to the
        # per-epoch average printed below.
        if (i+1) % 100 == 0:
            print(f"Step {i+1}/{steps}, loss={epoch_loss / (i + 1):.4f}")
        elif (i == 0):
            print(f"First Step, loss={epoch_loss:.4f}")
    epoch_loss /= steps
    print(f"Epoch {epoch+1}/{epochs}, loss={epoch_loss:.4f}")

# ─── 4. Evaluate test accuracy ──────────────────────────────────
preds = model.predict(test_X)
accuracy = (preds == test_y).mean()
print("Test accuracy:", accuracy)

First Step, loss=2.7464
Step 100/468, loss=40.6903
Step 200/468, loss=61.6069
Step 300/468, loss=79.4382
Step 400/468, loss=95.1625
Epoch 1/3, loss=0.2255
First Step, loss=0.1308
Step 100/468, loss=11.8644
Step 200/468, loss=22.5752
Step 300/468, loss=34.1272
Step 400/468, loss=44.1861
Epoch 2/3, loss=0.1079
First Step, loss=0.1066
Step 100/468, loss=7.5760
Step 200/468, loss=15.5798
Step 300/468, loss=23.8533
Step 400/468, loss=32.1438
Epoch 3/3, loss=0.0794
Test accuracy: 0.9759


In [None]:
# 현재 최대 96.8%
# 신규 최대 97.59%
# 내일 학습룰 99% 도전 ELU, CNN,BN 을 활용해서