In [1]:
import numpy as np

![nn](./perceptron.png)

# 1단계: feedforward 순전파
# 2단계: 손실계산
# 3단계: backpropagation 역전파

In [61]:
# Multi layer perceptron
class MLP:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        #가중치 초기화
        # self.w1_2_3_4 = np.random.random((self.input_size, self.hidden_size))
        self.w1_2_3_4 = [[1, 10],[1, 10]] #강의용으로 결과를 빨리보고자 값을 fix 함
        # self.w5_6 = np.random.random((self.hidden_size, self.output_size))
        self.w5_6 = [[-40], [40]] #강의용으로 결과를 빨리보고자 값을 fix 함

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def forward(self, x):
        # propagate inputs through the network
        self.z1_2 = np.dot(x, self.w1_2_3_4) # 1 X 2
        self.h = self.sigmoid(self.z1_2)     # 1 X 2
        self.z3 = np.dot(self.h, self.w5_6)  # 1 X 1
        self.o = self.sigmoid(self.z3)       # 1 X 1
        return self.o

    def mse_loss(self, y_true, y_pred):
        # MSE 손실계산
        return np.mean((y_true - y_pred) ** 2)

    def backward(self, x, y, y_pred, learning_rate):
        # 체인룰 계산
        dc_do1 = -2 * (y - y_pred)
        do1_dz3 = y_pred * (1 - y_pred)
        dz3_dw5_6 = self.h
        dc_dw5_6 = dc_do1 * do1_dz3 * dz3_dw5_6                # 1 X 2
        self.w5_6 = self.w5_6 + learning_rate * -dc_dw5_6.T    # 2 X 1
        dc_dw1_2_3_4 = dc_do1 * do1_dz3 * np.dot(self.w5_6 * (self.h * (1 - self.h)).T, x)

        self.w1_2_3_4 = self.w1_2_3_4 + learning_rate * -dc_dw1_2_3_4.T

    def train(self, x_train, y_train, ephochs, learning_rate):
        # for epoch in range(epochs):
        for epoch in range(ephochs):
            for i in range(len(x_train)):
                # Forward pass
                y_pred = self.forward([x_train[i]])
                # Compute and print Loss
                loss = self.mse_loss([y_train[i]], y_pred)
                # Backward pass
                self.backward([x_train[i]], [y_train[i]], y_pred, learning_rate)
            if np.mod(epoch,100)==0:
                print('epoch=',epoch, 'loss=',loss)

![nn](./xor.png)

In [62]:
# 데이터 생성
x_train = np.random.randint(0, 2, (100,2))
y_train = (x_train[:,0]!=x_train[:,1]).astype(int)

In [63]:
x_train[1]

array([1, 1])

In [64]:
y_train[1]

0

In [65]:
# 다층퍼셉트론 선언
mlp = MLP(input_size=2, hidden_size=2, output_size=1)

In [66]:
# 모델 학습
mlp.train(x_train, y_train, ephochs=1000, learning_rate=0.1)

epoch= 0 loss= 0.34725335316148703
epoch= 100 loss= 0.0024860516621991262
epoch= 200 loss= 0.002027648622569982
epoch= 300 loss= 0.001863744750340665
epoch= 400 loss= 0.0017781330320093913
epoch= 500 loss= 0.0017239498944598664
epoch= 600 loss= 0.0016849674923523062
epoch= 700 loss= 0.0016541822942871354
epoch= 800 loss= 0.0016281624782455643
epoch= 900 loss= 0.0016050845278198075


In [68]:
# 테스트 값으로 모델값 예측
test_input = np.array([[0, 0]])
predicted_output = mlp.forward(test_input)
print("Predicted output: ", test_input, predicted_output)
test_input = np.array([[1, 0]])
predicted_output = mlp.forward(test_input)
print("Predicted output: ", test_input, predicted_output)
test_input = np.array([[0, 1]])
predicted_output = mlp.forward(test_input)
print("Predicted output: ", test_input, predicted_output)
test_input = np.array([[1, 1]])
predicted_output = mlp.forward(test_input)
print("Predicted output: ", test_input, predicted_output)

Predicted output:  [[0 0]] [[0.0125051]]
Predicted output:  [[1 0]] [[0.97352959]]
Predicted output:  [[0 1]] [[0.96965226]]
Predicted output:  [[1 1]] [[0.0392337]]
