# **순전파**

In [2]:
import numpy as np

# ReLU 함수 정의
def relu(x):
  return np.maximum(0,x)

# 입력층 데이터(1*3)
x=np.array([[0.5,0.8,0.2]])

# 은닉층 가중치(3*2), 편향(1*2)
w1=np.array([
    [1.0,-1.0],
    [0.0,2.0],
    [1.0,0.5]
])

b1=np.array([[0.1,0.2]])

# 은닉층 계산
z1=x@w1+b1
a1=relu(z1)

# 출력층 가중치(2*1), 편향(1*1)
w2=np.array([
    [2.0],
    [-1.0]
])

b2=np.array([[0.5]])

# 출력층 계산
z2=a1@w2+b2
y_pred=relu(z2)

# 결과 출력
print("입력 벡터 x:", x)
print("은닉층 선형 출력 z1:", z1)
print("은닉층 활성화 a1 (ReLU):", a1)
print("출력층 선형 출력 z2:", z2)
print("최종 예측 y_pred (ReLU):", y_pred)

입력 벡터 x: [[0.5 0.8 0.2]]
은닉층 선형 출력 z1: [[0.8 1.4]]
은닉층 활성화 a1 (ReLU): [[0.8 1.4]]
출력층 선형 출력 z2: [[0.7]]
최종 예측 y_pred (ReLU): [[0.7]]


# **역전파 (직접 미분)**

In [6]:
import numpy as np

# ReLU 함수와 도함수 정의
def relu(x):
  return np.maximum(0,x)

def relu_deriv(x):
  return (x>0).astype(float)

# 입력층 데이터(1*3)
x=np.array([[0.5,0.8,0.2]])

# 정답값
y=np.array([[1.0]])

# 은닉층 가중치(3*2), 편향(1*2)
w1=np.array([
    [1.0,-1.0],
    [0.0,2.0],
    [1.0,0.5]
])

b1=np.array([[0.1,0.2]])

# 출력층 가중치(2*1), 편향(1*1)
w2=np.array([
    [2.0],
    [-1.0]
])

b2=np.array([[0.5]])

# 순전파 계산
z1=x@w1+b1
a1=relu(z1)
z2=a1@w2+b2
y_pred=relu(z2)

# 손실함수
loss= 0.5*(y_pred-y)**2

# --- 역전파 ---
# 출력층 계산
# dL_dW2 = dL_dy * dy_dz2 * dz2_dW2
dL_dy = y_pred - y                     # dL/dy_pred
dy_dz2 = relu_deriv(z2)                # ReLU 미분
dz2 = dL_dy * dy_dz2                   # dL/dz2 (1,1)

dW2 = a1.T @ dz2                       # (2,1)
db2 = dz2                              # (1,1)

# 은닉층 계산
dz1 = dz2 @ w2.T * relu_deriv(z1)      # (1,2)
dW1 = x.T @ dz1                        # (3,2)
db1 = dz1                              # (1,2)

# 가중치 및 편향 업데이트
lr=0.1

w1-=lr*dW1
b1-=lr*db1
w2-=lr*dW2
b2-=lr*db2

# 두 번째 순전파
z1_2=x@w1+b1
a1_2=relu(z1_2)
z2_2=a1_2@w2+b2
y_pred_2=relu(z2_2)
loss_2=0.5*(y_pred_2-2)**2

# 결과 출력
print("---순전파---")
print("z1:", z1)
print("a1:", a1)
print("z2:", z2)
print("y_pred:", y_pred)
print("loss:", loss.item())

print("\n---역전파---")
print("dW2:", dW2)
print("db2:", db2)
print("dW1:", dW1)
print("db1:", db1)

print("\n---업데이트 후---")
print("W1:", w1)
print("b1:", b1)
print("W2:", w2)
print("b2:", b2)

print("\n---두 번째 순전파---")
print("z1:", z1_2)
print("a1:", a1_2)
print("z2:", z2_2)
print("y_pred:", y_pred_2)
print("loss:", loss_2.item())

---순전파---
z1: [[0.8 1.4]]
a1: [[0.8 1.4]]
z2: [[0.7]]
y_pred: [[0.7]]
loss: 0.04500000000000008

---역전파---
dW2: [[-0.24]
 [-0.42]]
db2: [[-0.3]]
dW1: [[-0.3   0.15]
 [-0.48  0.24]
 [-0.12  0.06]]
db1: [[-0.6  0.3]]

---업데이트 후---
W1: [[ 1.03  -1.015]
 [ 0.048  1.976]
 [ 1.012  0.494]]
b1: [[0.16 0.17]]
W2: [[ 2.024]
 [-0.958]]
b2: [[0.53]]

---두 번째 순전파---
z1: [[0.9158 1.3421]]
a1: [[0.9158 1.3421]]
z2: [[1.0978474]]
y_pred: [[1.0978474]]
loss: 0.40693965684337957


# **역전파(pytorch 사용)**

In [9]:
import torch
import torch.nn.functional as F

# 입력 데이터와 정답
x = torch.tensor([[0.5, 0.8, 0.2]], dtype = torch.float32)
y = torch.tensor([[1.0]], dtype = torch.float32)

# 가중치 및 편향
w1 = torch.tensor([[1.0, -1.0],
                   [0.0, 2.0],
                    [1.0, 0.5]], requires_grad = True)
b1 = torch.tensor([[0.1, 0.2]], dtype = torch.float32, requires_grad = True)

w2 = torch.tensor([[2.0],
                   [-1.0]], dtype=torch.float32, requires_grad=True)

b2 = torch.tensor([[0.5]], dtype=torch.float32, requires_grad=True)

# 순전파
z1=x@w1+b1
a1=F.relu(z1)
z2=a1@w2+b2
y_pred=F.relu(z2)
loss=0.5*(y_pred-y)**2

# 역전파
loss.backward()

# 파라미터 업데이트
lr=0.1
with torch.no_grad():
  w1-=lr*w1.grad
  b1-=lr*b1.grad
  w2-=lr*w2.grad
  b2-=lr*b2.grad

# 두 번째 순전파
z1_2=x@w1+b1
a1_2=F.relu(z1_2)
z2_2=a1@w2+b2
y_pred_2=F.relu(z2_2)
loss_2=0.5*(y_pred_2-y)**2

# 결과 출력
print("y_red: ",y_pred_2.item())

y_red:  0.8079999685287476


# **순전파+역전파 for문**

In [11]:
import torch
import torch.nn.functional as F

# 입력 데이터와 정답
x = torch.tensor([[0.5, 0.8, 0.2]], dtype = torch.float32)
y = torch.tensor([[1.0]], dtype = torch.float32)

# 가중치 및 편향
W1 = torch.tensor([[1.0, -1.0],
                   [0.0, 2.0],
                    [1.0, 0.5]], requires_grad = True)
b1 = torch.tensor([[0.1, 0.2]], dtype = torch.float32, requires_grad = True)

W2 = torch.tensor([[2.0],
                   [-1.0]], dtype=torch.float32, requires_grad=True)

b2 = torch.tensor([[0.5]], dtype=torch.float32, requires_grad=True)

# 학습 설정
lr = 0.1
epochs=10

print("---학습 시작---")
for epoch in range(1,epochs+1):
  # 순전파
  z1=x@w1+b1
  a1=F.relu(z1)
  z2=a1@w2+b2
  y_pred=F.relu(z2)
  loss=0.5*(y_pred-y)**2

  #역전파
  loss.backward()

  #파라미터 업데이트
  with torch.no_grad():
    w1-=lr*w1.grad
    b1-=lr*b1.grad
    w2-=lr*w2.grad
    b2-=lr*b2.grad

    #기울기 초기화
    w1.grad.zero_()
    b1.grad.zero_()
    w2.grad.zero_()
    b2.grad.zero_()

  print("Epoch",epoch, "| y_pred: ", y_pred.item(), "| Loss: ", loss.item())



---학습 시작---
Epoch 1 | y_pred:  1.281174898147583 | Loss:  0.03952966257929802
Epoch 2 | y_pred:  0.9322159290313721 | Loss:  0.0022973401937633753
Epoch 3 | y_pred:  1.0208213329315186 | Loss:  0.00021676394680980593
Epoch 4 | y_pred:  0.9935687780380249 | Loss:  2.0680308807641268e-05
Epoch 5 | y_pred:  1.0019829273223877 | Loss:  1.966000354514108e-06
Epoch 6 | y_pred:  0.9993883371353149 | Loss:  1.870657229119388e-07
Epoch 7 | y_pred:  1.0001883506774902 | Loss:  1.773798885551514e-08
Epoch 8 | y_pred:  0.9999423027038574 | Loss:  1.6644889910821803e-09
Epoch 9 | y_pred:  1.0000176429748535 | Loss:  1.5563728084089234e-10
Epoch 10 | y_pred:  0.9999944567680359 | Loss:  1.5363710303972766e-11
