# **순전파**

In [3]:
import numpy as np
# ReLU 함수 정의
def relu(x):return np.maximum(0,x)
# 입력층 데이터(1*3)
x = np.array([[0.5, 0.8, 0.2]])
# 은닉층 가중치(3*2), 편향(1*2)
w1 = np.array([
    [1.0, -1.0],
    [0.0, 2.0],
    [1.0, 0.5]
])

b1 = np.array([[0.1, 0.2]])
# 은닉층 계산
z1 = x@ w1 + b1
a1 = relu(z1)
# 출력층 가중치(2*1), 편향(1*1)
w2 = np.array([
    [2.0],
    [-1.0]
])

b2 = np.array([[0.5]])
# 출력층 계산
z2 = a1 @ w2 + b2
y_pred = relu(z2)
# 결과 출력
print("입력 벡터 x:", x)
print("은닉층 선형 출력 z1:", z1)
print("은닉층 활성화 a1 (ReLU):", a1)
print("출력층 선형 출력 z2:", z2)
print("최종 예측 y_pred (ReLU):", y_pred)

입력 벡터 x: [[0.5 0.8 0.2]]
은닉층 선형 출력 z1: [[0.8 1.4]]
은닉층 활성화 a1 (ReLU): [[0.8 1.4]]
출력층 선형 출력 z2: [[0.7]]
최종 예측 y_pred (ReLU): [[0.7]]


순전파

# **역전파 (직접 미분)**

In [6]:
import numpy as np

# ReLU 함수와 도함수 정의
def relu(x):
  return np.maximum(0, x)

def relu_deriv(x):
  return (x > 0).astype(float)
# 입력층 데이터(1*3)
x = np.array([[0.5, 0.8, 0.2]])
# 정답값
y = np.array([[1.0]])
# 은닉층 가중치(3*2), 편향(1*2)
w1 = np.array([
    [1.0, -1.0],
    [0.0, 2.0],
    [1.0, 0.5]
])

b1 = np.array([[0.1, 0.2]])
# 출력층 가중치(2*1), 편향(1*1)
w2 = np.array([
    [2.0],
    [-1.0]
])

b2 = np.array([[0.5]])
# 순전파 계산
z1 = x@ w1 + b1
a1 = relu(z1)
z2 = a1 @ w2 + b2
y_pred = relu(z2)
# 손실함수
loss = 0.5 * (y_pred - y) ** 2
# --- 역전파 ---
# 출력층 계산
# dL_dW2 = dL_dy * dy_dz2 * dz2_dW2
dL_dy = y_pred - y                     # dL/dy_pred
dy_dz2 = relu_deriv(z2)                # ReLU 미분
dz2 = dL_dy * dy_dz2                   # dL/dz2 (1,1)

dw2 = a1.T @ dz2                       # (2,1)
db2 = dz2                              # (1,1)

# 은닉층 계산
dz1 = dz2 @ w2.T * relu_deriv(z1)      # (1,2)
dw1 = x.T @ dz1                        # (3,2)
db1 = dz1                              # (1,2)

# 가중치 및 편향 업데이트
lr =0.1

w1 -= lr * dw1
b1 -= lr * db1
w2 -= lr * dw2
b2 -= lr * db2
# 두 번째 순전파
z1_2 = x @ w1 + b1
a1_2 = relu(z1_2)
z2_2 = a1_2 @ w2 + b2
y_pred_2 = relu(z2_2)
loss_2 = 0.5  * (y_pred_2 - y) ** 2

# 결과 출력
print("---순전파---")
print("z1:", z1)
print("a1:", a1)
print("z2:", z2)
print("y_pred:", y_pred)
print("loss:", loss.item())

print("\n---역전파---")
print("dw2:", dw2)
print("db2:", db2)
print("dw1:", dw1)
print("db1:", db1)

print("\n---업데이트 후---")
print("W1:", w1)
print("b1:", b1)
print("W2:", w2)
print("b2:", b2)

print("\n---두 번째 순전파---")
print("z1:", z1_2)
print("a1:", a1_2)
print("z2:", z2_2)
print("y_pred:", y_pred_2)
print("loss:", loss_2.item())

---순전파---
z1: [[0.8 1.4]]
a1: [[0.8 1.4]]
z2: [[0.7]]
y_pred: [[0.7]]
loss: 0.04500000000000008

---역전파---
dw2: [[-0.24]
 [-0.42]]
db2: [[-0.3]]
dw1: [[-0.3   0.15]
 [-0.48  0.24]
 [-0.12  0.06]]
db1: [[-0.6  0.3]]

---업데이트 후---
W1: [[ 1.03  -1.015]
 [ 0.048  1.976]
 [ 1.012  0.494]]
b1: [[0.16 0.17]]
W2: [[ 2.024]
 [-0.958]]
b2: [[0.53]]

---두 번째 순전파---
z1: [[0.9158 1.3421]]
a1: [[0.9158 1.3421]]
z2: [[1.0978474]]
y_pred: [[1.0978474]]
loss: 0.004787056843380046


역전파 직접미분

# **역전파(pytorch 사용)**

In [19]:
import torch
import torch.nn.functional as F

# 입력 데이터와 정답
x = torch.tensor([[0.5, 0.8, 0.2]], dtype = torch.float32)
y = torch.tensor([[1.0]], dtype = torch.float32)

# 가중치 및 편향
W1 = torch.tensor([[1.0, -1.0],
                   [0.0, 2.0],
                    [1.0, 0.5]], requires_grad = True)
b1 = torch.tensor([[0.1, 0.2]], dtype = torch.float32, requires_grad = True)

W2 = torch.tensor([[2.0],
                   [-1.0]], dtype=torch.float32, requires_grad=True)

b2 = torch.tensor([[0.5]], dtype=torch.float32, requires_grad=True)

# 순전파
z1 = x @ W1 + b1
a1 = F.relu(z1)
z2 = a1 @ W2+ b2
y_pred = F.relu(z2)
loss = 0.5 * (y_pred - y) ** 2
# 역전파
loss.backward()

# 파라미터 업데이트
lr =0.1
with torch.no_grad():
  W1 -= lr * W1.grad
  b1 -= lr * b1.grad
  W2 -= lr * W2.grad
  b2 -= lr * b2.grad
# 두 번째 순전파
z1_2 = x @ W1 + b1
a1_2 = F.relu(z1_2)
z2_2 = a1 @ W2+ b2
y_pred_2 = F.relu(z2_2)
loss_2 = 0.5 * (y_pred_2 - y) ** 2

# 결과 출력
print("y_pred: ", y_pred_2.item())

y_pred:  0.8079999089241028


역전파 파이토치사용

# **순전파+역전파 for문**

In [24]:
import torch
import torch.nn.functional as F
# 입력 데이터와 정답
x = torch.tensor([[0.5, 0.8, 0.2]], dtype = torch.float32)
y = torch.tensor([[1.0]], dtype = torch.float32)

# 가중치 및 편향
W1 = torch.tensor([[1.0, -1.0],
                   [0.0, 2.0],
                    [1.0, 0.5]], requires_grad = True)
b1 = torch.tensor([[0.1, 0.2]], dtype = torch.float32, requires_grad = True)

W2 = torch.tensor([[2.0],
                   [-1.0]], dtype=torch.float32, requires_grad=True)

b2 = torch.tensor([[0.5]], dtype=torch.float32, requires_grad=True)

# 학습 설정
lr = 0.1
epochs = 10

print("---학습 시작---")
for epoch in range(1, epochs+1):
  # 순전파
  z1 = x @ W1 + b1
  a1 = F.relu(z1)
  z2 = a1 @ W2+ b2
  y_pred = F.relu(z2)
  loss = 0.5 * (y_pred - y) ** 2

  # 역전파
  loss.backward()

  # 파라미터 업데이트
  lr =0.1
  with torch.no_grad():
    W1 -= lr * W1.grad
    b1 -= lr * b1.grad
    W2 -= lr * W2.grad
    b2 -= lr * b2.grad

    #  기울기 초기화
    W1.grad.zero_()
    b1.grad.zero_()
    W2.grad.zero_()
    b2.grad.zero_()
  print("Epoch", epoch, "l y_pred: ", y_pred.item(), "l loss: ", loss.item())

# 두 번째 순전파
z1_2 = x @ W1 + b1
a1_2 = F.relu(z1_2)
z2_2 = a1 @ W2+ b2
y_pred_2 = F.relu(z2_2)
loss_2 = 0.5 * (y_pred_2 - y) ** 2

# 결과 출력
print("y_pred: ", y_pred_2.item())

---학습 시작---
Epoch 1 l y_pred:  0.6999999284744263 l loss:  0.045000020414590836
Epoch 2 l y_pred:  1.0978477001190186 l loss:  0.004787086043506861
Epoch 3 l y_pred:  0.9676424264907837 l loss:  0.0005235062562860548
Epoch 4 l y_pred:  1.0106127262115479 l loss:  5.63149806112051e-05
Epoch 5 l y_pred:  0.9965112209320068 l loss:  6.085789664211916e-06
Epoch 6 l y_pred:  1.0011460781097412 l loss:  6.567474883922841e-07
Epoch 7 l y_pred:  0.9996234178543091 l loss:  7.090705622658788e-08
Epoch 8 l y_pred:  1.0001239776611328 l loss:  7.685230229981244e-09
Epoch 9 l y_pred:  0.9999592304229736 l loss:  8.310792054544436e-10
Epoch 10 l y_pred:  1.0000131130218506 l loss:  8.597567102697212e-11
y_pred:  1.000008225440979
