### 1.2.1 신경망 추론 전체 그림

#### 완전연결계층 코딩 실습

##### 완전연결계층에 의한 변환의 미니배치 구현

In [1]:
import numpy as np

# Mini-batch affine transform: 10 samples with 2 features -> 4 hidden units.
W1 = np.random.randn(2, 4)  # weights, shape (2, 4)
b1 = np.random.randn(4)     # bias, shape (4,)
x = np.random.randn(10, 2)  # inputs, shape (10, 2)
h = x @ W1 + b1             # bias is broadcast over the 10 rows
print(h)


[[-1.24000366  3.04544052 -0.44235178 -1.05815328]
 [-4.39353902 -1.49643055  4.83324546  0.90497929]
 [ 3.17634619 -0.96317311 -1.68802405 -0.52360303]
 [ 0.67070143 -1.25009337  0.53593218 -0.01577409]
 [ 2.09592904 -0.43012539 -1.11812214 -0.51261986]
 [ 2.02457146  4.31788142 -3.87223773 -2.00437768]
 [ 1.74693284  0.80664823 -1.56467045 -0.84621355]
 [ 1.85696385  0.75514994 -1.62436148 -0.848215  ]
 [-1.32654785 -1.11726259  2.09448255  0.27452289]
 [-1.26888604  0.2879631   1.21479098 -0.18008946]]


##### 비선형 활성화 함수를 이용하여 신경망의 표현력을 높임

In [2]:

def sigmoid(x):
    """Apply the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp`` so that large negative inputs no longer overflow inside
    ``np.exp`` (the naive form emits a RuntimeWarning in that case).

    Args:
        x: Scalar or NumPy array of pre-activation values.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x``.
    """
    # log(1 + exp(-x)) == logaddexp(0, -x), computed without overflow.
    return np.exp(-np.logaddexp(0, -x))

# Forward pass of a 2-4-3 network on a mini-batch of 10 samples.
x = np.random.randn(10, 2)   # inputs, shape (10, 2)
W1 = np.random.randn(2, 4)   # first-layer weights
b1 = np.random.randn(4)      # first-layer bias
W2 = np.random.randn(4, 3)   # second-layer weights
b2 = np.random.randn(3)      # second-layer bias

h = x @ W1 + b1   # hidden pre-activations, shape (10, 4)
a = sigmoid(h)    # non-linear activation
s = a @ W2 + b2   # output scores, shape (10, 3)

print(s)


[[-2.05901641 -4.15521395 -1.77113949]
 [-1.90983627 -4.22236104 -2.19223139]
 [-1.432128   -3.17420425 -1.11785206]
 [-1.79410485 -4.33912303 -2.85766657]
 [-1.77277526 -2.82517474  0.12981712]
 [-1.73598895 -4.38610636 -2.92516049]
 [-2.05618148 -4.08867    -1.62875936]
 [-1.3592505  -2.88395272 -0.71528679]
 [-1.98484835 -4.2540416  -2.16803874]
 [-1.8955853  -4.29974816 -2.4548625 ]]


In [3]:
class Sigmoid:
    """Sigmoid activation layer (forward pass only).

    Attributes:
        params: Empty list; this layer holds no parameters.
    """

    def __init__(self):
        self.params = []

    def forward(self, x):
        """Return ``1 / (1 + exp(-x))`` element-wise.

        Evaluated as ``exp(-logaddexp(0, -x))`` so that large negative
        inputs do not overflow inside ``np.exp``.

        Args:
            x: Scalar or NumPy array of pre-activation values.

        Returns:
            The sigmoid of ``x``, with the same shape as ``x``.
        """
        # log(1 + exp(-x)) == logaddexp(0, -x), computed without overflow.
        return np.exp(-np.logaddexp(0, -x))

class Affine:
    """Fully connected layer computing ``x @ W + b`` (forward pass only).

    Attributes:
        params: Two-element list ``[W, b]`` holding the weight and the bias.
    """

    def __init__(self, W, b):
        self.params = [W, b]

    def forward(self, x):
        """Return the affine transform of ``x`` using the stored ``W`` and ``b``."""
        weight, bias = self.params
        return np.matmul(x, weight) + bias

### 1.2.2 계층으로 클래스화 및 순전파 구현

#### 신경망 처리를 계층(layer)으로 구현

In [4]:
class Sigmoid:
    """Sigmoid activation layer exposing the common ``params``/``forward`` interface.

    Attributes:
        params: Empty list; the activation has nothing to learn.
    """

    def __init__(self):
        self.params = []

    def forward(self, x):
        """Compute the element-wise logistic sigmoid of ``x``.

        Uses ``exp(-logaddexp(0, -x))``, which equals ``1 / (1 + exp(-x))``
        but avoids the overflow that ``np.exp(-x)`` hits for large negative
        inputs.

        Args:
            x: Scalar or NumPy array of pre-activation values.

        Returns:
            The sigmoid of ``x``, with the same shape as ``x``.
        """
        # logaddexp(0, -x) is log(1 + exp(-x)) evaluated without overflow.
        return np.exp(-np.logaddexp(0, -x))

class Affine:
    """Affine (fully connected) layer: ``out = x @ W + b``.

    Attributes:
        params: List ``[W, b]`` with the weight and the bias, in that order.
    """

    def __init__(self, W, b):
        self.params = [W, b]

    def forward(self, x):
        """Multiply ``x`` by the stored weight and add the stored bias."""
        weight, bias = self.params
        projected = np.matmul(x, weight)
        return projected + bias


##### 예시코드

In [5]:
import numpy as np

# Build the layers.
# NOTE: binding the name `sigmoid` to a Sigmoid instance replaces the
# `sigmoid` function defined in an earlier cell of this notebook.
affine1 = Affine(W=np.random.randn(2, 3), b=np.zeros(3))  # 2-D input -> 3-D output
sigmoid = Sigmoid()
affine2 = Affine(W=np.random.randn(3, 1), b=np.zeros(1))  # 3-D input -> 1-D output

# Sample input data
x = np.array([[0.1, 0.5]])  # batch size 1, 2 features

# Run forward propagation layer by layer
h1 = affine1.forward(x)      # first affine transform
h2 = sigmoid.forward(h1)     # apply the activation function
y = affine2.forward(h2)      # second affine transform

print("입력 데이터:", x)
print("첫 번째 은닉층 출력:", h1)
print("활성화 함수 적용 후:", h2)
print("최종 출력:", y)

# Inspect each layer's parameter list (the Sigmoid layer's list is empty)
print("\n파라미터 확인:")
print("Affine1 파라미터:", affine1.params)
print("Sigmoid 파라미터:", sigmoid.params)
print("Affine2 파라미터:", affine2.params)

입력 데이터: [[0.1 0.5]]
첫 번째 은닉층 출력: [[-0.0716402   0.26957182  1.41086835]]
활성화 함수 적용 후: [[0.48209761 0.56698779 0.80390287]]
최종 출력: [[-0.2043349]]

파라미터 확인:
Affine1 파라미터: [array([[-1.14362811,  0.567009  , -0.57878563],
       [ 0.08544522,  0.42574185,  2.93749383]]), array([0., 0., 0.])]
Sigmoid 파라미터: []
Affine2 파라미터: [array([[-0.09200721],
       [-1.20537624],
       [ 0.6511423 ]]), array([0.])]


##### (1) TwoLayerNet 클래스 신경망

In [6]:
class TwoLayerNet:
    """Two-layer fully connected network: Affine -> Sigmoid -> Affine.

    Attributes:
        layers: The three layer objects, in forward order.
        params: Flat list of every layer's parameters, in layer order.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers with standard-normal random weights and biases.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
        """
        # Draw order (W1, b1, W2, b2) is kept so seeded runs stay reproducible.
        hidden_w = np.random.randn(input_size, hidden_size)
        hidden_b = np.random.randn(hidden_size)
        output_w = np.random.randn(hidden_size, output_size)
        output_b = np.random.randn(output_size)

        self.layers = [
            Affine(hidden_w, hidden_b),
            Sigmoid(),
            Affine(output_w, output_b),
        ]

        # Gather the parameters of all layers into one flat list.
        self.params = [p for layer in self.layers for p in layer.params]

    def predict(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        out = x
        for layer in self.layers:
            out = layer.forward(out)
        return out

In [7]:
# Generate the input data (10 samples, 2 features each)
x = np.random.randn(10, 2)

# Build a network with a 2-4-3 structure (input, hidden, output sizes)
model = TwoLayerNet(2, 4, 3)

# Run the prediction: the input is passed through every layer in order
s = model.predict(x)
print(s)

[[ 0.91397965 -1.66465204  0.27253488]
 [ 0.87028018 -1.30414993  0.90061502]
 [ 0.97599007 -1.84104859  0.01923302]
 [ 0.9360887  -1.5018601   0.51560461]
 [ 0.94710176 -1.41747998  0.68330381]
 [ 0.73037909 -1.47455074  0.64049816]
 [ 0.78092866 -1.68479444  0.35152162]
 [ 0.6384145  -2.06482416  0.24858677]
 [ 0.72162174 -1.77901629  0.35365871]
 [ 0.89395185 -1.50908361  0.46868189]]


#### 1.3.6 가중치 갱신(1)

In [8]:
class SGD:
    """Stochastic gradient descent: ``param -= lr * grad`` for each parameter.

    Attributes:
        lr: Learning rate applied to every gradient.
    """

    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        """Update every entry of ``params`` using the matching entry of ``grads``.

        The ``-=`` is applied through the list slot, so NumPy arrays are
        modified in place and keep their identity.

        Args:
            params: List of parameters to update.
            grads: List of gradients, indexed like ``params``.
        """
        for idx, _ in enumerate(params):
            params[idx] -= self.lr * grads[idx]

In [9]:
import numpy as np

class Sigmoid:
    """Sigmoid activation layer with forward and backward passes.

    Attributes:
        params: Empty list; this layer holds no parameters.
        out: Output of the most recent ``forward`` call, reused by
            ``backward``; ``None`` until ``forward`` has run.
    """

    def __init__(self):
        self.params = []
        self.out = None

    def forward(self, x):
        """Compute the element-wise sigmoid of ``x`` and cache the result.

        Evaluated as ``exp(-logaddexp(0, -x))``, which equals
        ``1 / (1 + exp(-x))`` but does not overflow inside ``np.exp`` for
        large negative inputs.

        Args:
            x: Scalar or NumPy array of pre-activation values.

        Returns:
            The sigmoid of ``x``; the same object is stored in ``self.out``.
        """
        # log(1 + exp(-x)) == logaddexp(0, -x), computed without overflow.
        self.out = np.exp(-np.logaddexp(0, -x))
        return self.out

    def backward(self, dout):
        """Propagate ``dout`` back through the sigmoid.

        Uses the derivative ``y * (1 - y)``, where ``y`` is the cached
        forward output, so ``forward`` must have been called first.

        Args:
            dout: Upstream gradient, shaped like the forward output.

        Returns:
            Gradient with respect to the forward input.
        """
        return dout * self.out * (1 - self.out)

class Affine:
    """Fully connected layer with forward and backward passes.

    Attributes:
        params: List ``[W, b]`` with the weight and the bias.
        x: Input of the most recent ``forward`` call (``None`` before that).
        dW: Weight gradient from the most recent ``backward`` call.
        db: Bias gradient from the most recent ``backward`` call.
    """

    def __init__(self, W, b):
        self.params = [W, b]
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        """Return ``x @ W + b`` and remember ``x`` for the backward pass."""
        weight, bias = self.params
        self.x = x
        return np.matmul(x, weight) + bias

    def backward(self, dout):
        """Store ``dW`` and ``db`` and return the gradient w.r.t. the input.

        Args:
            dout: Upstream gradient, shaped like the forward output.

        Returns:
            ``dout @ W.T``, the gradient with respect to the cached input.
        """
        weight, _ = self.params
        self.dW = np.matmul(self.x.T, dout)
        # The bias was broadcast over the rows, so its gradient sums over axis 0.
        self.db = np.sum(dout, axis=0)
        return np.matmul(dout, weight.T)

class MSE:
    """Squared-error loss: sum of squared differences divided by ``len(y)``."""

    def forward(self, y, t):
        """Cache ``y`` and ``t`` and return the loss value.

        Args:
            y: Predictions.
            t: Targets, broadcast-compatible with ``y``.

        Returns:
            ``sum((y - t) ** 2) / len(y)``.
        """
        self.y, self.t = y, t
        residual = y - t
        squared_error = np.sum(np.square(residual))
        return squared_error / len(y)

    def backward(self):
        """Return the gradient of the loss w.r.t. the cached predictions."""
        batch_size = len(self.y)
        return 2 * (self.y - self.t) / batch_size

class TwoLayerNet:
    """Affine -> Sigmoid -> Affine network with an MSE loss layer.

    Attributes:
        layers: The three layer objects, in forward order.
        loss_layer: ``MSE`` instance used by ``forward`` and ``gradient``.
        params: Flat list of every layer's parameters, in layer order.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers with small random weights and zero biases.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
        """
        # Weights are standard-normal samples scaled by 0.01; biases start at zero.
        hidden_w = np.random.randn(input_size, hidden_size) * 0.01
        hidden_b = np.zeros(hidden_size)
        output_w = np.random.randn(hidden_size, output_size) * 0.01
        output_b = np.zeros(output_size)

        self.layers = [
            Affine(hidden_w, hidden_b),
            Sigmoid(),
            Affine(output_w, output_b),
        ]
        self.loss_layer = MSE()

        # Gather the parameters of all layers into one flat list.
        self.params = [p for layer in self.layers for p in layer.params]

    def predict(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        out = x
        for layer in self.layers:
            out = layer.forward(out)
        return out

    def forward(self, x, t):
        """Return the loss of the prediction for ``x`` against targets ``t``."""
        return self.loss_layer.forward(self.predict(x), t)

    def gradient(self, x, t):
        """Run forward and backward passes and collect the parameter gradients.

        Args:
            x: Input batch.
            t: Target batch.

        Returns:
            List ``[dW, db, ...]`` with one pair per Affine layer, in layer
            order (the same order as ``self.params``).
        """
        # The forward pass fills the caches that the backward pass reads.
        self.forward(x, t)

        # Backward pass: start at the loss and walk the layers in reverse.
        upstream = self.loss_layer.backward()
        for layer in self.layers[::-1]:
            upstream = layer.backward(upstream)

        # Only Affine layers carry gradients.
        grads = []
        for layer in self.layers:
            if isinstance(layer, Affine):
                grads.extend((layer.dW, layer.db))
        return grads

class SGD:
    """Plain stochastic gradient descent optimizer.

    Attributes:
        lr: Learning rate (step size).
    """

    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        """Subtract ``lr * grads[i]`` from ``params[i]`` for every index.

        NumPy arrays are changed in place because ``-=`` is applied through
        the list slot, so other references to the same arrays see the update.

        Args:
            params: List of parameters to update.
            grads: List of gradients, indexed like ``params``.
        """
        for position, _ in enumerate(params):
            params[position] -= self.lr * grads[position]

# Generate random data
x = np.random.randn(10, 2)  # inputs: 10 samples, 2 features
t = np.random.randn(10, 3)  # targets: 10 samples, 3 values

# Initialise the model and the optimizer
model = TwoLayerNet(2, 4, 3)
optimizer = SGD(lr=0.1)

# Training loop
losses = []
for epoch in range(100):  # 100 iterations; each "epoch" here is one mini-batch update
    # Draw a mini-batch of 5 row indices out of 10.
    # np.random.choice samples with replacement by default, so a batch may
    # contain the same row more than once.
    batch_mask = np.random.choice(10, 5)
    x_batch = x[batch_mask]
    t_batch = t[batch_mask]

    # Compute the loss and the gradients.
    # NOTE: model.gradient() runs its own forward pass internally, so the
    # forward computation is performed twice per iteration.
    loss = model.forward(x_batch, t_batch)
    grads = model.gradient(x_batch, t_batch)

    # Update the parameters (SGD.update applies `-=` to each entry of model.params)
    optimizer.update(model.params, grads)

    # Record the loss
    losses.append(loss)

    # Print the loss every 20 iterations
    if (epoch + 1) % 20 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss:.4f}")

# Final prediction on the full data set
final_pred = model.predict(x)
print("\n최종 예측값:")
print(final_pred)

Epoch 20, Loss: 3.0357
Epoch 40, Loss: 2.6965
Epoch 60, Loss: 0.9056
Epoch 80, Loss: 2.1439
Epoch 100, Loss: 1.9815

최종 예측값:
[[ 0.20266399 -0.35110293  0.24456534]
 [ 0.19621042 -0.34179565  0.24179674]
 [ 0.18796077 -0.32974902  0.23848594]
 [ 0.16989639 -0.30407139  0.230151  ]
 [ 0.21866642 -0.37418404  0.25142913]
 [ 0.22925982 -0.38964728  0.25569621]
 [ 0.15718969 -0.28590596  0.22443358]
 [ 0.17019181 -0.30454079  0.23021147]
 [ 0.23632914 -0.39964125  0.25904678]
 [ 0.20870665 -0.36007908  0.24675689]]


In [20]:
import sys
sys.path.append('..')  # make modules in the parent directory importable
from data import spiral
import matplotlib.pyplot as plt

# NOTE(review): the recorded run of this cell ended with
# "AttributeError: module 'numpy' has no attribute 'int'". No `np.int` appears
# in this cell, so the error presumably originates inside `data.spiral` - confirm.

# Load the data
x, t = spiral.load_data()
print('x', x.shape)  # expected: (300, 2)
print('t', t.shape)  # expected: (300, 3)

# Visualise the data
N = 100
CLS_NUM = 3
markers = ['o', 'x', '^']

# Plot each block of N consecutive rows with its own marker.
# Assumes rows are grouped by class in blocks of N - TODO confirm against data.spiral.
for i in range(CLS_NUM):
    plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], marker=markers[i])

plt.xlabel('x0')
plt.ylabel('x1')
plt.title('스파이럴 데이터셋')
plt.grid(True)
plt.show()

AttributeError: module 'numpy' has no attribute 'int'.
`np.int` was a deprecated alias for the builtin `int`. To avoid this error in existing code, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

In [10]:
import numpy as np
import matplotlib.pyplot as plt
from data.spiral import load_data  # import path adjusted

# NOTE(review): the recorded run of this cell still ended with
# "AttributeError: module 'numpy' has no attribute 'int'". No `np.int` appears
# in this cell, so the error presumably originates inside `data.spiral` - confirm.

# Load the data
x, t = load_data()
print('x', x.shape)  # expected: (300, 2)
print('t', t.shape)  # expected: (300, 3)

# Visualise the data
N = 100
CLS_NUM = 3
markers = ['o', 'x', '^']

# Plot each block of N consecutive rows with its own marker.
# Assumes rows are grouped by class in blocks of N - TODO confirm against data.spiral.
for i in range(CLS_NUM):
    plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], marker=markers[i])

plt.xlabel('x0')
plt.ylabel('x1')
plt.title('스파이럴 데이터셋')
plt.grid(True)
plt.show()

AttributeError: module 'numpy' has no attribute 'int'.
`np.int` was a deprecated alias for the builtin `int`. To avoid this error in existing code, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations