In [1]:
import numpy as np
from functions import *
from gradient import numerical_gradient

In [2]:
class simpleNet:    #간단한 신경망 구현
    def __init__(self):
        self.W = np.random.randn(2, 3) #2x3 가중치 배열
    def predict(self, x):               #예측값 반환
        return np.dot(x, self.W)
    def loss(self, x, t):
        z = self.predict(x)         #z: 예측값
        y = softmax(z)              #y = a.f값
        loss = cross_entropy_error(y, t)    #교제 제곱 오차: 타깃과 예측의 차이
        return loss

In [3]:
x = np.array([0.6, 0.9])    #입력값
t = np.array([0, 0, 1]) #target: 정답
net = simpleNet()
f = lambda w: net.loss(x, t)

dW = numerical_gradient(f, net.W)
for i in range(5):
    print(net.W)
    print(softmax(net.predict(x)))
    print(net.loss(x, t))
    net.W -= dW

[[ 1.20109725 -1.69259724  0.44337087]
 [-0.90359848  0.23810579  1.02588161]]
[0.19624164 0.09660823 0.70715014]
0.34651213772192035
[[ 1.08335228 -1.75056217  0.61908076]
 [-1.08021593  0.1511584   1.28944645]]
[0.1259554  0.06967322 0.80437138]
0.21769407633254262
[[ 0.96560732 -1.8085271   0.79479065]
 [-1.25683338  0.06421101  1.55301129]]
[0.07728408 0.04803584 0.87468008]
0.1338969632568107
[[ 0.84786235 -1.86649202  0.97050055]
 [-1.43345083 -0.02273638  1.81657613]]
[0.04596438 0.03210132 0.9219343 ]
0.08128120919665376
[[ 0.73011739 -1.92445695  1.14621044]
 [-1.61006828 -0.10968377  2.08014098]]
[0.02678715 0.02102104 0.95219181]
0.048988678335961634


In [4]:
class simpleNet:
    def __init__(self, a, b):   #a: X.r, b: X.c
        self.W = np.random.randn(a, b)
    def predict(self, x):
        return np.dot(x, self.W)
    def loss(self, x, t):
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy_error(y, t)
        return loss
    
x = np.array([0.5, -2.2, -7])
t = np.array([0, 1, 0, 0])
net = simpleNet(3, 4)
f = lambda w: net.loss(x, t)
dW = numerical_gradient(f, net.W)
for i in range(5):
    print(net.W)
    print(softmax(net.predict(x)))
    print(net.loss(x, t))
    net.W -= dW

[[-0.36197308  0.51418862  1.61766819  0.81757999]
 [ 0.75940835 -0.29633415 -0.47506047  1.19658565]
 [-0.53475504  1.26451112 -0.19537048 -0.35881737]]
[2.00717696e-01 1.07564535e-05 7.58892050e-01 4.03794980e-02]
11.430750864873882
[[-0.46140752  1.00957774  1.24171729  0.79757621]
 [ 1.19691985 -2.47604626  1.17912351  1.28460228]
 [ 0.85732705 -5.67093649  5.06794213 -0.07876444]]
[2.11314924e-24 1.00000000e+00 8.15143780e-37 2.29252782e-21]
-9.999999505838704e-08
[[ -0.56084195   1.50496685   0.86576638   0.77757244]
 [  1.63443135  -4.65575837   2.83330749   1.3726189 ]
 [  2.24940914 -12.60638411  10.33125475   0.20128848]]
[2.39300596e-52 1.00000000e+00 9.41797509e-78 1.40003024e-45]
-9.999999505838704e-08
[[ -0.66027638   2.00035597   0.48981548   0.75756866]
 [  2.07194285  -6.83547048   4.48749147   1.46063553]
 [  3.64149124 -19.54183172  15.59456737   0.48134141]]
[2.70992575e-080 1.00000000e+000 1.08813018e-118 8.54988394e-070]
-9.999999505838704e-08
[[ -0.75971081   2.4

In [5]:
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01): #입력 크기, 히든 레이어 크기, 출력 크기, 실수값으로 조지기
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size) #입력x히든 사이즈로 난수 배열 만들고 실수로 값 변환
        self.params['b1'] = np.zeros(hidden_size)   #히든 레이어 개수만큼 
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return y
    def loss(self, x, t):
        y = self.predict(x)
        return cross_entropy_error(y, t)
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0]) #np.sum(y==t) y==t인 case의 횟수를 저장, 이를 평균을 내줌
        return accuracy                               #그렇게 구한 정확도를 반환
    
    def numerical_gradient(self, x, t):                 #손실함수를 w에 대해 미분함 -> gradient를 반환
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads
    
    def gradient(self, x, t):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        batch_num = x.shape[0]
        #forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        #backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)
        da1 = np.dot(dy, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)
        return grads


In [6]:
from datasetsub.mnist import load_mnist
(x_train, y_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
train_loss_list = []
iters_num = 100
learning_rate = 1
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
for i in range(iters_num):
    grad = network.numerical_gradient(x_train, y_train)
    for key in {'W1', 'b1', 'W2', 'b2'}:
        network.params[key] -= learning_rate * grad[key]
        train_loss_list.append(loss)

KeyboardInterrupt: 

In [None]:
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(784, 50, 10)
iters_num = 1000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
train_loss_list = []
train_acc_list = []
test_acc_list = []
iter_per_epoch = max(train_size / batch_size, 1)
iter_per_epoch = 10
for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    grad = network.gradient(x_batch, t_batch)
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))        

### 오늘의 연습
### 최소 20개 이상의 자료에 대한 좌표를 지정.
### 또한 각 10개 이상의 동일 그룹에 대해 0, 1, ~ 등으로 출력 레이블을 지정
### 테스트 좌표는 4개 이상 지정

### 예시

In [11]:
x_train = np.array([[0, 0], #입력 4개->20개
                    [0, 1],
                    [1, 0],
                    [1, 1]])
# OR 게이트
t_train = np.array([[0],    #출력 레이블 2개(0, 1) -> ""
                    [1],
                    [1],
                    [1]])

x_test = np.array([[0.5, 0.5], [1.1, 0.9]]) #테스트 좌표 2개 -> 4개
t_test = np.array([[1], [1]])               #테스트 좌표의 레이블 2개 -> 2개

network = TwoLayerNet(2, 3, 1) #(784, 50, 10)-> (2, 5||3, 1)
iters_num = 10000                 #10000->100
train_size = x_train.shape[0]
learning_rate = 0.1
train_loss_list = []
train_acc_list = []
test_acc_list = []
iter_per_epoch = 100
for i in range(iters_num):
    grad = network.gradient(x_train, t_train)
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    loss = network.loss(x_train, t_train)
    train_loss_list.append(loss)
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))        

train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train acc, test acc | 1.0, 1.0
train ac

# 과제

In [None]:
import numpy as np
import random
from functions import *
from gradient import numerical_gradient

class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01): #입력 크기, 히든 레이어 크기, 출력 크기, 실수값으로 조지기
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size) #입력x히든 사이즈로 난수 배열 만들고 실수로 값 변환
        self.params['b1'] = np.zeros(hidden_size)   #히든 레이어 개수만큼 
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return y
    def loss(self, x, t):
        y = self.predict(x)
        return cross_entropy_error(y, t)
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0]) #np.sum(y==t) y==t인 case의 횟수를 저장, 이를 평균을 내줌
        return accuracy                               #그렇게 구한 정확도를 반환
    
    def numerical_gradient(self, x, t):                 #손실함수를 w에 대해 미분함 -> gradient를 반환
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads
    
    def gradient(self, x, t):   #x=20x2, t=20x1
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        batch_num = x.shape[0]  #20
        #forward
        a1 = np.dot(x, W1) + b1 
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        #backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)
        
        da1 = np.dot(dy, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)
        return grads


#학습데이터 20개 생성
x_train0 = np.random.uniform(0, 1, (10, 2))
x_train0 = np.round(x_train0, decimals=2)

x_train1 = np.random.uniform(1, 2, (10, 2))
x_train1 = np.round(x_train1, decimals=2)

x_train = np.vstack((x_train0, x_train1))

# 출력 레이블: 0 || 1
t_train = np.where(np.any(x_train > 1, axis=1), 1, 0).reshape(-1, 1)

# 테스트 데이터 4개
x_test = np.random.uniform(0, 2, (20, 2))
x_test = np.round(x_test, decimals=2)
# x_test의 예상 레이블 4개
t_test = np.where(np.any(x_test > 1, axis=1), 1, 0).reshape(-1, 1)     

network = TwoLayerNet(2, 3, 1) #(784, 50, 10)-> (2, 5||3, 1), (#입력, #히든 레이어, #출력)
iters_num = 3000                 #10000->100
train_size = x_train.shape[0] #20
learning_rate = 0.1
train_loss_list = []
train_acc_list = []
test_acc_list = []
iter_per_epoch = 100
for i in range(iters_num):
    grad = network.gradient(x_train, t_train)
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    loss = network.loss(x_train, t_train)
    train_loss_list.append(loss)
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))   

In [None]:
print(x_train)
print(x_test)

## 나도 이거 뭔지 기억 안 남

In [None]:
# import numpy as np
# import random

# # 20개 이상의 학습 데이터를 생성 (10개씩 두 그룹으로 나눔)
# x_train = np.random.uniform(0, 2, (20, 2))
# x_train = np.round(x_train, decimals=2)

# # 출력 레이블 설정 (그룹 0은 0, 그룹 1은 1로 설정)
# t_train = np.where(np.any(x_train > 1, axis=1), 1, 0)   

# # 테스트 데이터 (4개 이상의 좌표)
# x_test = np.random.uniform(0, 2, (4, 2))
# x_test = np.round(x_test, decimals=2)

# # 테스트 데이터의 예상 레이블
# t_test = np.where(np.any(x_test > 1, axis=1), 1, 0)    


# print('x_train')
# print(x_train)

# print('\nt_train')
# print(t_train)

# print('\nx_test')
# print(x_test)

# print('\nt_test')
# print(t_test)  