In [1]:
import numpy as np

## 손실 함수(Loss function)

In [3]:
def sum_squares_error(y, t):
    """Return half the sum of squared differences between ``y`` and ``t``.

    Args:
        y: Predicted values.
        t: Target values, broadcast-compatible with ``y``.

    Returns:
        ``0.5 * sum((y - t) ** 2)`` reduced over every element.
    """
    residual = y - t
    squared = residual ** 2
    return 0.5 * np.sum(squared)

In [4]:
def cross_entropy_error(y, t):
    """Cross-entropy between predicted probabilities ``y`` and targets ``t``.

    Args:
        y: Predicted probability values.
        t: Target weights multiplied element-wise with ``log(y)``
            (for example a one-hot vector).

    Returns:
        ``-sum(t * log(y + 1e-7))`` reduced over every element.
    """
    # Small offset added to y so that log never receives exactly 0
    # when an entry of y is 0.
    eps = 1e-7
    log_probs = np.log(y + eps)
    return -np.sum(t * log_probs)

In [5]:
# One-hot target whose correct class is index 4, and a prediction that puts
# its largest probability (0.6) on index 2 instead.
t = np.array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0])
y = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0])
# Evaluate both loss functions on the same pair, one value per line.
for loss_fn in (sum_squares_error, cross_entropy_error):
    print(loss_fn(y, t))

0.6475
2.9957302735559908


## 수치 미분

In [6]:
# Case where x is one-dimensional (a single variable)
def numerical_diff(f, x):
    """Approximate the derivative of ``f`` at ``x`` by a central difference.

    Args:
        f: Callable evaluated at ``x + h`` and ``x - h``.
        x: Point at which to differentiate.

    Returns:
        ``(f(x + h) - f(x - h)) / (2 * h)`` with ``h = 1e-4``.
    """
    step = 1e-4
    ahead = f(x + step)
    behind = f(x - step)
    return (ahead - behind) / (2 * step)

In [7]:
def function_1(x):
    """Evaluate the quadratic ``0.01 * x**2 + 0.1 * x``."""
    quadratic_term = 0.01 * x ** 2
    linear_term = 0.1 * x
    return quadratic_term + linear_term

# Array from 0 up to (but not including) 20 in steps of 0.1
x = np.arange(0.0, 20.0, 0.1)

# Central-difference derivative of function_1 at x = 5 and x = 10;
# the analytic derivatives there are 0.2 and 0.3.
for x0 in (5, 10):
    print(numerical_diff(function_1, x0))

0.1999999999990898
0.2999999999986347


In [8]:
# Case where x is an n-dimensional array
def numerical_gradient(f, x):
    """Approximate the gradient of ``f`` at ``x`` by central differences.

    Each element of ``x`` is perturbed in place by ``+h`` and ``-h``
    (``h = 1e-4``), ``f(x)`` is evaluated for both, and the element is then
    restored, so ``x`` holds its original values when the function returns.

    Args:
        f: Callable taking the array ``x`` and returning a scalar.
        x: NumPy array of any shape. It should have a floating-point dtype,
            because the perturbed values are written back into it.

    Returns:
        Array created with ``np.zeros_like(x)`` holding the partial
        derivative of ``f`` with respect to every element of ``x``.
    """
    h = 1e-4
    grad = np.zeros_like(x)  # zeros with the same shape as x

    # np.ndindex yields the full index tuple of every element, so the loop
    # works for weight matrices as well as 1-D vectors.
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]

        # f(x + h) along this coordinate
        x[idx] = tmp_val + h
        fxh1 = f(x)

        # f(x - h) along this coordinate
        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value

    return grad

In [9]:
def function_2(x):
    """Return ``x[0]**2 + x[1]**2`` for an indexable ``x``."""
    first, second = x[0], x[1]
    return first ** 2 + second ** 2

# Numerical gradient of function_2 at (3, 4) and at (0, 2)
for coords in ([3.0, 4.0], [0.0, 2.0]):
    print(numerical_gradient(function_2, np.array(coords)))

[6. 8.]
[0. 4.]


## 경사하강법(Gradient Descent)

In [10]:
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Minimize ``f`` by plain gradient descent with a numerical gradient.

    Args:
        f: Callable taking an array and returning a scalar.
        init_x: Starting point (initial weights). It is copied, so the
            caller's array is left unchanged.
        lr: Learning rate, the factor applied to the gradient at each step.
        step_num: Number of update steps.

    Returns:
        New float array holding the point reached after ``step_num`` updates.
    """
    # Work on a float copy so the in-place update below does not overwrite
    # the array the caller passed in.
    x = np.array(init_x, dtype=float)

    for _ in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x

In [11]:
# Minimize function_2 from the start point (-3, 4) with learning rate 0.1
# for 100 steps.
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x = init_x, lr = 0.1, step_num = 100)

array([-6.11110793e-10,  8.14814391e-10])

#### 학습률이 너무 큰 경우

In [12]:
# Same start point and step count, but with a learning rate of 10.0.
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=10.0, step_num=100)

# Fails to converge; the values diverge

array([-2.58983747e+13, -1.29524862e+12])

#### 학습률이 너무 작은 경우

In [13]:
# Same start point and step count, but with a learning rate of 1e-10.
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=1e-10, step_num=100)

# Learning is very slow

array([-2.99999994,  3.99999992])

## Neural Network (수치미분 사용)

In [14]:
def sigmoid(x):
    """Numerically stable logistic sigmoid ``1 / (1 + exp(-x))``.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        Sigmoid of ``x`` with the same shape; a scalar input gives a scalar.
        Large positive or negative inputs give 1.0 or 0.0 rather than NaN.
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) lies in (0, 1], so it can never overflow. The naive
    # exp(x) / (exp(x) + 1) becomes inf / inf = nan once exp(x) overflows.
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of higher rank unchanged.
    return result[()]


def softmax(a):
    """Softmax over the last axis of ``a``.

    A 1-D input is treated as one score vector. For a 2-D input each row is
    normalised independently, so every row of the result sums to 1.

    Args:
        a: Scalar or array-like of scores.

    Returns:
        Array of the same shape as ``a`` containing the softmax values.
    """
    a = np.asarray(a)
    # A 0-d input has no last axis; reduce over everything in that case.
    axis = -1 if a.ndim > 0 else None
    # Subtracting the maximum before exponentiating prevents overflow and
    # does not change the result.
    shifted = a - np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(shifted)
    return exp_a / np.sum(exp_a, axis=axis, keepdims=True)

In [17]:
# Two-layer neural network
class TwoLayerNet:
    """Two-layer network: affine -> sigmoid -> affine -> softmax.

    The parameters are stored in ``self.params`` under the keys
    ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Weights are standard-normal samples scaled by weight_init_std;
        # biases are zero vectors multiplied by the same factor.
        w1 = weight_init_std * np.random.randn(input_size, hidden_size)
        b1 = weight_init_std * np.zeros(hidden_size)
        w2 = weight_init_std * np.random.randn(hidden_size, output_size)
        b2 = weight_init_std * np.zeros(output_size)
        self.params = {'W1': w1, 'b1': b1, 'W2': w2, 'b2': b2}

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``."""
        p = self.params
        # Hidden layer: affine transform followed by the sigmoid activation.
        hidden = sigmoid(np.dot(x, p['W1']) + p['b1'])
        # Output layer: affine transform followed by softmax.
        scores = np.dot(hidden, p['W2']) + p['b2']
        return softmax(scores)

    def loss(self, x, t):
        """Cross-entropy error of the prediction for input ``x`` against target ``t``."""
        return cross_entropy_error(self.predict(x), t)

    def accuracy(self, x, t):
        """Fraction of rows whose arg-max prediction equals the arg-max of ``t``."""
        predicted = np.argmax(self.predict(x), axis=1)  # index of the largest output
        expected = np.argmax(t, axis=1)
        n_correct = np.sum(predicted == expected)
        return n_correct / float(x.shape[0])  # correct / total

    def numerical_gradient(self, x, t):
        """Numerical gradient of the loss with respect to each parameter array.

        Returns:
            Dict with the keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
        """
        def loss_W(W):
            # W is unused: the parameter arrays are perturbed in place, so
            # re-evaluating the loss already sees the modified values.
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

In [42]:
import pandas as pd
# Load the training CSV into a DataFrame and display it.
# NOTE(review): the column labels in the displayed frame ('6', '0', '0.1', ...)
# look like data values, so the file presumably has no header row and its
# first record was consumed as the header. Confirm, and consider
# header=None (the data['6'] lookup would then need updating).
data = pd.read_csv("mnist_train_small.csv")
data

Unnamed: 0,6,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.581,0.582,0.583,0.584,0.585,0.586,0.587,0.588,0.589,0.590
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19994,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19995,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19996,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19997,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [44]:
# Display the column labelled '6' (the first column of the loaded frame).
data['6']

0        5
1        7
2        9
3        5
4        2
        ..
19994    0
19995    1
19996    2
19997    9
19998    5
Name: 6, Length: 19999, dtype: int64

In [45]:
t = data.iloc[:, 0] # iloc: selects data by integer position
# iloc[:, 0]: take all rows (:) and show only the first column (0)
t

0        5
1        7
2        9
3        5
4        2
        ..
19994    0
19995    1
19996    2
19997    9
19998    5
Name: 6, Length: 19999, dtype: int64

In [50]:
# iloc[:, 1:]: take all rows and every column from the second one onward.
x = data.iloc[:, 1:]
x

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,...,0.581,0.582,0.583,0.584,0.585,0.586,0.587,0.588,0.589,0.590
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19994,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
x = np.array(x)
t = np.array(t)
iter_num = 10000
learning_rate = 0.1
num_classes = 10

# TwoLayerNet.loss multiplies t element-wise with the log of the prediction
# and TwoLayerNet.accuracy takes argmax(t, axis=1), so the targets must be
# one-hot rows. Convert 1-D class labels and leave 2-D targets as they are.
# Assumes the labels are integers in 0..num_classes-1 (the label column is
# displayed as int64); verify against the data.
t_onehot = np.eye(num_classes)[t] if t.ndim == 1 else t

# Loss recorded after every parameter update.
train_loss_list = []

# Apply the two-layer network
network = TwoLayerNet(input_size = 784, hidden_size=50, output_size=num_classes)

# NOTE(review): each iteration evaluates the loss on the whole data set twice
# per parameter element (numerical gradient), so this loop is extremely slow.
# Consider a small mini-batch and far fewer iterations when experimenting.
for i in range(iter_num):
    grad = network.numerical_gradient(x, t_onehot)
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x, t_onehot)
    train_loss_list.append(loss)

## Neural Network (Backpropagation)

In [56]:
class Affine:
    """Affine layer computing ``np.dot(x, W) + b``."""

    def __init__(self, W, b):
        self.W, self.b = W, b
        # x is cached by forward(); dW and db are filled in by backward().
        self.x = self.dW = self.db = None

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout):
        """Store the gradients for W and b and return the gradient for x.

        Uses the input cached by the most recent forward() call.
        """
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis = 0)
        return np.dot(dout, self.W.T)

#### Sigmoid Layer

In [64]:
class Sigmoid:
    """Sigmoid activation layer that keeps its output for the backward pass."""

    def __init__(self):
        self.out = None  # output of the most recent forward() call

    def forward(self, x):
        """Return ``1 / (1 + exp(-x))`` and remember it in ``self.out``."""
        self.out = 1 / (np.exp(-x) + 1)
        return self.out

    def backward(self, dout):
        """Return ``dout * (1 - out) * out`` using the cached forward output."""
        scaled = dout * (1.0 - self.out)
        return scaled * self.out

#### ReLU Layer

In [63]:
class Relu:
    """ReLU activation layer.

    forward() records which inputs were <= 0; backward() sets the gradient
    to 0 at exactly those positions.
    """

    def __init__(self):
        self.mask = None  # boolean array: True where the forward input was <= 0

    def forward(self, x):
        """Return a copy of ``x`` with every entry <= 0 replaced by 0."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return ``dout`` with the masked positions zeroed.

        The result is a new array; ``dout`` itself is not modified.
        """
        # Copy first so the caller's upstream gradient is left intact.
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

#### Softmax with Loss

In [61]:
class SoftmaxWithLoss:
    """Softmax followed by cross-entropy error, used as the final layer."""

    def __init__(self):
        self.loss = None  # loss value from the most recent forward()
        self.y = None     # softmax output
        self.t = None     # targets passed to forward()

    def forward(self, x, t):
        """Apply softmax to ``x`` and return the cross-entropy error against ``t``.

        The softmax output and the targets are cached for backward().
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return ``dout * (y - t)`` from the values cached by forward().

        ``t`` must have the same shape as the softmax output ``y``
        (for example one-hot rows) for the subtraction to be element-wise.
        With the default ``dout=1`` this is simply ``y - t``.
        """
        # Scale by the upstream gradient instead of silently ignoring it.
        dx = dout * (self.y - self.t)
        return dx

In [62]:
import numpy as np
from collections import OrderedDict

class NeuralNetwork:
    """Two-layer network (Affine -> ReLU -> Affine) trained by backpropagation.

    The parameters are stored in ``self.params`` under the keys ``'W1'``,
    ``'b1'``, ``'W2'`` and ``'b2'``; the Affine layers hold references to
    the same arrays. A SoftmaxWithLoss layer is kept in ``self.lastlayer``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Parameter initialisation: weights are standard-normal samples
        # scaled by weight_init_std; biases are zero vectors.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = weight_init_std * np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = weight_init_std * np.zeros(output_size)

        # Layer construction. OrderedDict keeps insertion order, which is
        # the order of the forward pass.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastlayer = SoftmaxWithLoss()

    def predict(self, x):
        """Pass ``x`` through every layer in ``self.layers`` and return the result.

        The last layer (``self.lastlayer``) is not applied here.
        """
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        """Return the loss for input data ``x`` and targets ``t``.

        ``t`` is handed to ``self.lastlayer`` unchanged.
        """
        y = self.predict(x)
        return self.lastlayer.forward(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` that are classified correctly.

        Args:
            x: Input batch; ``x.shape[0]`` is used as the number of samples.
            t: Either 1-D class indices or a 2-D array whose row-wise
                arg-max is the class (for example one-hot rows).
        """
        y = np.argmax(self.predict(x), axis=1)
        t = np.asarray(t)
        # Accept the same row-per-sample targets that loss()/gradient()
        # receive, not only class indices.
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        return np.sum(y == t) / float(x.shape[0])

    def gradient(self, x, t):
        """Compute the parameter gradients by backpropagation.

        Returns:
            Dict with the keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``
            taken from the two Affine layers after the backward pass.
        """
        # Forward pass: lets every layer cache what its backward() needs.
        self.loss(x, t)

        # Backward pass: start at the loss layer, then walk the layers in
        # reverse order.
        dout = self.lastlayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients stored on the Affine layers.
        return {
            'W1': self.layers['Affine1'].dW,
            'b1': self.layers['Affine1'].db,
            'W2': self.layers['Affine2'].dW,
            'b2': self.layers['Affine2'].db,
        }