In [1]:
from layers import *
from utils import *

In [2]:
from collections import OrderedDict
import numpy as np
from dataset import load_mnist

In [3]:
class ThreeLayerNet:
    def __init__(self, input_size, hidden_size, output_size, std=1e-2):
        self.params = {}
        self.params['W1'] = std * np.random.randn(input_size, hidden_size[0])
        self.params['b1'] = np.zeros(hidden_size[0])
        self.params['W2'] = std * np.random.randn(hidden_size[0], hidden_size[1])
        self.params['b2'] = np.zeros(hidden_size[1])
        self.params['W3'] = std * np.random.randn(hidden_size[1], output_size)
        self.params['b3'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Sigmoid2'] = Sigmoid()
        self.layers['Affine3'] = Affine(self.params['W3'], self.params['b3'])
        
        self.lastLayer = SoftmaxWithLoss()

        self.x_train = None
        self.t_train = None

    def predict(self, X):
        for layer in self.layers.values():
            X = layer.forward(X)
        return X

    def loss(self, X, T):
        Y = self.predict(X)
        Y, loss = self.lastLayer.forward(Y, T)
        return loss
    
    def gradient(self, X, T):
        loss = self.loss(X, T)

        dout = 1
        dout = self.lastLayer.backward()

        layers_reverse = reversed(list(self.layers.values()))

        for layer in layers_reverse:
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        grads['W3'] = self.layers['Affine3'].dW
        grads['b3'] = self.layers['Affine3'].db

        return grads
    
    def set_x_train(self, x_train):
        self.x_train = x_train
    
    def set_t_train(self, t_train):
        self.t_train = t_train

    def train(self, total_steps=10000, learning_rate=0.1, batch_size=100):
        train_loss_list = []
        train_size = self.x_train.shape[0]
        iter_per_epoch = max(train_size / batch_size, 1)

        for i in range(total_steps):
            batch_mask = np.random.choice(train_size, batch_size)
            x_batch = self.x_train[batch_mask]
            t_batch = self.t_train[batch_mask]

            grad = self.gradient(x_batch, t_batch)

            for key in grad.keys():
                self.params[key] -= learning_rate * grad[key]

            loss = self.loss(x_batch, t_batch)
            train_loss_list.append(loss)
            
            # 출력은 선택
            if i % iter_per_epoch == 0:
                print('iteration: ', i, 'loss: ', loss)

In [4]:
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
nn = ThreeLayerNet(input_size=784, hidden_size=[100, 50], output_size=10)

In [5]:
total_steps = 10000
batch_size = 100
learning_rate = 0.1

nn.set_x_train(x_train)
nn.set_t_train(t_train)

In [6]:
nn.train(total_steps=total_steps, learning_rate=learning_rate, batch_size=100)

iteration:  0 loss:  2.291243006900547
iteration:  600 loss:  2.301827088860268
iteration:  1200 loss:  2.0350310333979307
iteration:  1800 loss:  0.8810366766338142
iteration:  2400 loss:  0.6507471127372795
iteration:  3000 loss:  0.40934238061939604
iteration:  3600 loss:  0.37111665449854486
iteration:  4200 loss:  0.2178531731882485
iteration:  4800 loss:  0.29866564339653917
iteration:  5400 loss:  0.15465187826930482
iteration:  6000 loss:  0.21691496560235682
iteration:  6600 loss:  0.21520868194876158
iteration:  7200 loss:  0.11198845923613668
iteration:  7800 loss:  0.05734554863076653
iteration:  8400 loss:  0.1440885561796086
iteration:  9000 loss:  0.03214058959136496
iteration:  9600 loss:  0.04976254966604356


In [12]:
class SimpleCNN:
    def __init__(self, input_dim=(1, 28, 28), 
                 conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(W = self.params['W1'], b = self.params['b1'], stride = filter_stride, pad = filter_pad)
        self.layers['Relu1'] = Relu()
        self.layers['Pooling1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(W = self.params['W2'], b = self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(W = self.params['W3'], b = self.params['b3'])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, X):
        for layer in self.layers.values():
            X = layer.forward(X)
        return X
    
    def loss(self, X, T):
        Y = self.predict(X)
        return self.lastLayer.forward(Y, T)
    
    def gradient(self, X, T):
        self.loss(X, T)

        dout = 1
        dout = self.lastLayer.backward()

        layers_reverse = reversed(list(self.layers.values()))
        for layer in layers_reverse:
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Conv1'].dW
        grads['b1'] = self.layers['Conv1'].db
        grads['W2'] = self.layers['Affine1'].dW
        grads['b2'] = self.layers['Affine1'].db
        grads['W3'] = self.layers['Affine2'].dW
        grads['b3'] = self.layers['Affine2'].db

        return grads
    
    def set_x_train(self, x_train):
        self.x_train = x_train
    
    def set_t_train(self, t_train):
        self.t_train = t_train
    
    def train(self, total_steps=10000, learning_rate=0.1, batch_size=100):
        train_loss_list = []
        train_size = self.x_train.shape[0]
        iter_per_epoch = max(train_size / batch_size, 1)

        for i in range(total_steps):
            batch_mask = np.random.choice(train_size, batch_size)
            x_batch = self.x_train[batch_mask]
            t_batch = self.t_train[batch_mask]

            grad = self.gradient(x_batch, t_batch)

            for key in grad.keys():
                self.params[key] -= learning_rate * grad[key]

            loss = self.loss(x_batch, t_batch)
            train_loss_list.append(loss)
            
            # 출력은 선택
            if i % iter_per_epoch == 0:
                print('iteration: ', i, 'loss: ', loss)

In [13]:
total_steps = 10000
batch_size = 100
learning_rate = 0.1

In [17]:
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)
t_train

array([5, 0, 4, ..., 5, 6, 8], shape=(60000,), dtype=uint8)

In [15]:
cnn = SimpleCNN()
cnn.set_x_train(x_train)
cnn.set_t_train(t_train)

In [16]:
cnn.train(total_steps=total_steps, learning_rate=learning_rate, batch_size=batch_size)

ValueError: operands could not be broadcast together with shapes (100,10) (100,) 