In [21]:
%load_ext nb_black
import numpy as np 
import matplotlib.pyplot as plt
import pickle
import scipy.special

The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

In [4]:
def sum_squared_error(y, t):
    """Return half the sum of squared element-wise differences of `y` and `t`.

    Both arguments are combined with NumPy arithmetic, so they must be
    arrays (or scalars) whose shapes broadcast against each other.
    """
    residual = y - t
    return 0.5 * np.sum(residual ** 2)

<IPython.core.display.Javascript object>

In [5]:
# One-hot target: index 2 is the correct class.
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
# First prediction puts most mass on index 2, the second on index 7.
for y in (
    [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0],
    [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0],
):
    print(sum_squared_error(np.array(y), np.array(t)))

0.09750000000000003
0.5975


<IPython.core.display.Javascript object>

In [6]:
def cross_entropy_error(y, t):
    """Return the cross-entropy of prediction `y` against target `t`.

    Computes ``-sum(t * log(y + 1e-7))``. The small offset keeps the
    logarithm finite when an entry of `y` is exactly 0.
    """
    eps = 1e-7
    log_y = np.log(y + eps)
    return -np.sum(t * log_y)

<IPython.core.display.Javascript object>

In [7]:
# One-hot target: index 2 is the correct class.
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
# First prediction puts most mass on index 2, the second on index 7.
for y in (
    [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0],
    [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0],
):
    print(cross_entropy_error(np.array(y), np.array(t)))

0.510825457099338
2.302584092994546


<IPython.core.display.Javascript object>

In [8]:
from mnist import load_mnist

# Load the train/test splits with normalisation and one-hot labels enabled,
# then show the shapes of the training inputs and targets.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
print(x_train.shape, t_train.shape, sep="\n")

(60000, 784)
(60000, 10)


<IPython.core.display.Javascript object>

In [9]:
# Draw 10 random row indices and slice a mini-batch out of the training set.
train_size, batch_size = x_train.shape[0], 10
batch_mask = np.random.choice(train_size, size=batch_size)
x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

<IPython.core.display.Javascript object>

In [10]:
# Scratch check: 10 random indices drawn from range(60000).
np.random.choice(60000, size=10)

array([28277, 26820, 36123, 15663, 28270, 45245, 18203,  8357, 39087,
       44720])

<IPython.core.display.Javascript object>

In [24]:
def cross_entropy_error(y, t):
    """Return the cross-entropy of `y` against `t`, averaged over the batch.

    A 1-D `y` is treated as a single sample: both `y` and `t` are reshaped
    to one row. Otherwise the first axis of `y` is taken as the batch axis.
    The total ``-sum(t * log(y + 1e-7))`` is divided by the number of rows.
    """
    if y.ndim == 1:
        # Promote a lone sample to a batch of one so the division below is by 1.
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    n_samples = y.shape[0]
    eps = 1e-7  # keeps log() finite when an entry of y is exactly 0
    total = np.sum(t * np.log(y + eps))
    return -total / n_samples

<IPython.core.display.Javascript object>

In [44]:
def numerical_gradient(f, x):
    """Estimate the gradient of `f` at `x` with central differences.

    Each element of `x` is perturbed in place by ``+h`` and ``-h``, `f` is
    evaluated on the whole array both times, and the element is restored
    before moving on (also when `f` raises).

    Args:
        f: Callable that takes the array `x` and returns a scalar.
        x: NumPy array of any shape. It should have a floating dtype,
            because the perturbed values are written back into `x` itself.

    Returns:
        Array created with ``np.zeros_like(x)`` holding the estimated
        partial derivative for every element of `x`.
    """
    h = 1e-4  # finite-difference step
    grad = np.zeros_like(x)  # same shape as x
    # Flat indexing lets the same loop serve 1-D and multi-dimensional arrays.
    for idx in range(x.size):
        tmp_val = x.flat[idx]
        try:
            # f(x + h)
            x.flat[idx] = tmp_val + h
            fxh1 = f(x)
            # f(x - h)
            x.flat[idx] = tmp_val - h
            fxh2 = f(x)
        finally:
            # Always put the original value back so x is left unchanged.
            x.flat[idx] = tmp_val

        grad.flat[idx] = (fxh1 - fxh2) / (2 * h)
    return grad

<IPython.core.display.Javascript object>

In [26]:
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Minimise `f` by plain gradient descent starting from `init_x`.

    Args:
        f: Callable taking an array and returning a scalar.
        init_x: NumPy array with the starting point. It is copied, so the
            caller's array is not modified.
        lr: Learning rate that scales each gradient step.
        step_num: Number of update steps to perform.

    Returns:
        A new array holding the position after `step_num` updates.
    """
    # Work on a copy: the in-place update below would otherwise overwrite
    # the caller's starting point.
    x = init_x.copy()
    for _ in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x

<IPython.core.display.Javascript object>

In [27]:
def func2(x):
    """Return the sum of the squares of the first two elements of `x`."""
    first, second = x[0], x[1]
    return first ** 2 + second ** 2

# Learning rate 0.1: 100 steps starting from (-3, 4).
init_x = np.array([-3.0, 4.0])
gradient_descent(func2, init_x, lr=0.1, step_num=100)

array([-6.11110793e-10,  8.14814391e-10])

<IPython.core.display.Javascript object>

In [28]:
# Learning rate 10: same start and step count, much larger step size.
init_x = np.array([-3.0, 4.0])
gradient_descent(func2, init_x, lr=10, step_num=100)

array([-2.58983747e+13, -1.29524862e+12])

<IPython.core.display.Javascript object>

In [29]:
# Learning rate 1e-10: same start and step count, tiny step size.
init_x = np.array([-3.0, 4.0])
gradient_descent(func2, init_x, lr=1e-10, step_num=100)

array([-2.99999994,  3.99999992])

<IPython.core.display.Javascript object>

In [30]:
class SimpleNet:
    """Single linear layer with a 2x3 weight matrix and a softmax loss."""

    def __init__(self):
        # Weights are drawn from a standard normal distribution.
        self.w = np.random.randn(2, 3)

    def predict(self, x):
        """Return the raw scores ``x @ w`` (no softmax applied)."""
        return np.dot(x, self.w)

    def loss(self, x, t):
        """Return the cross-entropy of softmax(predict(x)) against `t`.

        Softmax is taken over the last axis, so a 1-D input is normalised
        as a single sample and a 2-D batch is normalised row by row.
        """
        z = self.predict(x)
        # axis=-1: the default (axis=None) would normalise over the whole
        # array and mix the samples of a batch together.
        y = scipy.special.softmax(z, axis=-1)
        return cross_entropy_error(y, t)

<IPython.core.display.Javascript object>

In [46]:
def numerical_gradient(f, x):
    """Estimate the gradient of `f` at `x` with central differences.

    Every element of `x` is perturbed in place by ``+h`` and ``-h``, `f` is
    evaluated on the whole array both times, and the element is restored
    afterwards (also when `f` raises). Arrays of any shape are supported,
    including zero-size arrays, for which an empty gradient is returned.

    Args:
        f: Callable that takes the array `x` and returns a scalar.
        x: NumPy array. It should have a floating dtype, because the
            perturbed values are written back into `x` itself.

    Returns:
        Array created with ``np.zeros_like(x)`` holding the estimated
        partial derivative for every element of `x`.
    """
    h = 1e-4  # finite-difference step
    grad = np.zeros_like(x)

    # np.ndindex yields every multi-dimensional index of x and simply yields
    # nothing for a zero-size array.
    for idx in np.ndindex(*x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = tmp_val + h
            fxh1 = f(x)  # f(x + h)

            x[idx] = tmp_val - h
            fxh2 = f(x)  # f(x - h)
        finally:
            # Always put the original value back so x is left unchanged.
            x[idx] = tmp_val

        grad[idx] = (fxh1 - fxh2) / (2 * h)

    return grad

<IPython.core.display.Javascript object>

In [47]:
# Build a fresh net, show its random weights, then score one sample.
net = SimpleNet()
print(net.w)

x, t = np.array([0.6, 0.9]), np.array([0, 0, 1])  # input and one-hot target
p = net.predict(x)
print(p, np.argmax(p), sep="\n")  # raw scores, then the index of the largest
net.loss(x, t)

[[-0.29791728  0.82569797 -0.5372849 ]
 [-0.39811187 -0.80979695  0.21523969]]
[-0.53705105 -0.23339848 -0.12865522]
2


0.9420640312482158

<IPython.core.display.Javascript object>

In [48]:
def f(w):
    """Return the loss of the global `net` on the global `x` and `t`.

    `w` is unused; the argument exists only so the function matches the
    ``f(x)`` call made by `numerical_gradient`.
    """
    return net.loss(x, t)


# Show the weights, then the numerical gradient of the loss w.r.t. them.
print(net.w)
dw = numerical_gradient(f, x=net.w)
print(dw)

[[-0.29791728  0.82569797 -0.5372849 ]
 [-0.39811187 -0.80979695  0.21523969]]
[[ 0.15547256  0.21063396 -0.36610653]
 [ 0.23320885  0.31595094 -0.54915979]]


<IPython.core.display.Javascript object>

In [41]:
# Scratch check: a 2x3 array of standard-normal samples.
np.random.randn(2,3)

array([[ 0.11292675,  1.419947  ,  0.83542899],
       [ 0.53091949, -0.02265948, -0.40730274]])

<IPython.core.display.Javascript object>

In [53]:
class TwoLayerNet:
    """Two-layer fully connected network: sigmoid hidden layer, softmax output.

    Parameters are stored in ``self.params`` under the keys ``"w1"``,
    ``"b1"``, ``"w2"`` and ``"b2"``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameters.

        Weights are standard-normal samples scaled by `weight_init_std`;
        biases start at zero.
        """
        self.params = {}
        self.params["w1"] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params["b1"] = np.zeros(hidden_size)
        self.params["w2"] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params["b2"] = np.zeros(output_size)

    def predict(self, x):
        """Return softmax outputs for `x`.

        `x` may be a single sample (1-D) or a batch (2-D, one sample per
        row). Softmax is applied over the last axis, so every sample's
        outputs sum to 1 on their own.
        """
        w1, w2 = self.params["w1"], self.params["w2"]
        b1, b2 = self.params["b1"], self.params["b2"]

        a1 = np.dot(x, w1) + b1
        z1 = scipy.special.expit(a1)  # sigmoid
        a2 = np.dot(z1, w2) + b2
        # axis=-1: the default (axis=None) would normalise over the whole
        # batch at once, so rows would not be probability distributions and
        # the loss and gradients would mix the samples together.
        y = scipy.special.softmax(a2, axis=-1)
        return y

    def loss(self, x, t):
        """Return the cross-entropy of the prediction for `x` against `t`."""
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose predicted class matches `t`.

        Both the prediction and `t` are reduced with ``argmax(axis=1)``, so
        `x` must be a 2-D batch and `t` a 2-D array with one row per sample.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        """Return numerically estimated loss gradients for every parameter.

        The result is a dict with the same keys as ``self.params``.
        """
        # The argument is ignored: the module-level numerical_gradient
        # perturbs the parameter arrays in place, and self.loss reads them
        # from self.params.
        loss_w = lambda w: self.loss(x, t)

        grads = {}
        grads["w1"] = numerical_gradient(loss_w, self.params["w1"])
        grads["b1"] = numerical_gradient(loss_w, self.params["b1"])
        grads["w2"] = numerical_gradient(loss_w, self.params["w2"])
        grads["b2"] = numerical_gradient(loss_w, self.params["b2"])

        return grads


<IPython.core.display.Javascript object>

In [54]:
# 784 inputs, 100 hidden units, 10 outputs; list each parameter's shape.
net = TwoLayerNet(784, 100, 10)
for p in net.params.values():
    print(p.shape)

(784, 100)
(100,)
(100, 10)
(10,)


<IPython.core.display.Javascript object>

In [55]:
# Smoke test: run a forward pass on 100 random inputs of 784 features each.
x = np.random.rand(100, 784)
y = net.predict(x)

<IPython.core.display.Javascript object>

In [57]:
# Random inputs and random (not one-hot) targets: this cell only checks
# the shapes of the gradients.
x, t = np.random.rand(100, 784), np.random.rand(100, 10)

grads = net.numerical_gradient(x, t)
for g in grads.values():
    print(g.shape)

(784, 100)
(100,)
(100, 10)
(10,)


<IPython.core.display.Javascript object>

In [None]:
# Mini-batch gradient descent on MNIST using numerically estimated gradients.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
train_size = x_train.shape[0]

# Hyperparameters.
niters, batch_size, learning_rate = 10000, 100, 0.1

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
train_loss_list = []  # loss on each mini-batch, recorded after its update

for i in range(niters):
    # Pick the rows of this iteration's mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    grad = network.numerical_gradient(x_batch, t_batch)

    # Step every parameter against its gradient.
    for key in ("w1", "b1", "w2", "b2"):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)


In [None]:
# Mini-batch gradient descent on MNIST using numerically estimated
# gradients, with train/test accuracy reported once per epoch.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
train_loss_list = []
train_acc_list = []
test_acc_list = []

niters = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
# Whole number of iterations per epoch. Integer division keeps the
# `i % iter_per_epoch == 0` check below exact even when train_size is not a
# multiple of batch_size; a float quotient would almost never divide i.
iter_per_epoch = max(train_size // batch_size, 1)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(niters):
    # Pick the rows of this iteration's mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.numerical_gradient(x_batch, t_batch)

    # Step every parameter against its gradient.
    for key in ('w1', 'b1', 'w2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Loss on the same mini-batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch (including iteration 0), evaluate on the full sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(f"train acc: {train_acc}, test acc: {test_acc}")