In [1]:
from sklearn.utils import shuffle

import numpy as np

# Seed NumPy's global RNG so the np.random.uniform weight draws in later cells are repeatable.
np.random.seed(1234)

In [19]:
def sigmoid(x):
    """Logistic function of the unit-weighted sum of ``x``.

    The weight vector is fixed to all ones, so the value returned is
    ``1 / (1 + exp(-sum(x)))`` -- a single scalar, not an element-wise map.

    Args:
        x: 1-D sequence/array of inputs.

    Returns:
        The scalar logistic activation of ``ones . x``.
    """
    unit_weights = np.ones(len(x))
    activation = np.dot(unit_weights, x)
    return 1 / (1 + np.exp(-activation))

def deriv_sigmoid(x):
    """Gradient of ``sigmoid(w . x)`` with respect to ``w``, evaluated at ``w = ones``.

    With ``a = ones . x`` the logistic slope is ``exp(-a) / (1 + exp(-a))**2``;
    the chain rule multiplies that scalar by ``x``.

    Args:
        x: 1-D array of inputs.

    Returns:
        Array with the same length as ``x``.
    """
    unit_weights = np.ones(len(x))
    decay = np.exp(-np.dot(unit_weights, x))
    slope = decay / (1 + decay) ** 2
    return np.multiply(slope, x)

# Sanity check: evaluate both helpers on the same small sample vector.
print(sigmoid(np.array([0.1, 0.2, 0.1])))
print(deriv_sigmoid(np.array([0.1, 0.2, 0.1])))

0.598687660112
[ 0.02402607  0.04805215  0.02402607]


In [65]:
# OR
# Truth table of logical OR: each row of train_X is an input pair, train_y holds the target bit.
train_X = np.array([[0, 1], [1, 0], [0, 0], [1, 1]])
train_y = np.array([[1], [1], [0], [1]])
# The "test" split is the same four points (the names alias the training arrays, no copy is made).
test_X, test_y = train_X, train_y

# weights
# One weight per input feature, drawn uniformly from [-0.08, 0.08); the bias starts at zero.
W = np.random.uniform(low=-0.08, high=0.08, size=(2, 1)).astype('float32')
b = np.zeros(1).astype('float32')

##### 誤差関数
* 負の対数尤度関数 (Negative Log-likelihood Function)
* 交差エントロピー誤差関数ともいう

$$ E ( {\bf \theta} ) =  -\sum^N_{i=1} \left[ t_i \log y ({\bf x}_i ; {\bf \theta}) + (1 - t_i) \log \{ 1 - y ({\bf x}_i ; {\bf \theta}) \}\right] $$

In [63]:
def train(x, t, eps=1.0):
    """Apply one Newton-Raphson (IRLS) step to the bias-free logistic model.

    The model is ``y = 1 / (1 + exp(-x @ W))`` with the module-level weight
    matrix ``W``.  The step is ``W <- W - eps * pinv(H) @ g`` where
    ``g = x.T @ (y - t)`` is the gradient of the negative log-likelihood and
    ``H = x.T @ diag(y * (1 - y)) @ x`` is its Hessian.

    Args:
        x: Input batch, shape (N, D).
        t: Binary targets, shape (N, 1).
        eps: Step-size multiplier applied to the Newton direction.

    Returns:
        The negative log-likelihood of the batch, measured before the update.

    Side effects:
        Rebinds the global ``W``.  The global bias ``b`` is not read or
        written, because this variant of the model has no bias term.
    """
    global W  # to access the weight matrix defined outside of this function.

    # Forward Propagation
    y = 1 / (1 + np.exp(-np.matmul(x, W)))

    # Cost Function: Negative Loglikelihood
    cost = np.sum(-t * np.log(y) - (1 - t) * np.log(1 - y))

    # Gradient and Hessian of the cost with respect to W
    x_tr = np.transpose(x)
    grad = np.matmul(x_tr, y - t)
    # r holds y_i * (1 - y_i) per sample; scaling the rows of x by it is the
    # same as multiplying by diag(r) without materialising an N x N matrix.
    r = np.multiply(y, 1 - y)
    hessian = np.matmul(x_tr, r * x)

    # Update Parameters
    # The pseudo-inverse keeps the step defined when the Hessian is singular,
    # e.g. when a single sample is passed (rank-1 Hessian).
    dW = np.matmul(np.linalg.pinv(hessian), grad)
    W = W - eps * dW

    return cost

def test(x, t):
    """Evaluate the bias-free logistic model held in the global ``W``.

    Args:
        x: Input batch; multiplied on the right by ``W``.
        t: Binary targets broadcastable against the predictions.

    Returns:
        Tuple ``(cost, y)``: the summed negative log-likelihood and the
        predicted probabilities.
    """
    logits = np.matmul(x, W)
    y = 1 / (1 + np.exp(-logits))

    # Test Cost, split into the t == 1 and t == 0 contributions
    positive_term = -t * np.log(y)
    negative_term = (1 - t) * np.log(1 - y)
    return np.sum(positive_term - negative_term), y

In [67]:
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def train(x, t, eps=1.0):
    """Take one gradient-descent step of logistic regression on the globals ``W`` and ``b``.

    The gradient is computed per sample and then reduced over the batch, so
    the function accepts any number of rows and any number of input features.
    For a one-row batch the update equals the single-sample (online) rule.

    Args:
        x: Input batch, shape (N, D).
        t: Binary targets, shape (N, 1).
        eps: Learning rate.

    Returns:
        The negative log-likelihood of the batch, measured before the update.

    Side effects:
        Rebinds the globals ``W`` and ``b``.
    """
    global W, b # to access variables that defined outside of this function.

    # Forward Propagation
    y = sigmoid(np.matmul(x, W) + b)

    # Back Propagation (Cost Function: Negative Loglikelihood)
    cost = np.sum(-t*np.log(y)-(1-t)*np.log(1-y))
    # Keep one error value per sample; summing them before multiplying by x
    # is only valid when the batch holds exactly one row.
    delta = y - t

    # Update Parameters
    dW = np.matmul(np.transpose(x), delta)  # sums x_i * delta_i over the batch
    db = np.sum(delta, axis=0)
    W = W - eps*dW
    b = b - eps*db

    return cost

def test(x, t):
    """Evaluate the logistic model held in the globals ``W`` and ``b``.

    Args:
        x: Input batch; multiplied on the right by ``W``.
        t: Binary targets broadcastable against the predictions.

    Returns:
        Tuple ``(cost, y)``: the summed negative log-likelihood and the
        predicted probabilities.
    """
    pre_activation = np.matmul(x, W) + b
    y = sigmoid(pre_activation)

    # Test Cost, split into the t == 1 and t == 0 contributions
    positive_term = -t * np.log(y)
    negative_term = (1 - t) * np.log(1 - y)
    return np.sum(positive_term - negative_term), y

In [70]:
# Epoch
# Each pass over the data mutates the global parameters through train().
for epoch in range(1000):
    # Online Learning
    # x[np.newaxis, :] / y[np.newaxis, :] turn one sample into a one-row batch.
    for x, y in zip(train_X, train_y):
        cost = train(x[np.newaxis, :], y[np.newaxis, :])
    # Re-evaluated every epoch; only the values from the final epoch are printed below.
    cost, pred_y = test(test_X, test_y)

print('test input', test_X)
print('test output', test_y)
print('predicted', pred_y)

test input [[0 1]
 [1 0]
 [0 0]
 [1 1]]
test output [[1]
 [1]
 [0]
 [1]]
predicted [[ 0.99933265]
 [ 0.99933176]
 [ 0.00166658]
 [ 1.        ]]


In [2]:
def sigmoid(x):
    """Element-wise logistic function; scalars and arrays are both accepted."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)


def deriv_sigmoid(x):
    """Element-wise derivative of the logistic function, ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    s = sigmoid(x)
    return s * (1 - s)

In [3]:
def softmax(x):
    """Row-wise softmax of a 2-D array of logits.

    The row maximum is subtracted before exponentiating.  This leaves the
    result mathematically unchanged but keeps ``np.exp`` from overflowing to
    ``inf`` (which would turn the normalisation into ``inf / inf = nan``).

    Args:
        x: Logits, shape (N, K).  Normalisation runs along axis 1.

    Returns:
        Array of shape (N, K) whose rows are non-negative and sum to 1.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=1, keepdims=True)


def deriv_softmax(x):
    """Return ``p * (1 - p)`` element-wise, where ``p = softmax(x)``.

    This is the derivative of each output with respect to its own logit
    (the diagonal of the softmax Jacobian); cross terms are not included.
    """
    probs = softmax(x)
    return probs * (1 - probs)

In [8]:
def tanh(x):
    """Element-wise hyperbolic tangent activation.

    Args:
        x: Scalar or array of pre-activations.

    Returns:
        ``np.tanh(x)``, with the same shape as ``x``.
    """
    return np.tanh(x)


def deriv_tanh(x):
    """Element-wise derivative of the hyperbolic tangent, ``1 - tanh(x)**2``.

    ``np.tanh`` is called directly so the result does not depend on which
    ``tanh`` helper is currently bound in the notebook namespace.

    Args:
        x: Scalar or array of pre-activations.

    Returns:
        The derivative, with the same shape as ``x``.
    """
    return 1 - np.tanh(x) ** 2

In [41]:
# XOR
# Truth table of logical XOR: each row of train_X is an input pair, train_y holds the target bit.
train_X = np.array([[0, 1], [1, 0], [0, 0], [1, 1]])
train_y = np.array([[1], [1], [0], [0]])
# The "test" split is the same four points (the names alias the training arrays, no copy is made).
test_X, test_y = train_X, train_y

# Layer1 weights
# 2 inputs -> 3 hidden units; weights uniform in [-0.08, 0.08), biases zero.
W1 = np.random.uniform(low=-0.08, high=0.08, size=(2, 3)).astype('float32')
b1 = np.zeros(3).astype('float32')

# Layer2 weights
# 3 hidden units -> 1 output; W2 is drawn after W1, so the values depend on this order.
W2 = np.random.uniform(low=-0.08, high=0.08, size=(3, 1)).astype('float32')
b2 = np.zeros(1).astype('float32')

##### 誤差関数
* 負の対数尤度関数 (Negative Log-likelihood Function)
* 交差エントロピー誤差関数ともいう

$$ E ( {\bf \theta} ) =  -\sum^N_{i=1} \left[ t_i \log y ({\bf x}_i ; {\bf \theta}) + (1 - t_i) \log \{ 1 - y ({\bf x}_i ; {\bf \theta}) \}\right] $$

In [42]:
def train(x, t, eps=1.0):
    """Run one backpropagation step of the two-layer sigmoid network.

    The network is ``x -> sigmoid(x @ W1 + b1) -> sigmoid(z1 @ W2 + b2)``
    with all four parameters held in module-level globals.  Deltas are kept
    per sample and reduced over the batch only when forming the parameter
    gradients, so the function works for any batch size and any hidden
    width.  For a one-row batch the update equals the single-sample rule.

    Args:
        x: Input batch, shape (N, D).
        t: Binary targets, shape (N, 1).
        eps: Learning rate.

    Returns:
        The negative log-likelihood of the batch, measured before the update.

    Side effects:
        Rebinds the globals ``W1``, ``b1``, ``W2`` and ``b2``.
    """
    global W1, b1, W2, b2 # to access variables that defined outside of this function.

    # Forward Propagation Layer1
    u1 = np.matmul(x, W1) + b1
    z1 = sigmoid(u1)

    # Forward Propagation Layer2
    u2 = np.matmul(z1, W2) + b2
    z2 = sigmoid(u2)

    # Back Propagation (Cost Function: Negative Loglikelihood)
    y = z2
    cost = np.sum(-t*np.log(y) - (1 - t)*np.log(1 - y))
    delta_2 = y - t  # shape (N, 1)
    # z1 * (1 - z1) is the sigmoid derivative at u1, reusing the forward pass.
    # Both deltas are computed before any parameter is overwritten.
    delta_1 = z1 * (1 - z1) * np.matmul(delta_2, np.transpose(W2))  # shape (N, H)

    # Update Parameters Layer1
    dW1 = np.matmul(np.transpose(x), delta_1)
    db1 = np.sum(delta_1, axis=0)
    W1 = W1 - eps*dW1
    b1 = b1 - eps*db1

    # Update Parameters Layer2
    dW2 = np.matmul(np.transpose(z1), delta_2)
    db2 = np.sum(delta_2, axis=0)
    W2 = W2 - eps*dW2
    b2 = b2 - eps*db2

    return cost

def test(x, t):
    """Evaluate the two-layer sigmoid network held in ``W1``, ``b1``, ``W2``, ``b2``.

    Args:
        x: Input batch; multiplied on the right by ``W1``.
        t: Binary targets broadcastable against the network output.

    Returns:
        Tuple ``(cost, y)``: the summed negative log-likelihood and the
        network output.
    """
    # Forward pass: hidden layer, then output layer
    hidden = sigmoid(np.matmul(x, W1) + b1)
    y = sigmoid(np.matmul(hidden, W2) + b2)

    # Test Cost, split into the t == 1 and t == 0 contributions
    positive_term = -t * np.log(y)
    negative_term = (1 - t) * np.log(1 - y)
    return np.sum(positive_term - negative_term), y

In [43]:
# Epoch
# Each pass over the data mutates the global layer parameters through train().
for epoch in range(2000):
    # Online Learning
    # x[np.newaxis, :] / y[np.newaxis, :] turn one sample into a one-row batch.
    for x, y in zip(train_X, train_y):
        cost = train(x[np.newaxis, :], y[np.newaxis, :])
    # Re-evaluated every epoch; only the prediction from the final epoch is printed below.
    cost, pred_y = test(test_X, test_y)

print(pred_y)

[[  9.98948747e-01]
 [  9.98910806e-01]
 [  8.12031650e-04]
 [  1.85537181e-03]]


## 課題. MNISTデータセットを多層パーセプトロン(MLP)で学習せよ

### 注意
- homework関数を完成させて提出してください
    - 訓練データはtrain_X, train_y, テストデータはtest_Xで与えられます
    - train_Xとtrain_yをtrain_X, train_yとvalid_X, valid_yに分けるなどしてモデルを学習させてください
    - test_Xに対して予想ラベルpred_yを作り, homework関数の戻り値としてください
- pred_yのtest_yに対する精度(F値)で評価します
- 全体の実行時間がiLect上で60分を超えないようにしてください
- homework関数の外には何も書かないでください (必要なものは全てhomework関数に入れてください)
- 解答提出時には Answer Cell の内容のみを提出してください

### ヒント
- 出力yはone-of-k表現
- 最終層の活性化関数はソフトマックス関数, 誤差関数は多クラス交差エントロピー
- 最終層のデルタは教科書参照

In [141]:
def homework(train_X, train_y, test_X, epochs=500, eps=0.1, hidden_units=100):
    """Train a one-hidden-layer MLP with online SGD and predict labels for ``test_X``.

    Architecture: input -> sigmoid hidden layer -> 10-way softmax, trained on
    the multi-class cross-entropy.  Parameters are updated after every single
    sample, in the order the samples appear in ``train_X``.

    Args:
        train_X: Training inputs, 2-D array of shape (N, D).
        train_y: Integer class labels in 0..9, length N.
        test_X: Inputs to label, shape (M, D).
        epochs: Number of full passes over ``train_X``.  Every sample is
            visited one at a time, so run time grows with ``epochs * N``;
            lower this value for quick experiments.
        eps: Learning rate.
        hidden_units: Width of the hidden layer.

    Returns:
        1-D integer array of length M with the predicted class per test row.
    """
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    def softmax(x):
        # Subtract the row maximum so np.exp cannot overflow to inf, which
        # would make the normalisation inf / inf = nan.
        exp = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp / np.sum(exp, axis=1, keepdims=True)

    n_classes = 10
    n_inputs = train_X.shape[1]  # taken from the data instead of a fixed 784

    # Layer1 weights (drawn before Layer2 so a seeded run initialises as before)
    W1 = np.random.uniform(low=-0.08, high=0.08, size=(n_inputs, hidden_units)).astype('float32')
    b1 = np.zeros(hidden_units).astype('float32')

    # Layer2 weights
    W2 = np.random.uniform(low=-0.08, high=0.08, size=(hidden_units, n_classes)).astype('float32')
    b2 = np.zeros(n_classes).astype('float32')

    # One-of-k targets, built once up front rather than per sample per epoch.
    targets = np.eye(n_classes)[np.asarray(train_y)]

    for _ in range(epochs):
        # Online learning
        for X, t in zip(train_X, targets):
            # X: (D,), t: (10,) -> promote both to one-row batches
            _X, _t = X[np.newaxis, :], t[np.newaxis, :]

            # Forward propagation
            u1 = np.matmul(_X, W1) + b1
            z1 = sigmoid(u1)
            u2 = np.matmul(z1, W2) + b2
            y = softmax(u2)

            # Deltas for backpropagation (softmax + cross-entropy gives y - t).
            # z1 * (1 - z1) is the sigmoid derivative at u1, reusing the forward pass.
            delta_2 = y - _t
            delta_1 = z1 * (1 - z1) * np.matmul(delta_2, W2.T)

            # Update Parameters Layer1
            W1 = W1 - eps * np.matmul(_X.T, delta_1)
            b1 = b1 - eps * delta_1.reshape(hidden_units)
            # Update Parameters Layer2
            W2 = W2 - eps * np.matmul(z1.T, delta_2)
            b2 = b2 - eps * delta_2.reshape(n_classes)

    # Predict
    u1 = np.matmul(test_X, W1) + b1
    z1 = sigmoid(u1)
    u2 = np.matmul(z1, W2) + b2
    z2 = softmax(u2)
    pred_y = np.argmax(z2, axis=1)

    return pred_y

In [142]:
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split

import numpy as np

def load_mnist():
    """Fetch MNIST, shuffle it, scale pixels to [0, 1] and split 80/20.

    NOTE(review): ``fetch_mldata`` is no longer available in recent
    scikit-learn releases (``fetch_openml`` is the documented replacement);
    confirm the scikit-learn version of the target environment.

    Returns:
        The 4-element result of ``train_test_split``, i.e.
        ``train_X, test_X, train_y, test_y``.
    """
    mnist = fetch_mldata('MNIST original')
    pixels = mnist.data.astype('float32')
    labels = mnist.target.astype('int32')

    # Fixed random_state values keep the shuffle and the split repeatable.
    mnist_X, mnist_y = shuffle(pixels, labels, random_state=42)
    mnist_X = mnist_X / 255.0

    return train_test_split(mnist_X, mnist_y, test_size=0.2, random_state=42)

def validate_homework():
    """Quick check of ``homework`` on a subset of MNIST.

    Trains on the first 20000 training rows, predicts the first 100 test
    rows, then prints the first 20 true labels, the first 20 predictions,
    the macro F1 score and the accuracy.
    """
    train_X, test_X, train_y, test_y = load_mnist()

    # validate for small dataset
    n_train, n_test = 20000, 100
    test_y_mini = test_y[:n_test]

    pred_y = homework(train_X[:n_train], train_y[:n_train], test_X[:n_test])

    print(test_y_mini[:20])
    print(pred_y[:20])
    print("f value", f1_score(test_y_mini, pred_y, average='macro'))
    print("accuracy", accuracy(test_y_mini, pred_y))

def score_homework():
    """Train ``homework`` on the full training split and print the macro F1 on the test split."""
    train_X, test_X, train_y, test_y = load_mnist()
    macro_f1 = f1_score(test_y, homework(train_X, train_y, test_X), average='macro')
    print(macro_f1)
    
def accuracy(pred, label):
    """Fraction of positions at which ``pred`` and ``label`` agree.

    Both arguments are converted with ``np.asarray``, so plain lists and
    tuples work as well as arrays.  The comparison is symmetric, so the
    order of the two arguments does not affect the result.

    Args:
        pred: Predicted labels.
        label: Reference labels, same length as ``pred``.

    Returns:
        The mean of the element-wise equality mask, between 0 and 1
        (``nan`` for empty input).
    """
    matches = np.asarray(pred) == np.asarray(label)
    return np.mean(matches)
        

In [143]:
# Run the reduced-size check (20000 training rows, 100 test rows).
validate_homework()
# Full-dataset scoring is left disabled in this cell.
# score_homework()

[7 5 8 9 0 6 6 3 9 5 8 2 0 6 9 0 8 0 1 3]
[7 5 8 9 0 6 6 3 9 5 8 2 0 6 9 0 8 0 1 3]
f value 0.980389016018
accuracy 0.98


In [128]:
# Load the MNIST split into notebook-level names; this rebinds train_X/test_X/train_y/test_y,
# which earlier cells used for the toy XOR data.
train_X, test_X, train_y, test_y = load_mnist()

(56000, 784)


In [55]:
# Inspect the split sizes and the largest label value.
print(train_X.shape, train_y.shape)
print(test_X.shape, test_y.shape)
print(np.amax(train_y))

(56000, 784) (56000,)
(14000, 784) (14000,)
9


In [140]:
# Full-dataset run: trains on every training row and prints the macro F1 on the whole test split.
score_homework()

KeyboardInterrupt: 