# 第2回講義 演習

In [1]:
from sklearn.utils import shuffle
from sklearn.datasets import fetch_mldata, fetch_openml
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import numpy as np

# Seed NumPy's global RNG so that the np.random.uniform weight
# initialisations in this notebook are reproducible across runs.
# NOTE(review): sklearn's shuffle / train_test_split are called without
# random_state; presumably they also draw from this global RNG - confirm.
np.random.seed(34)

## 目次

課題1. ロジスティック回帰の実装と学習 (OR)
1. シグモイド関数
2. データセットの設定と重みの定義
3. train関数とvalid関数
4. 学習

課題2. ソフトマックス回帰の実装と学習 (MNIST)
1. ソフトマックス関数
2. データセットの設定と重みの定義
3. train関数とvalid関数
4. 学習

## 課題1. ロジスティック回帰の実装と学習 (OR)

### 1. シグモイド関数
$$
    \sigma({\bf x}) = \frac{1}{1 + \exp(-{\bf x})}
$$

In [2]:
def sigmoid(x):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-x)).

    Evaluated through the identity sigmoid(x) = 0.5 * tanh(x / 2) + 0.5 using
    NumPy's built-in tanh, which avoids the overflow that exp(-x) would hit
    for large negative inputs.

    :param x: scalar or np.ndarray
    :return: sigmoid of ``x``, same shape as ``x``
    """
    half = 0.5
    return half * np.tanh(half * x) + half

### 2. データセットの設定と重みの定義

In [3]:
# OR dataset: the four possible binary input pairs and their logical OR.
x_train_or = np.array([
    [0, 1],
    [1, 0],
    [0, 0],
    [1, 1],
])
t_train_or = np.array([1, 1, 0, 1]).reshape(-1, 1)

# The same four samples double as validation and test data.
x_valid_or = x_test_or = x_train_or
t_valid_or = t_test_or = t_train_or

# Model parameters (input dimension: 2, output dimension: 1)
W_or = np.random.uniform(-0.08, 0.08, (2, 1)).astype(np.float32)
b_or = np.zeros(1, dtype=np.float32)

### 3. train関数とvalid関数

#### 3.1. 誤差関数
- 負の対数尤度 (交差エントロピー)

$$ E ({\bf x}, {\bf t}; {\bf W}, {\bf b} ) =  -\frac{1}{N}\sum^N_{i=1} \left[ {\bf t}_i \log {\bf y}_i ({\bf x}_i; {\bf W}, {\bf b}) + (1 - {\bf t}_i) \log \{ 1 - {\bf y}_i ({\bf x}_i; {\bf W}, {\bf b}) \}\right] $$

#### 3.2. 順伝播
$$
    {\bf y} = \sigma({\bf W}^{\mathrm{T}} {\bf x} + {\bf b})
$$

#### 3.3. 逆伝播
\begin{align*}
    \delta &= {\bf y} - {\bf t} \\
    \nabla_{\bf W} E &= \frac{1}{N}{\bf x} \delta^{\mathrm{T}} \\
    \nabla_{\bf b} E &= \frac{1}{N}\delta \mathbb{1}_N \\
    {\bf W} &\leftarrow {\bf W} - \epsilon \nabla_{\bf W} E \\
    {\bf b} &\leftarrow {\bf b} - \epsilon \nabla_{\bf b} E \\
\end{align*}

In [4]:
# Keep the argument of log away from zero.
def np_log(x):
    """Natural logarithm with the input floored at 1e-10.

    Values below 1e-10 (including 0) are replaced by 1e-10 before the log is
    taken, so the result is never -inf; larger values pass through unchanged.

    The previous form, ``np.clip(a=x, a_min=1e-10, a_max=x)``, used ``x`` as
    its own upper bound. NumPy documents clip as equivalent to
    ``np.minimum(a_max, np.maximum(a, a_min))``, which with ``a_max=x`` hands
    back ``x`` untouched, so no flooring took place. Only a lower bound is
    wanted here, hence ``np.maximum``.

    :param x: scalar or np.ndarray
    :return: log(max(x, 1e-10)), same shape as ``x``
    """
    return np.log(np.maximum(x, 1e-10))

In [5]:
def train_or(x, t, eps=1.0):
    """Perform one gradient-descent step of logistic regression on a batch.

    The module-level parameters ``W_or`` and ``b_or`` are updated in place.

    :param x: np.ndarray, input data, shape=(batch_size, input_dim)
    :param t: np.ndarray, target labels, shape=(batch_size, output_dim)
    :param eps: float, learning rate
    :return: mean binary cross-entropy of the batch, evaluated with the
        parameters as they were before this update
    """
    global W_or, b_or

    n_samples = len(x)

    # Forward pass
    y = sigmoid(x @ W_or + b_or)  # shape: (batch_size, output_dim)

    # Binary cross-entropy, averaged over every element of the batch
    positive_term = t * np_log(y)
    negative_term = (1 - t) * np_log(1 - y)
    cost = (-positive_term - negative_term).mean()

    # Backward pass: error signal at the output
    delta = y - t  # shape: (batch_size, output_dim)

    # Batch-averaged gradients
    grad_W = x.T @ delta / n_samples  # shape: (input_dim, output_dim)
    grad_b = np.ones(n_samples) @ delta / n_samples  # shape: (output_dim,)

    # Parameter update
    W_or -= eps * grad_W
    b_or -= eps * grad_b

    return cost

def valid_or(x, t):
    """Evaluate the logistic-regression model without updating its parameters.

    :param x: np.ndarray, input data, shape=(batch_size, input_dim)
    :param t: np.ndarray, target labels, shape=(batch_size, output_dim)
    :return: tuple ``(cost, y)`` - the mean binary cross-entropy and the
        predicted probabilities
    """
    y = sigmoid(x @ W_or + b_or)
    element_losses = -t * np_log(y) - (1 - t) * np_log(1 - y)
    return element_losses.mean(), y

### 4. 学習

In [6]:
for epoch in range(1000):
    # Online learning: reshuffle the samples, then update on one sample at a time.
    x_train_or, t_train_or = shuffle(x_train_or, t_train_or)
    for x, t in zip(x_train_or, t_train_or):
        # Each sample is presented as a batch of size 1.
        cost = train_or(x.reshape(1, -1), t.reshape(1, -1))

    # Evaluate on the validation set at the end of every epoch.
    cost, y_pred = valid_or(x_valid_or, t_valid_or)

# Predictions from the final epoch.
print(y_pred)

[[0.9979988 ]
 [0.99799904]
 [0.00500219]
 [0.99999998]]


## 課題2. ソフトマックス回帰の実装と学習 (MNIST)

### 1. ソフトマックス関数
$$
    \mathrm{softmax}({\bf x})_k = \frac{\exp({\bf x}_k)}{\sum^K_{k'=1} \exp({\bf x}_{k'})} \hspace{10mm} \text{for} \, k=1,\ldots, K
$$

In [7]:
def softmax(x):
    """Row-wise softmax of a 2-D array.

    The maximum of each row is subtracted before exponentiating, which
    prevents overflow/underflow in exp without changing the result. The
    shift is applied to a new array, so the caller's ``x`` is left untouched
    (the earlier ``x -= ...`` form overwrote the argument in place).

    :param x: array-like, shape=(batch_size, n_classes)
    :return: np.ndarray of the same shape whose rows sum to 1
    """
    x = np.asarray(x)
    shifted = x - x.max(axis=1, keepdims=True)
    x_exp = np.exp(shifted)
    return x_exp / np.sum(x_exp, axis=1, keepdims=True)

### 2. データセットの設定と重みの定義

In [8]:
# mnist = fetch_mldata(dataname='MNIST original')
mnist = fetch_openml(name='mnist_784') # if the line above raises an HTTPError, run this one instead (it takes a little while)

# Coerce to plain ndarrays first: depending on the scikit-learn version,
# fetch_openml may hand back a pandas DataFrame / Series, and the training
# loop below relies on iterating over the rows of a NumPy array.
# Inputs are divided by 255 (pixel values presumably lie in 0-255).
x_mnist = np.asarray(mnist.data, dtype='float32') / 255.
# One-hot encode the labels by picking rows of the 10x10 identity matrix.
t_mnist = np.eye(N=10)[np.asarray(mnist.target).astype('int32')]

# Hold out 10000 samples for testing, then 10000 of the remainder for validation.
x_train_mnist, x_test_mnist, t_train_mnist, t_test_mnist = train_test_split(x_mnist, t_mnist, test_size=10000)
x_train_mnist, x_valid_mnist, t_train_mnist, t_valid_mnist = train_test_split(x_train_mnist, t_train_mnist, test_size=10000)

In [9]:
# Model parameters (input dimension: 784, output dimension: 10)
W_mnist = np.random.uniform(-0.08, 0.08, (784, 10)).astype(np.float32)
b_mnist = np.zeros(10, dtype=np.float32)

### 3. train関数とvalid関数

#### 3.1. 誤差関数
- 負の対数尤度 (多クラス交差エントロピー)
$$ E ({\bf x}, {\bf t}; {\bf W}, {\bf b} ) =  -\frac{1}{N}\sum^N_{i=1} \sum^K_{k=1} {\bf t}_{i, k} \log {\bf y}_{i, k} ({\bf x}_i; {\bf W}, {\bf b}) $$

#### 3.2. 順伝播
$$
    {\bf y} = \mathrm{softmax}({\bf W}^{\mathrm{T}}{\bf x} + {\bf b})
$$

#### 3.3. 逆伝播
\begin{align*}
    \delta &= {\bf y} - {\bf t} \\
    \nabla_{\bf W} E &= \frac{1}{N}{\bf x} \delta^{\mathrm{T}} \\
    \nabla_{\bf b} E &= \frac{1}{N}\delta \mathbb{1}_N \\
    {\bf W} &\leftarrow {\bf W} - \epsilon \nabla_{\bf W} E \\
    {\bf b} &\leftarrow {\bf b} - \epsilon \nabla_{\bf b} E \\
\end{align*}

In [10]:
def train_mnist(x, t, eps=1.0):
    """Perform one gradient-descent step of softmax regression on a batch.

    The module-level parameters ``W_mnist`` and ``b_mnist`` are updated in
    place.

    :param x: np.ndarray, input data, shape=(batch_size, input_dim)
    :param t: np.ndarray, target labels, shape=(batch_size, output_dim)
    :param eps: float, learning rate
    :return: mean multi-class cross-entropy of the batch, evaluated with the
        parameters as they were before this update
    """
    global W_mnist, b_mnist

    n_samples = len(x)

    # Forward pass
    y = softmax(x @ W_mnist + b_mnist)  # shape: (batch_size, output_dim)

    # Cross-entropy: sum over classes, then average over the batch
    per_sample_loss = (-t * np_log(y)).sum(axis=1)
    cost = per_sample_loss.mean()

    # Backward pass: error signal at the output
    delta = y - t  # shape: (batch_size, output_dim)

    # Batch-averaged gradients
    grad_W = x.T @ delta / n_samples  # shape: (input_dim, output_dim)
    grad_b = np.ones(n_samples) @ delta / n_samples  # shape: (output_dim,)

    # Parameter update
    W_mnist -= eps * grad_W
    b_mnist -= eps * grad_b

    return cost

def valid_mnist(x, t):
    """Evaluate the softmax-regression model without updating its parameters.

    :param x: np.ndarray, input data, shape=(batch_size, input_dim)
    :param t: np.ndarray, target labels, shape=(batch_size, output_dim)
    :return: tuple ``(cost, y)`` - the mean multi-class cross-entropy and the
        predicted class probabilities
    """
    y = softmax(x @ W_mnist + b_mnist)
    per_sample_loss = (-t * np_log(y)).sum(axis=1)
    return per_sample_loss.mean(), y

### 4. 学習

In [11]:
for epoch in range(3):
    # Online learning: reshuffle the samples, then update on one sample at a time.
    x_train_mnist, t_train_mnist = shuffle(x_train_mnist, t_train_mnist)
    for x, t in zip(x_train_mnist, t_train_mnist):
        # Each sample is presented as a batch of size 1.
        cost = train_mnist(x.reshape(1, -1), t.reshape(1, -1))

    # Report cost and accuracy on the validation set after every epoch;
    # class labels are recovered as the argmax over the class axis.
    cost, y_pred = valid_mnist(x_valid_mnist, t_valid_mnist)
    print('EPOCH: {}, Valid Cost: {:.3f}, Valid Accuracy: {:.3f}'.format(
        epoch + 1,
        cost,
        accuracy_score(np.argmax(t_valid_mnist, axis=1), np.argmax(y_pred, axis=1))
    ))

EPOCH: 1, Valid Cost: 2.649, Valid Accuracy: 0.866
EPOCH: 2, Valid Cost: 2.224, Valid Accuracy: 0.887
EPOCH: 3, Valid Cost: 2.220, Valid Accuracy: 0.888
