# 第4回講義 演習

## 課題1. ロジスティック回帰の実装と学習

In [34]:
from __future__ import division
from sklearn.utils import shuffle

import numpy as np

# Fix the seed of NumPy's global random generator so that the np.random
# draws made further down (e.g. the weight initialisation) are repeatable.
np.random.seed(1234)

### 1. LogisticRegressionクラス

In [35]:
#--- LogisticRegression
class LogisticRegression(object):
    """A single affine map followed by an element-wise activation function."""

    #- Constructor
    def __init__(self, in_dim, out_dim, function, deriv_function):
        """Create the model with small random weights and zero biases.

        Args:
            in_dim: Number of input features (rows of ``W``).
            out_dim: Number of output units (columns of ``W``).
            function: Activation applied to the pre-activation in ``f_prop``.
            deriv_function: Derivative of ``function``. It is only stored
                here; this class never calls it.
        """
        # Weights are drawn from np.random's global generator, so seeding
        # that generator makes the initial weights reproducible.
        self.W = np.random.uniform(low=-0.08, high=0.08, size=(in_dim, out_dim)).astype("float32")
        self.b = np.zeros(out_dim).astype("float32")
        self.function = function
        self.deriv_function = deriv_function
        # Caches filled in later: u and z by f_prop, delta by the caller.
        self.u = None
        self.z = None
        self.delta = None

    #- Forward Propagation
    def f_prop(self, x):
        """Compute the model output for ``x`` and cache the intermediates.

        Args:
            x: Input whose last axis has ``in_dim`` entries.

        Returns:
            ``function(x . W + b)``; the same object is stored as ``self.z``
            and the pre-activation is stored as ``self.u``.
        """
        self.u = np.dot(x, self.W) + self.b
        self.z = self.function(self.u)
        return self.z

### 2. シグモイド関数とその微分

In [36]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp`` so that large negative inputs do not overflow inside
    ``exp`` (the naive formula produces ``inf`` and a RuntimeWarning there).

    Args:
        x: Scalar or array of pre-activations.

    Returns:
        Values in the closed interval [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))

def deriv_sigmoid(x):
    """Return the derivative of the sigmoid, ``s(x) * (1 - s(x))``, at ``x``."""
    s = sigmoid(x)
    return s * (1 - s)

### 3. データセットの設定とモデルの定義

In [37]:
#- OR: the four possible input pairs and the OR of each pair
train_X = np.array([
    [0, 1],
    [1, 0],
    [0, 0],
    [1, 1],
])
train_y = np.array([
    [1],
    [1],
    [0],
    [1],
])
# Evaluation reuses the training patterns.
test_X = train_X
test_y = train_y

# Two inputs, one sigmoid output unit.
model = LogisticRegression(2, 1, sigmoid, deriv_sigmoid)

### 4. train関数とtest関数

##### 誤差関数
* 負の対数尤度関数 (Negative Log-Likelihood Function)
* 交差エントロピーともいう

$$ E ( {\bf \theta} ) =  -\sum^N_{i=1} \left[ t_i \log y ({\bf x}_i ; {\bf \theta}) + (1 - t_i) \log \{ 1 - y ({\bf x}_i ; {\bf \theta}) \}\right] $$

In [38]:
def train(X, t, eps=1.0):
    """Apply one gradient-descent update to the module-level ``model``.

    Args:
        X: Input batch, one sample per row.
        t: Targets, one row per sample.
        eps: Learning rate.

    Returns:
        The negative log-likelihood on ``(X, t)`` measured after the update.
    """
    #- Forward Propagation
    y = model.f_prop(X)

    #- Delta
    # For a sigmoid output combined with the negative log-likelihood cost,
    # the gradient with respect to the pre-activation reduces to y - t.
    delta = y - t

    #- Back Propagation
    model.delta = delta

    #- Update Parameters
    dW = np.dot(X.T, delta)
    db = np.sum(delta, axis=0)  # sum of the per-sample deltas
    model.W = model.W - eps*dW
    model.b = model.b - eps*db

    #- Train Cost (evaluated with the updated parameters)
    y = model.f_prop(X)
    return np.sum(-t*np.log(y) - (1 - t)*np.log(1 - y))

def test(X, t):
    """Evaluate the module-level ``model`` on ``(X, t)``.

    Returns:
        A ``(cost, prediction)`` pair: the summed negative log-likelihood
        and the model output for ``X``.
    """
    #- Test Cost
    prediction = model.f_prop(X)
    target_on_term = -t*np.log(prediction)
    target_off_term = (1 - t)*np.log(1 - prediction)
    total_cost = np.sum(target_on_term - target_off_term)
    return total_cost, prediction

### 5. 学習

In [39]:
#- Epoch
# range() and print(...) with a single argument behave the same on
# Python 2 and Python 3, unlike xrange and the print statement.
for epoch in range(1000):
    #- Online Learning: one parameter update per training sample
    for x, y in zip(train_X, train_y):
        # Iterating drops the leading axis; np.newaxis puts it back so that
        # train() receives a batch holding a single row.
        train(x[np.newaxis, :], y[np.newaxis, :])

# Only the final predictions are reported, so evaluate once after training.
cost, pred_y = test(test_X, test_y)

print(pred_y)

[[ 0.99799688]
 [ 0.99798893]
 [ 0.00499169]
 [ 0.99999998]]


## 課題2. 活性化関数とその微分の実装

### 1. シグモイド関数とその微分

In [40]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp`` so that large negative inputs do not overflow inside
    ``exp`` (the naive formula produces ``inf`` and a RuntimeWarning there).

    Args:
        x: Scalar or array of pre-activations.

    Returns:
        Values in the closed interval [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))

def deriv_sigmoid(x):
    """Return the derivative of the sigmoid, ``s(x) * (1 - s(x))``, at ``x``."""
    s = sigmoid(x)
    return s*(1 - s)

### 2. ソフトマックス関数とその微分

In [41]:
def softmax(x):
    """Return the softmax of ``x`` taken along axis 1.

    The maximum along axis 1 is subtracted before exponentiating. Softmax is
    invariant to such a shift, and it keeps every exponent at or below zero
    so that large logits no longer overflow to ``inf`` and yield NaN.

    Args:
        x: Array with at least two axes; normalisation runs over axis 1.

    Returns:
        Array shaped like ``x`` whose entries sum to 1 along axis 1.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def deriv_softmax(x):
    """Return ``p * (1 - p)`` element-wise, where ``p = softmax(x)``."""
    p = softmax(x)
    return p * (1 - p)

### 3. tanh関数とその微分

In [42]:
def tanh(x):
    """Return the hyperbolic tangent of ``x`` element-wise (NumPy's tanh)."""
    activated = np.tanh(x)
    return activated

def deriv_tanh(x):
    """Return the derivative of tanh at ``x``, i.e. ``1 - tanh(x)**2``."""
    th = tanh(x)
    return 1 - th**2

## 課題3. 多層パーセプトロンの実装と学習

### 1. Layerクラス

In [43]:
#--- Layer
class Layer(object):
    """One fully connected layer: an affine map plus an activation function."""

    #- Constructor
    def __init__(self, in_dim, out_dim, function, deriv_function):
        """Create the layer with small random weights and zero biases.

        Args:
            in_dim: Number of input features (rows of ``W``).
            out_dim: Number of output units (columns of ``W``).
            function: Activation applied to the pre-activation in ``f_prop``.
            deriv_function: Derivative of ``function``, evaluated at the
                cached pre-activation in ``b_prop``.
        """
        # Weights are drawn from np.random's global generator, so seeding
        # that generator makes the initial weights reproducible.
        self.W = np.random.uniform(low=-0.08, high=0.08, size=(in_dim, out_dim)).astype("float32")
        self.b = np.zeros(out_dim).astype("float32")
        self.function = function
        self.deriv_function = deriv_function
        # Caches: u and z are written by f_prop; delta by b_prop or by
        # whoever assigns it directly.
        self.u = None
        self.z = None
        self.delta = None

    #- Forward Propagation
    def f_prop(self, x):
        """Compute the layer output for ``x`` and cache the intermediates.

        Args:
            x: Input whose last axis has ``in_dim`` entries.

        Returns:
            ``function(x . W + b)``; the same object is stored as ``self.z``
            and the pre-activation is stored as ``self.u``.
        """
        self.u = np.dot(x, self.W) + self.b
        self.z = self.function(self.u)
        return self.z

    #- Back Propagation
    def b_prop(self, delta, W):
        """Compute this layer's error term from a given delta and weight matrix.

        Must be called after ``f_prop`` because it reads the cached ``self.u``.

        Args:
            delta: Error term to propagate back through ``W``.
            W: Weight matrix that ``delta`` is multiplied with (transposed).

        Returns:
            ``(delta . W^T) * deriv_function(u)``, also stored as ``self.delta``.
        """
        self.delta = np.dot(delta, W.T)*self.deriv_function(self.u)
        return self.delta

### 2. ネットワーク全体の順伝播

In [44]:
def f_props(layers, x):
    """Feed ``x`` through every layer in order and return the final output.

    Each element of ``layers`` must provide ``f_prop``; the output of one
    layer is the input of the next. With no layers, ``x`` is returned as is.
    """
    activation = x
    for current_layer in layers:
        activation = current_layer.f_prop(activation)
    return activation

### 3. ネットワーク全体の誤差逆伝播

In [45]:
def b_props(layers, delta):
    """Propagate the output error ``delta`` backwards through ``layers``.

    The last layer receives ``delta`` directly as its ``delta`` attribute.
    Every earlier layer, visited from back to front, gets its own delta from
    ``b_prop`` using the delta and the weights of the layer that follows it.
    Nothing is returned; results live in each layer's ``delta``.
    """
    back_to_front = layers[::-1]
    if len(back_to_front) == 0:
        return

    # The output layer takes the supplied error term unchanged.
    output_layer = back_to_front[0]
    output_layer.delta = delta
    following_W = output_layer.W

    for hidden_layer in back_to_front[1:]:
        delta = hidden_layer.b_prop(delta, following_W)
        following_W = hidden_layer.W

### 4. データセットの設定とネットワークの定義

In [46]:
#- XOR: the four possible input pairs and the XOR of each pair
train_X = np.array([
    [0, 1],
    [1, 0],
    [0, 0],
    [1, 1],
])
train_y = np.array([
    [1],
    [1],
    [0],
    [0],
])
# Evaluation reuses the training patterns.
test_X = train_X
test_y = train_y

# 2 inputs -> 3 hidden sigmoid units -> 1 sigmoid output unit
hidden_layer = Layer(2, 3, sigmoid, deriv_sigmoid)
output_layer = Layer(3, 1, sigmoid, deriv_sigmoid)
layers = [hidden_layer, output_layer]

### 5. train関数とtest関数

##### 誤差関数
* 負の対数尤度関数 (Negative Log-Likelihood Function)
* 交差エントロピーともいう

$$ E ( {\bf \theta} ) =  -\sum^N_{i=1} \left[ t_i \log y ({\bf x}_i ; {\bf \theta}) + (1 - t_i) \log \{ 1 - y ({\bf x}_i ; {\bf \theta}) \}\right] $$

In [47]:
def train(X, t, eps=1.0):
    """Apply one gradient-descent update to every layer in module-level ``layers``.

    Args:
        X: Input batch, one sample per row.
        t: Targets, one row per sample.
        eps: Learning rate.

    Returns:
        The negative log-likelihood on ``(X, t)`` measured after the update.
    """
    #- Forward Propagation
    y = f_props(layers, X)

    #- Delta
    # For a sigmoid output combined with the negative log-likelihood cost,
    # the gradient with respect to the output pre-activation is y - t.
    delta = y - t

    #- Back Propagation
    b_props(layers, delta)

    #- Update Parameters
    # z is the input each layer saw during the forward pass above: X for the
    # first layer, then the cached output of the preceding layer.
    z = X
    for layer in layers:
        dW = np.dot(z.T, layer.delta)
        db = np.sum(layer.delta, axis=0)  # sum of the per-sample deltas
        layer.W = layer.W - eps*dW
        layer.b = layer.b - eps*db
        z = layer.z

    #- Train Cost (evaluated with the updated parameters)
    y = f_props(layers, X)
    return np.sum(-t*np.log(y) - (1 - t)*np.log(1 - y))

def test(X, t):
    """Evaluate the network in module-level ``layers`` on ``(X, t)``.

    Returns:
        A ``(cost, prediction)`` pair: the summed negative log-likelihood
        and the network output for ``X``.
    """
    #- Test Cost
    prediction = f_props(layers, X)
    target_on_term = -t*np.log(prediction)
    target_off_term = (1 - t)*np.log(1 - prediction)
    total_cost = np.sum(target_on_term - target_off_term)
    return total_cost, prediction

### 6. 学習

In [48]:
#- Epoch
# range() and print(...) with a single argument behave the same on
# Python 2 and Python 3, unlike xrange and the print statement.
for epoch in range(2000):
    #- Online Learning: one parameter update per training sample
    for x, y in zip(train_X, train_y):
        # Iterating drops the leading axis; np.newaxis puts it back so that
        # train() receives a batch holding a single row.
        train(x[np.newaxis, :], y[np.newaxis, :])

# Only the final predictions are reported, so evaluate once after training.
cost, pred_y = test(test_X, test_y)

print(pred_y)

[[ 0.99788042]
 [ 0.99785468]
 [ 0.00182329]
 [ 0.0034414 ]]
