## 下ごしらえ

In [0]:
import numpy as np
from keras.datasets import mnist
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from datetime import datetime
import time
import sys
# ライブラリまでのディレクトリ定義
#sys.path.append('../ml-scratch/utils') 
sys.path.append('../') # colaboratory用

import fc, get_mini_batch, relu, soft_max

In [3]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Preprocessing: cast the images to float64, then divide by 255.
# np.float64 is spelled out because the np.float alias was removed in
# NumPy 1.24 and now raises AttributeError.
X_train = X_train.astype(np.float64)
X_test = X_test.astype(np.float64)
X_train /= 255
X_test /= 255

# Hold out 20% of the training data as a validation split.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)

# Two training samples with a channel axis inserted at position 1.
tstX = X_train[:2, np.newaxis, :, :]
#print(tstX)
#print(tstX.shape)

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


## 方針
- 一度ライブラリなどを使って作りきってみる(im2col, col2im)  
→ 出力された値を正解として、どういった操作が必要になるかを後追いしたい

## 【問題1】2次元畳み込み層の作成


In [0]:
import numpy as np

class Conv2d:
    """2-D convolution layer computed as a matrix product via im2col/col2im.

    Parameters
    ----------
    W : ndarray, shape (FN, C, FH, FW)
        Filter weights.
    b : ndarray, shape (FN,)
        Bias, broadcast over every output position of each filter.
    stride : int
        Step between neighbouring filter positions.
    pad : int
        Padding passed through to im2col / col2im.
    optimizer : object or None
        Object whose ``update(layer)`` adjusts ``layer.W`` and ``layer.b``
        from ``layer.dW`` and ``layer.db``. When None, ``backward`` only
        computes the gradients and leaves the parameters untouched.
    """

    def __init__(self, W, b, stride=1, pad=0, optimizer=None):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad
        self.optimizer = optimizer

        # Values cached by forward() for use in backward().
        self.x = None
        self.col = None
        self.col_W = None

        # Gradients produced by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Convolve a batch.

        Parameters
        ----------
        x : ndarray, shape (N, C, H, W)

        Returns
        -------
        A : ndarray, shape (N, FN, out_h, out_w)
        """
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        out_h = calc_out_h(H, FH, self.stride, self.pad)
        out_w = calc_out_w(W, FW, self.stride, self.pad)

        # One row per output position, one column per filter.
        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T

        A = np.dot(col, col_W) + self.b
        A = A.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        self.x = x
        self.col = col
        self.col_W = col_W

        return A

    def backward(self, dout):
        """Back-propagate through the layer.

        Stores the parameter gradients in ``dW`` / ``db`` and, when an
        optimizer is attached, lets it update ``W`` / ``b``.

        Parameters
        ----------
        dout : ndarray, shape (N, FN, out_h, out_w)
            Gradient with respect to the output of forward().

        Returns
        -------
        dx : ndarray, same shape as the input given to forward()
        """
        FN, C, FH, FW = self.W.shape
        # Put the filter axis last so rows line up with the rows of self.col.
        dout = dout.transpose(0, 2, 3, 1).reshape(-1, FN)

        self.dW = np.dot(self.col.T, dout)
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)
        self.db = np.sum(dout, axis=0)

        # The input gradient must be computed before the weights are updated.
        dcol = np.dot(dout, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)

        # Update W and b only when an optimizer has been attached.
        if self.optimizer is not None:
            self.optimizer.update(self)

        return dx

In [0]:
import numpy as np

class ConvSGD:
    """
    Stochastic gradient descent for a layer exposing W, b, dW and db.

    Parameters
    ----------
    lr : learning rate
    """

    def __init__(self, lr):
        self.lr = lr

    def update(self, layer):
        """
        Apply one gradient step to a layer's weights and bias.

        Parameters
        ----------
        layer : layer instance before the update

        Returns
        ----------
        layer : the same layer instance after the update
        """
        # Weights first, then bias; each is stepped against its own gradient.
        for param_name, grad_name in (("W", "dW"), ("b", "db")):
            value = getattr(layer, param_name)
            value -= self.lr * getattr(layer, grad_name)
            setattr(layer, param_name, value)

        return layer

In [0]:
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold every filter-sized window of a batch into one matrix row.

    Parameters
    ----------
    input_data : ndarray, shape (N, C, H, W)
    filter_h, filter_w : int
        Window height and width.
    stride : int
        Step between neighbouring windows.
    pad : int
        Number of zero rows/columns added on each side of H and W.

    Returns
    -------
    ndarray, shape (N*out_h*out_w, C*filter_h*filter_w)
    """
    n_batch, n_channels, height, width = input_data.shape
    rows = calc_out_h(height, filter_h, stride, pad)
    cols = calc_out_w(width, filter_w, stride, pad)

    # Only the two spatial axes are padded.
    pad_spec = [(0, 0), (0, 0), (pad, pad), (pad, pad)]
    padded = np.pad(input_data, pad_spec, 'constant')
    patches = np.zeros((n_batch, n_channels, filter_h, filter_w, rows, cols))

    # For each offset inside the window, copy the strided grid of pixels
    # that sit at that offset in every window.
    for fy in range(filter_h):
        y_stop = fy + stride*rows
        for fx in range(filter_w):
            x_stop = fx + stride*cols
            patches[:, :, fy, fx, :, :] = padded[:, :, fy:y_stop:stride, fx:x_stop:stride]

    # Output positions first, then (channel, fy, fx) flattened into columns.
    unfolded = patches.transpose(0, 4, 5, 1, 2, 3)
    return unfolded.reshape(n_batch*rows*cols, -1)

  
def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    """Fold a column matrix back into image layout, summing overlaps.

    This is the adjoint of im2col: a pixel that belongs to several windows
    receives the sum of all their contributions, which is what the
    backward pass of convolution and pooling requires.

    Parameters
    ----------
    col : ndarray, shape (N*out_h*out_w, C*filter_h*filter_w)
    input_shape : tuple (N, C, H, W)
        Shape of the image batch to reconstruct.
    filter_h, filter_w : int
        Window height and width.
    stride : int
        Step between neighbouring windows.
    pad : int
        Padding that was applied on each side of H and W.

    Returns
    -------
    ndarray, shape (N, C, H, W)
    """
    N, C, H, W = input_shape
    out_h = calc_out_h(H, filter_h, stride, pad)
    out_w = calc_out_w(W, filter_w, stride, pad)
    col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)

    # The buffer has stride - 1 spare rows/columns beyond the padded size;
    # they are cropped away together with the padding on return.
    img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1))
    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            # Accumulate: with plain assignment, windows that overlap would
            # overwrite each other's contribution.
            img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]

    return img[:, :, pad:H + pad, pad:W + pad]

## 【問題2】2次元畳み込み後の出力サイズ

In [0]:
def calc_out_h(H, filter_h, stride, pad):
    """Return the output height of a 2-D convolution or pooling step.

    H is the input height, filter_h the window height, stride the step
    between windows and pad the padding added on each side.
    """
    padded_h = H + 2*pad
    n_steps = (padded_h - filter_h)//stride
    return n_steps + 1
    
    
def calc_out_w(W, filter_w, stride, pad):
    """Return the output width of a 2-D convolution or pooling step.

    W is the input width, filter_w the window width, stride the step
    between windows and pad the padding added on each side.
    """
    padded_w = W + 2*pad
    n_steps = (padded_w - filter_w)//stride
    return n_steps + 1

In [7]:
# Spot-check the output-size helpers with two hand-picked configurations.
print(calc_out_h(H=28, filter_h=3, stride=1, pad=0))
print(calc_out_w(W=25, filter_w=3, stride=3, pad=2))

26
9


In [39]:
# Take two training images and insert a channel axis at position 1.
X = X_train[:2, np.newaxis, :, :]
print(X.shape)
# np.float64 replaces the np.float alias, which was removed in NumPy 1.24.
W = np.ones((4, 1, 3, 3)).astype(np.float64)
b = np.array([1, 2, 3, 4]).astype(np.float64)

c2d = Conv2d(W, b)
A = c2d.forward(X)

# NOTE(review): y is not used anywhere else in this cell.
y = np.array([[20., 25., 35., 30.],[25., 30., 40., 20.]])

# The forward output is fed back as the upstream gradient; this cell only
# inspects the resulting shapes.
dX = c2d.backward(A)

print(A.shape)
print(dX.shape)
print(c2d.W.shape)
print(c2d.b.shape)
#print('A : %s  shape : %s' % (A, A.shape))
#print('dX : %s  shape : %s' % (dX, dX.shape))

(2, 1, 28, 28)
(2, 4, 26, 26)
(2, 1, 28, 28)
(4, 1, 3, 3)
(4,)


## 【問題3】最大プーリング層の作成

In [0]:
class MaxPool2D:
    """Max pooling over (pool_h, pool_w) windows, built on im2col/col2im.

    Parameters
    ----------
    pool_h, pool_w : int
        Window height and width.
    stride : int
        Step between neighbouring windows.
    pad : int
        Padding handed to im2col / col2im. The padded cells are filled by
        im2col and take part in the maximum like any other cell.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

        # Cached by forward() for use in backward().
        self.x = None
        self.arg_max = None

    def forward(self, x):
        """Pool a batch.

        Parameters
        ----------
        x : ndarray, shape (N, C, H, W)

        Returns
        -------
        out : ndarray, shape (N, C, out_h, out_w)
        """
        N, C, H, W = x.shape
        # Include the padding so the sizes agree with the number of rows
        # im2col produces; for pad == 0 this is the unpadded formula.
        out_h = (H + 2*self.pad - self.pool_h)//self.stride + 1
        out_w = (W + 2*self.pad - self.pool_w)//self.stride + 1

        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        # One row per (position, channel) window, one column per window cell.
        col = col.reshape(-1, self.pool_h*self.pool_w)

        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max

        return out

    def backward(self, dout):
        """Route each upstream gradient to the cell that held the maximum.

        Parameters
        ----------
        dout : ndarray, shape (N, C, out_h, out_w)

        Returns
        -------
        dx : ndarray, same shape as the input given to forward()
        """
        # Channel axis last, matching the row order used in forward().
        dout = dout.transpose(0, 2, 3, 1)

        pool_size = self.pool_h * self.pool_w
        dmax = np.zeros((dout.size, pool_size))

        # Only the arg-max cell of each window receives a gradient.
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))

        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)

        return dx

In [61]:
# Pool the convolution output twice with 2x2 windows and stride 2.
mp2d = MaxPool2D(pool_h=2, pool_w=2, stride=2)
A2 = mp2d.forward(x=A)
hoge = mp2d.forward(x=A2)
print(A2.shape, hoge.shape, sep="\n")

# Expected sizes after each pooling step, computed by hand.
print(int((26 - 2 + 2 * 0) / 2 + 1), int((13 - 2 + 2 * 0) / 2 + 1), sep="\n")

(2, 4, 13, 13)
(2, 4, 6, 6)
13
6


## 【問題4】平滑化

In [0]:
import numpy as np

class Flatten():
    """Reshape a 4-D batch to 2-D on the way forward and back to 4-D on the
    way backward."""

    def __init__(self):
        self.X = None
        # The four dimensions of the most recent forward() input.
        self.S = self.C = self.H = self.W = None

    def forward(self, X):
        """Flatten X from (S, C, H, W) to (S, C*H*W) and remember its shape."""
        n_samples, n_channels, height, width = X.shape
        self.S, self.C, self.H, self.W = n_samples, n_channels, height, width

        flat = X.reshape(n_samples, -1)
        self.X = flat
        return flat

    def backward(self, dout):
        """Reshape dout to the 4-D shape recorded by the last forward()."""
        recorded_shape = (self.S, self.C, self.H, self.W)
        return dout.reshape(recorded_shape)

In [35]:
# Round-trip the pooled activations through Flatten and show both shapes.
ft = Flatten()
A3 = ft.forward(X=A2)
print(A3.shape)
fuga = ft.backward(dout=A3)
print(fuga.shape)

(2, 1875)
(2, 3, 25, 25)


## (重みの初期化)

In [0]:
import numpy as np

class ConvHeInitializer:
    """
    He initialization for convolution filters.
    """

    def W(self, FN, C, FH, FW):
        """
        Initialize the filter weights.

        Samples are drawn from a normal distribution with standard
        deviation sqrt(2 / fan_in), where fan_in = C * FH * FW is the
        number of inputs feeding one output value.

        Parameters
        ----------
        FN : int
          Number of filters (output channels)
        C : int
          Number of input channels
        FH : int
          Filter height
        FW : int
          Filter width

        Returns
        ----------
        W : ndarray, shape (FN, C, FH, FW)
          Filter weights
        """

        fan_in = C * FH * FW
        W = np.random.randn(FN, C, FH, FW) * np.sqrt(2.0 / fan_in)

        return W

    def B(self, FN):
        """
        Initialize the bias.

        Parameters
        ----------
        FN : int
          Number of filters (output channels)

        Returns
        ----------
        B : ndarray, shape (FN,)
          Bias, all zeros
        """

        B = np.zeros(FN)

        return B

In [56]:
# Check the shape of freshly initialized filters (3 filters, 1 channel, 3x3).
chi = ConvHeInitializer()
#print(chi.W(3, 1, 3, 3))
print(chi.W(FN=3, C=1, FH=3, FW=3).shape)

(3, 1, 3, 3)


## 【問題5】学習・推定
## 【問題6】（アドバンス課題）LeNet

In [0]:
import numpy as np
import logging                                                                     # ログ
from datetime import datetime                                        # 時間のやつ
from sklearn.preprocessing import OneHotEncoder       # ワンホットのやつ
from tqdm import tqdm                                                     # 進捗バーを出してくれるやつ


class Scratch2dCNNClassifier():
    """
    Scratch 2-D convolutional neural-network classifier (LeNet-style).

    The network is ``n_cvlayer`` blocks of convolution -> activation ->
    max pooling, a flatten step, and ``n_fclayer`` fully connected layers
    whose last layer feeds a softmax.

    Parameters
    ----------
    batch_size : int
        Mini-batch size used for training and prediction.
    n_epochs : int
        Number of passes over the training data.
    n_nodes : sequence of int
        Output widths of the hidden fully connected layers; the first
        ``n_fclayer - 1`` entries are used.
    n_cvlayer : int
        Number of convolution blocks.
    n_fclayer : int
        Number of fully connected layers, including the output layer.
    verbose : bool
        When True, per-epoch statistics are written to the log.
    sigma : float
        Accepted for interface compatibility; not used by this class.
    lr : float
        Learning rate handed to the optimizers.
    filter_n : sequence of int
        Number of filters of each convolution layer.
    filter_w, filter_h : int
        Filter width and height.
    stride, pad : int
        Stride and padding of the convolution layers.
    pool_size : int
        Window size and stride of the pooling layers.
    activation : str
        Only 'relu' is implemented.
    optimizer : str
        Only 'sgd' is implemented.

    Raises
    ------
    NotImplementedError
        If ``activation`` or ``optimizer`` names an unimplemented choice.
    ValueError
        If the layer counts do not fit ``filter_n`` / ``n_nodes``.
    """

    def __init__(self, batch_size=100, n_epochs=3,  n_nodes=(120, 84),
                            n_cvlayer=2, n_fclayer=2, verbose=True, sigma=1e-2, lr=1e-2,
                            filter_n=(6, 16), filter_w=5, filter_h=5, stride=1, pad=0, pool_size=2,
                            activation=None, optimizer='sgd'):

        # Validate before anything else so an unsupported configuration
        # fails immediately and without side effects.
        if activation != 'relu':
            raise NotImplementedError(
                "activation=%r is not implemented; use 'relu'" % (activation,))
        if optimizer != 'sgd':
            raise NotImplementedError(
                "optimizer=%r is not implemented; use 'sgd'" % (optimizer,))
        if n_fclayer < 1:
            raise ValueError("n_fclayer must be at least 1")
        if len(filter_n) < n_cvlayer:
            raise ValueError("filter_n needs one entry per convolution layer")
        if len(n_nodes) < n_fclayer - 1:
            raise ValueError("n_nodes needs one entry per hidden FC layer")

        self.batch_size = batch_size     # mini-batch size
        self.n_epochs = n_epochs         # number of epochs
        self.n_nodes = list(n_nodes)     # hidden FC layer widths
        self.n_cvlayer = n_cvlayer       # number of convolution blocks
        self.n_fclayer = n_fclayer       # number of fully connected layers
        self.verbose = verbose           # log the training progress
        self.activation = activation     # activation name (string)
        self.filter_n = list(filter_n)   # filters per convolution layer
        self.filter_w = filter_w         # filter width
        self.filter_h = filter_h         # filter height
        self.stride = stride             # convolution stride
        self.pad = pad                   # convolution padding
        self.pool_size = pool_size       # pooling window and stride
        self.cv_instances = []           # convolution layers
        self.fc_instances = []           # fully connected layers
        self.cv_activations = []         # activations after each convolution
        self.fc_activations = []         # activations after each FC layer
        self.pool_instances = []         # pooling layers
        self.loss_ = []                  # training loss per epoch
        self.loss_val_ = []              # validation loss per epoch
        self.ft = Flatten()

        # NOTE(review): he_initializer and sgd are assumed to be project
        # modules on sys.path; the original referenced them without
        # importing them. Confirm the module and class names.
        import he_initializer
        import sgd

        # Initializers and optimizers ('relu' + 'sgd' is the only supported pair).
        self.cv_initializer = ConvHeInitializer()
        self.fc_initializer = he_initializer.HeInitializer()
        self.cv_optimizer = ConvSGD(lr)
        self.fc_optimizer = sgd.SGD(lr)

        # One-hot encoder for the labels. scikit-learn 1.2 renamed the
        # ``sparse`` keyword to ``sparse_output``; older releases reject the
        # new name with a TypeError, so fall back to the old one.
        try:
            self.enc = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
        except TypeError:
            self.enc = OneHotEncoder(handle_unknown='ignore', sparse=False)

        # Send DEBUG-level logging to a dated file.
        time_stamp = datetime.now().strftime('%Y%m%d')
        logging.basicConfig(filename='../tmp/sprint13_' + time_stamp + '.log', level=logging.DEBUG)

    def _build_layers(self, input_shape, n_output):
        """Create all layer instances for inputs of shape (N, C, H, W)."""
        # NOTE(review): assumes the fc module exposes
        # FC(n_in, n_out, initializer, optimizer) and soft_max exposes
        # Softmax; the original called both names unqualified. Confirm.
        import fc
        import soft_max

        _, channels, height, width = input_shape

        # Start from scratch so calling fit() twice does not stack layers.
        self.cv_instances, self.cv_activations, self.pool_instances = [], [], []
        self.fc_instances, self.fc_activations = [], []

        for i in range(self.n_cvlayer):
            n_filters = self.filter_n[i]
            W = self.cv_initializer.W(n_filters, channels, self.filter_h, self.filter_w)
            b = self.cv_initializer.B(n_filters)
            if b is None:
                # Guard against an initializer that yields no bias array.
                b = np.zeros(n_filters)

            conv = Conv2d(W, b, self.stride, self.pad)
            # Conv2d.backward reads the optimizer from this attribute.
            conv.optimizer = self.cv_optimizer

            self.cv_instances.append(conv)
            self.cv_activations.append(relu.ReLU())
            self.pool_instances.append(
                MaxPool2D(self.pool_size, self.pool_size, self.pool_size))

            # Spatial size after the convolution ...
            height = (height + 2 * self.pad - self.filter_h) // self.stride + 1
            width = (width + 2 * self.pad - self.filter_w) // self.stride + 1
            # ... and after the pooling (window == stride, no padding).
            height = (height - self.pool_size) // self.pool_size + 1
            width = (width - self.pool_size) // self.pool_size + 1
            # The next convolution sees one channel per filter of this one.
            channels = n_filters

        # Width of the flattened feature vector entering the first FC layer.
        n_in = channels * height * width
        for i in range(self.n_fclayer):
            is_output = (i == self.n_fclayer - 1)
            n_out = n_output if is_output else self.n_nodes[i]

            dense = fc.FC(n_in, n_out, self.fc_initializer, self.fc_optimizer)
            act = soft_max.Softmax() if is_output else relu.ReLU()

            self.fc_instances.append(dense)
            self.fc_activations.append(act)
            n_in = n_out

    def _forward(self, X):
        """Run X through every layer; return the last layer's (A, Z)."""
        # NOTE(review): assumes the activation's forward() accepts the 4-D
        # output of Conv2d as well as 2-D FC output. Confirm against relu.ReLU.
        Z = X
        for conv, act, pool in zip(self.cv_instances, self.cv_activations,
                                   self.pool_instances):
            Z = pool.forward(act.forward(conv.forward(Z)))

        Z = self.ft.forward(Z)
        A = Z
        for dense, act in zip(self.fc_instances, self.fc_activations):
            A = dense.forward(Z)
            Z = act.forward(A)

        return A, Z

    def _backward(self, dA):
        """Back-propagate dA (gradient at the output layer's pre-activation)."""
        dZ = self.fc_instances[-1].backward(dA)
        for dense, act in zip(reversed(self.fc_instances[:-1]),
                              reversed(self.fc_activations[:-1])):
            dZ = dense.backward(act.backward(dZ))

        dZ = self.ft.backward(dZ)
        for conv, act, pool in zip(reversed(self.cv_instances),
                                   reversed(self.cv_activations),
                                   reversed(self.pool_instances)):
            dZ = conv.backward(act.backward(pool.backward(dZ)))

        return dZ

    def fit(self, X, y, X_val=None, y_val=None):
        """
        Train the classifier.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_channels, height, width)
            Training features
        y : ndarray, shape (n_samples, )
            Training labels
        X_val : ndarray, shape (n_samples, n_channels, height, width)
            Validation features
        y_val : ndarray, shape (n_samples, )
            Validation labels

        Returns
        -------
        self
        """

        # One-hot encode the labels. The validation labels reuse the
        # encoding fitted on the training labels so the columns line up.
        y = self.enc.fit_transform(y[:, np.newaxis])
        if X_val is not None:
            y_val = self.enc.transform(y_val[:, np.newaxis])

        self._build_layers(X.shape, y.shape[1])
        output_act = self.fc_activations[-1]

        for e in tqdm(range(self.n_epochs)):
            # Split the training data into mini-batches.
            gmb = get_mini_batch.GetMiniBatch(X, y, batch_size=self.batch_size)
            loss_ary = []

            for mini_X_train, mini_y_train in gmb:
                A, Z = self._forward(mini_X_train)
                # The output activation's backward() returns the gradient
                # and the loss for this batch.
                dA, loss = output_act.backward(Z, mini_y_train)
                loss_ary.append(loss)
                dZ = self._backward(dA)

            # Record the mean training loss of this epoch.
            self.loss_.append(np.mean(loss_ary))

            if X_val is not None:
                # Evaluate in mini-batches to bound the size of the im2col
                # buffers; the epoch value is the mean over the batches.
                val_losses = []
                for start in range(0, len(X_val), self.batch_size):
                    stop = start + self.batch_size
                    _, Z_val = self._forward(X_val[start:stop])
                    _, loss_val = output_act.backward(Z_val, y_val[start:stop])
                    val_losses.append(loss_val)

                self.loss_val_.append(np.mean(val_losses))

            # Log the statistics of the last training batch, if there was one.
            if self.verbose and loss_ary:
                logging.info('forward propagation %sエポック目 sum: %s shape: %s', e+1, np.sum(A), A.shape)
                logging.info('forward propagation %sエポック目 sum: %s shape: %s', e+1, np.sum(Z), Z.shape)
                logging.info('backward propagation %sエポック目 sum: %s shape: %s', e+1, np.sum(dA), dA.shape)
                logging.info('backward propagation %sエポック目 sum: %s shape: %s', e+1, np.sum(dZ), dZ.shape)
                logging.info('loss %sエポック目 : %s', e+1, np.sum(loss))

        return self

    def predict(self, X, y=None):
        """
        Predict labels with the trained classifier.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_channels, height, width)
            Samples to classify
        y : ignored
            Kept so existing ``predict(X, y)`` calls continue to work.

        Returns
        -------
        y_pred : ndarray, shape (n_samples, )
            Predicted label of every sample, in the order of X
        """

        # Column k of the one-hot encoding stands for classes[k].
        classes = self.enc.categories_[0]
        chunks = []

        # Walk X in order so that y_pred[i] belongs to X[i].
        for start in range(0, len(X), self.batch_size):
            _, Z = self._forward(X[start:start + self.batch_size])
            # The label with the highest output is the prediction.
            chunks.append(classes[np.argmax(Z, axis=1)])

        if not chunks:
            return np.empty(0, dtype=classes.dtype)

        return np.concatenate(chunks)

NameError: ignored