### DNNスクラッチコード

### データのロード

mnistの手書き文字認識問題

784ピクセルの値（0-255）から数字（0-9）を分類する。

In [1]:
import numpy as np
from keras.datasets import mnist
import gc
from sklearn.model_selection import train_test_split

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# プロトタイプなので100データだけ使用する
x_train = x_train[:100]
y_train = y_train[:100]
y_label = y_train
y_train = np.identity(10)[y_train]
del x_test, y_test


# 訓練とテストデータに分割
x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.2, random_state=0)
gc.collect()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


12

In [2]:
# カラー画像のデータセット
from keras.datasets import cifar10

(x_train_cifar10, y_train_cifar10), (x_test_cifar10, y_test_cifar10) = cifar10.load_data()

# プロトタイプなので100データだけ使用する
x_train_cifar10 = x_train_cifar10[:100]
y_train_cifar10 = y_train_cifar10[:100]
y_label_cifar10 = y_train_cifar10
y_train_cifar10 = np.identity(10)[y_train_cifar10]
del x_test_cifar10, y_test_cifar10

gc.collect()

print('x_train : {}'.format(x_train_cifar10.shape))
print('y_train : {}'.format(y_train_cifar10.shape))
print('y_label : {}'.format(y_label_cifar10.shape))

x_train : (100, 32, 32, 3)
y_train : (100, 1, 10)
y_label : (100, 1)


### 正答率を算出する関数

In [3]:
# # 正答率を算出する
# def accuracy_score(X, y, params):
#     y_pred =  predict(X, params)
#     y_pred_number = np.argmax(y_pred, axis=1)
    
#     y_label = np.argmax(y, axis=1)
    
#     data_size = X.shape[0]
    
#     correct_count = np.sum([y_label == y_pred_number]) 
#     score = correct_count / data_size * 100
    
#     return round(score, 2)

### レイヤークラス

親クラスとしてlayerクラスを作成。

以後、各機能のレイヤークラスはこの親クラスを継承することにする。


In [113]:
class Layer:
    def __init__(self, params={}):
        if 'input_size' in params:
            self.input_shape = params['input_shape']
        else:
            self.input_shape = 0
            
        if 'output_size' in params:
            self.output_shape = params['output_shape']
        else:
            self.output_shape = 0

### 最終層

分類問題用の最終層クラス（ソフトマックスとクロスエントロピーの結合層）

TODO: 正則化項がついていない。　すべての層の重みWを集計するメソッドをNetworkクラスに作成してそれを呼ばなくてはいけないため後回し。

回帰問題用に別クラスが必要になる。



### ソフトマックス関数

In [5]:
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)

### コスト関数

In [6]:
def cross_entropy_error(y, y_pred):
    
    data_size = y.shape[0]

    # クロスエントロピー誤差関数　y_predは０になりえるので -inf にならないためにすごく小さい補正値を入れる
    cross_entorpy = -np.sum(y * np.log(y_pred + 1e-7))
    
    error = cross_entorpy  / data_size
    return error

In [7]:
# 以前の正則化項つきのクロスエントロピー関数　いずれ使うのでそのままにしておく。
# def cost(y, y_pred, params, lam=0.01):
#     data_size = y.shape[0]
#     #  正則化項
#     weight_sum = sum([np.sum(matrix**2) for key, matrix in params.items() if "W" in key])
#     reg_term = (lam /2) * (weight_sum)
#     # クロスエントロピー誤差関数　y_predは０になりえるので -inf にならないためにすごく小さい補正値を入れる
#     cross_entorpy = -np.sum(y * np.log(y_pred + 1e-7))
    
#     cost = (cross_entorpy + reg_term) / data_size
#     return cost

In [8]:
class SoftmaxWithLoss(Layer):
    def __init__(self, params={}):
        super(SoftmaxWithLoss, self).__init__(params)
        self.loss = None # 損失関数
        self.y = None       # softmaxの出力
        self.t = None       # 教師データ（one-hot vector)
        
    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        
        return self.loss
    
    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        dx = (self.y - self.t) / batch_size # delta3に相当
        
        return dx

### アフィン変換層 (xW + b)

更新手法を
- sgd
- adagrad
- adam

と切り替えられる。

In [9]:
class Affine(Layer):
    def __init__(self, W, b, params= {}):
        super(Affine, self).__init__(params)
        self.W = W
        self.b = b
        self.x = None
        # パラメータの微分値
        self.dW = None
        self.db = None
        # 学習率のセット
        if 'lr' in params:
            self.lr = params['lr']
        else:
            self.lr = 0.01
        
        # 更新式のスイッチング
        # optimizeメソッドをoptimizerによって切り替える。
        if params['optimizer']=='sgd':
            self.optimize = self.update_sgd
        elif params['optimizer'] == 'adagrad':
            self.h = np.zeros_like(W)
            self.optimize = self.update_adagrad
        else: # params['optimizer'] == 'adam':
            self.m = np.zeros_like(W)
            self.v = np.zeros_like(W)
            self.beta1 = 0.9
            self.beta2 = 0.999
            self.optimize = self.update_adam
        
    def forward(self, x):
        self.x = x
        out = np.dot(x, self.W) + self.b

        return out
    
    def backward(self, dout=1):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        
        return dx
    
    def update_sgd(self):
        self.W -= self.lr * self.dW
        self.b -= self.lr * self.db
    
    # adagrad 少しずつ更新量が減っていく
    def update_adagrad(self, lr = 0.01):
        self.h += self.dW ** 2
        self.W -= self.lr * self.dW / (np.sqrt(self.h) + 1e-7)
        self.b -= self.lr * self.db
        
    
    def update_adam(self, lr = 0.01):
        self.m = self.beta1 * self.m + (1- self.beta1) * self.dW
        self.v = self.beta2 * self.v + (1- self.beta2) * (self.dW * self.dW)
        
        m_hat = self.m / (1 - self.beta1)
        v_hat = self.v / (1 - self.beta2)
        
        self.W -= self.lr * m_hat / (np.sqrt(v_hat) + 1e-8)
        self.b -= self.lr * self.db
        

### 活性化関数層

活性化関数層は

- ReLU
- tanh
- シグモイド関数

を切り替えて使用できる。

In [114]:
class Activation(Layer):
    '''
    活性化関数を設定できる 
    'tanh'
    'sigmoid'
    'relu'
    '''
    def __init__(self, params):
        super(Activation, self).__init__(params)
        self.out = None
        self.mask = None
        # optimizeメソッドを
        if params['activation']=='tanh':
            self.forward = self.forward_tanh
            self.backward = self.backward_tanh
        elif params['activation'] == 'sigmoid':
            self.forward = self.forward_sigmoid
            self.backward = self.backward_sigmoid
        else: # params['activation'] == 'relu':
            self.forward = self.forward_relu
            self.backward = self.backward_relu
     
    def forward_relu(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        
        return out
    
    def backward_relu(self, dout):
        dout[self.mask] = 0
        dx = dout
        
        return dx

    # tanh 
    def forward_tanh(self, x):
        out = np.tanh(x)
        self.out = out
        
        return out
    
    def backward_tanh(self, dout):
        dx = dout * (1 - np.tanh(dout)**2)
        
        return dx
    
    # sigmoid関数
    def forward_sigmoid(self, x):
        out = 1 / (1 + np.exp(-x))
        self.out = out
        
        return out
    
    def backward_sigmoid(self, dout):
        dx = dout * (1.0 - self.out) * self.out
        
        return dx
        

### バッチノーマリゼーション層

In [11]:
class BatchNorm(Layer):
    def __init__(self, params):
        super(BatchNorm, self).__init__(params)
        self.out = None
        self.beta = 0.0
        self.gamma = 1.0
        self.lr = params['lr']
        self.eps = 1e-8
    
        '''
        計算式は下記を参照
        https://kratzert.github.io/2016/02/12/understanding-the-gradient-flow-through-the-batch-normalization-layer.html
        '''
    def forward(self, x):
        #data_size, input_size = x.shape
        
        # 単に標準化する
        # out = (x - np.mean(x, axis=0)) / np.var()
        
        # step1: 平均を求める
        mu = np.mean(x, axis=0)
        
        # step2: 偏差
        self.xmu = x - mu
        
        # step3 : 偏差の２乗
        sq = self.xmu ** 2
        
        # step4 : 分散を求める
        self.var = np.var(x, axis=0)
        
        # step5 : 分散のルートを取った値を求める
        self.sqrtvar = np.sqrt(self.var + self.eps)
        
        # step6 : sqrtvarの逆数（invert）
        self.ivar = 1.0/ self.sqrtvar
        
        # step7 : 標準化した値
        self.xhat = self.xmu * self.ivar
        
        # step8
        gammax = self.gamma * self.xhat
        
        # step9
        out = gammax + self.beta
        
        return out

    
    def backward(self, dout=1):
        
        #get the dimensions of the input/output
        N, D = dout.shape
        
        # step9
        self.d_beta = np.sum(dout, axis=0)
        dgammax = dout #not necessary, but more understandable
        
        # step8
        self.d_gamma = np.sum(dgammax*self.xhat, axis=0)
        dxhat = dgammax * self.gamma
        
        # step7
        divar = np.sum(dxhat*self.xmu, axis=0)
        dxmu1 = dxhat * self.ivar
        
        # step6
        dsqrtvar = -1. /(self.sqrtvar**2) * divar
        
        # step5
        dvar = 0.5 * 1. / np.sqrt(self.var+self.eps) * dsqrtvar
        
        # step4
        dsq = 1. / N * np.ones((N, D)) * dvar
        
        # step3
        dxmu2 = 2 * self.xmu * dsq
        
        # step2
        dx1 = (dxmu1 + dxmu2)
        dmu = -1 * np.sum(dxmu1 + dxmu2, axis=0)
        
        # step1
        dx2 = 1. / N * np.ones((N, D)) * dmu
        
        # step0
        dx = dx1 + dx2
        
        return dx

    
    def optimize(self):
        self.gamma -= self.lr * self.d_gamma
        self.beta -= self.lr * self.d_beta
        

### ドロップアウト層

In [12]:
class Dropout(Layer):
    def __init__(self, params):
        super(Dropout, self).__init__(params)
        if 'dropout_ratio' in params:
            self.dropout_ratio = params['dropout_ratio']
        else:
            self.dropout_ratio = 0.5
        self.mask = None
    
    def forward(self, x, train_flg=True):
        if train_flg :
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        else:
            return x * (1 - self.dropout_ratio)
    
    def backward(self, dout):
        return dout * self.mask

### 複数層を束ねるクラス(ネットワーククラス)

改良したいところ

addで層を追加（好きに追加できる。）

各層の親クラスを作成


In [13]:
from collections import OrderedDict

class Layers:
    def __init__(self, params):
        unit_size_list = [params['input_size']]
        unit_size_list.extend(params['hidden_layer_list'])
        unit_size_list.append(params['output_size'])
        
        self.params = {}
        
        # レイヤの生成
        self.layers = OrderedDict()
        
        # とりあえずここにべた書きで書けるようにする
        
#         for i in range(1, len(unit_size_list)):
#             # 重みの初期化
#             init_W = np.random.randn(unit_size_list[i-1], unit_size_list[i])
#             init_b = np.zeros([1, unit_size_list[i]])
#             if params['init'] == 'gauss':
#                 init_W *= 0.01
#             elif params['init'] == 'xavier':
#                 init_W /= np.sqrt(unit_size_list[i-1])
#             else: # He
#                 init_W = init_W / np.sqrt(unit_size_list[i-1]) * np.sqrt(2) 
                
#             # アフィン変換層（Wx + b）を追加する
#             self.layers['Affine' + str(i)] = Affine(init_W, init_b, params)
            
#             # 最終層以外はバッチノーマリゼーション層と活性化関数層を追加する
#             if i < (len(unit_size_list)-1):
#                 if params['batch_norm'] == True:
#                     self.layers['BatchNorm' + str(i)] = BatchNorm(params)
#                 self.layers['Active' + str(i)] = Activation(params)
# #                 if params['dropout_ratio'] > 0:
# #                     self.layers['Dropout' + str(i)] = Dropout(params)
        
        self.lastLayer = SoftmaxWithLoss()
        
        # self.params['hidden_layer_num'] = len(unit_size_list)-1
        
    def predict(self, x):
        # forwardを繰り返す
        # ソフトマックスを通さなくても答えは出るのでこれで予測とする 
        # argmaxでラベルを取れる
        for layer in self.layers.values():
            x =layer.forward(x)

        return x

    def accuracy(self, x, t):
        # 正答率を小数点第二桁で出力する
        y_pred = self.predict(x)
        y_pred = np.argmax(y_pred, axis=1)
        y_true = np.argmax(t, axis=1)
        data_size = x.shape[0]

        correct_count = np.sum([y_true == y_pred]) 
        score = correct_count / data_size * 100

        return round(score, 2)
    
    def loss(self, x, t):
        y = self.predict(x)
        
        return self.lastLayer.forward(y, t)
    
    
    def optimize(self, x, t):
        
        # forward 
        self.loss(x, t)
        
        # backward
        dout = self.lastLayer.backward(1)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
            
        # optimizeメソッドがある層は更新を行う
        # AffineとBatchNorm層のみ行うはず
        for layer in self.layers.values():
            if hasattr(layer, "optimize"):
                layer.optimize()
            

### TODO : Gradient Checkを導入する

In [14]:
from sklearn.model_selection import train_test_split


class DNN:
    def __init__(self, init='gauss', iteration = 500, lr = 0.05,  lam = 0.01, 
                 batch_mode = 'mini', activation='relu',
                 batch_size_rate = 0.1, hidden_layer_list = [5], optimizer='sgd',
                 batch_norm=False, dropout_ratio=0.0):
        """ ハイパーパラメータ解説
        init: 初期化方法
            'he' : 
            'gauss' 
            'xavier'
        lr : 学習率
        lam : 正則化項の率
        batch_size: バッチサイズ
            'batch' : フルサイズ
            'mini' 0< x< 1: フルサイズ割合 0.1なら全体の0.1サイズ使用する
            'online' : オンライン学習　１データのみ
        hidden_layer_list : 隠れ層のリスト、層のユニットをリストで入力　例[2, 3]　ユニット数２、ユニット数３の隠れ層
        optimizer : 勾配の更新手法
            'sgd' : 確率的勾配降下法
            'adam': 
            'adagrad':
        activation: 活性化関数の名前
            'relu' : ReLU関数
            'tanh' : tanh
            'sigmoid' : シグモイド関数
        """
        self.params = {}
        self.params['iteration'] = iteration
        self.params['init'] = init
        self.params['lr'] = lr
        self.params['lam'] = lam # 正則化項用の係数　今は使っていない
        self.params['batch_mode'] = batch_mode # データ数が決まったらそれに基づいて変更する
        self.params['batch_size_rate'] = batch_size_rate # ミニバッチ法のときのみ使用する
        self.params['hidden_layer_list'] = hidden_layer_list
        self.params['optimizer'] = optimizer
        self.params['batch_norm'] = batch_norm
        self.params['dropout_ratio'] = dropout_ratio
        self.params['activation'] = activation # 活性化関数
        
    def train(self, X, y, params={}):
        # 入力パラメータがあれば更新する
        for key in params:
                self.params[key] = params[key]
        
        # 正規化　必要？
        X = X / 255.0
        
        # 訓練とテストデータに分割
        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=0.2, random_state=0)

        self.params['data_size'] = X_train.shape[0]
        self.params['input_size'] = X_train.shape[1]
        self.params['output_size'] = y_train.shape[1]

        
        # コストや正答率の学習曲線を引くためのリストを用意
        past_train_costs = []
        past_test_costs = []
        past_train_accuracy = []
        past_test_accuracy = []
        
        # 初期化
        # 重み初期化
        # バッチサイズの設定
        if self.params['batch_mode'] == 'batch':
            self.params['batch_size'] = self.params['data_size']
        elif self.params['batch_mode'] == 'mini':
            self.params['batch_size'] = int(self.params['data_size']  * self.params['batch_size_rate'] ) 
        else:
            self.params['batch_size'] = 1
        # 隠れ層やレイヤーインスタンス生成
        self.params['layer'] = Layers(self.params)
        
        
        # 何イテレーションで1エポックか
        epoch_per_i = int(self.params['data_size'] / self.params['batch_size'])
        
        ##################
        # 最急降下法での学習
        ##################
        for i in range(self.params['iteration']):
            
            # 学習に使用するデータをサンプリング
            choice_index = np.random.choice(self.params['data_size'], self.params['batch_size'])
            X_batch, y_batch = X_train[choice_index], y_train[choice_index]
            
            # 誤差逆伝播法によって勾配を求め、値を更新
            self.params['layer'].optimize(X_batch, y_batch)
            
            # 1エポックごとに正答率とコストを算出して保存する
            if i % epoch_per_i == 0:              
                past_train_accuracy.append(self.params['layer'].accuracy(X_train, y_train))
                past_test_accuracy.append(self.params['layer'].accuracy(X_test, y_test))
                
                past_train_costs.append(self.params['layer'].loss(X_train, y_train))
                past_test_costs.append(self.params['layer'].loss(X_test, y_test))
            
        return past_train_accuracy, past_test_accuracy, past_train_costs, past_test_costs 
    

        
    # 現在のパラメータで予測値を確率かラベルで出力する。
    def predict(self, X, probability=False):
        predict = self.params['layer'].predict(X, train_flg=False)
        predict_proba = softmax(predict)
        if probability== True:
            return predict_proba
        else:
            return np.argmax(predict_proba, axis=1)
        
    def plot_learning_curve(self, X, y, metrics='acc', params={}): 
        past_train_accuracy, past_test_accuracy, past_train_costs, past_test_costs = self.train(X, y, params)
        plt.figure(figsize=(6,4))
        # count_epoch = self.params['iteration'] // self.params['data_size'] + 1
        if metrics == 'cost':
            plt.plot(past_train_costs, color='orange', label='train')
            plt.plot(past_test_costs, color='lime', label='test')
            plt.ylabel("cost", fontsize=15)
            print("last train cost is {}".format(past_train_costs[-1]))
            print("last test cost is {}".format(past_test_costs[-1]))
        else:
            #plt.plot(np.array(past_train_accuracy), color='r')
            plt.plot(past_train_accuracy, color='orange', label='train')
            plt.plot(past_test_accuracy, color='lime', label='test')
            plt.ylabel("accuracy", fontsize=15)
            print("last train accuracy is {}".format(past_train_accuracy[-1]))
            print("last test accuracy is {}".format(past_test_accuracy[-1]))
            plt.ylim(-0.5, 100.5)

        plt.legend()
        plt.title('Learning Curve', fontsize=20)
        plt.xlabel("iteration[epoch]", fontsize=15)
        

### プロットはクラスに入れず、trainの返り値を使ってplotすればよい。

In [15]:
model = DNN(iteration=10, optimizer='adam', 
            hidden_layer_list = [100], batch_norm=True)

In [20]:
# params = {'optimizer': 'adam',
#          'batch_mode' : 'mini',
#          'init': 'he',
#          'lr': 0.007}

# #past_train_accuracy, past_test_accuracy = model.train(X,y, params)
# # model.plot_learning_curve(X,y, 'cost', params)

# model.plot_learning_curve(x_train,y_train, params)

### 提出用ファイル作成

In [17]:
# コメントアウトを外せば提出ファイルができる。
# test_df = pd.read_csv("test.csv")

# X_sub = test_df
# X_sub = np.array(X_sub)

# # train_flgでエラー中　すべてparamsに押し込めれば良い
# Y_pred = model.predict(X_sub)

# submission = pd.DataFrame({
#        "ImageId": np.array(test_df.index) + 1,
#        "Label": Y_pred
#    })

# submission.to_csv("./submission_001.csv", index=False)

### 入出力サイズ対応が必要か

### DNNクラス

### Layersクラス

### 単層クラス

- Affine層
    - SGD
    - Adam
    - adgrad
- 活性化関数
    - tanh
    - ReLU
    - sigmoid
- バッチノーマリゼーション層
- ドロップアウト層(要修正)


- 畳み込み層　（forward確認中）
- プーリング層 （forward、　）
- Flatten層 (済)

im2col col2im



In [116]:
params = {}
params['activation'] = 'relu'
act1 = Activation(params)


In [117]:
print(x_train.shape)
print(act1.forward(x_train).shape)

(80, 28, 28)
(80, 28, 28)


In [118]:
print(x_train_cifar10.shape)
print(act1.forward(x_train_cifar10).shape)

(100, 32, 32, 3)
(100, 32, 32, 3)


### 活性化関数はサイズを気にしない

In [21]:
x_train_cifar10[0].shape

(32, 32, 3)

### Flattenクラス

In [25]:
class Flatten(Layer):
    # (N, H, W, C)のデータを
    # (N, H * W * C)に変更する
    def __init__(self, params={}):
        super(Flatten, self).__init__(params)
        # 入出力サイズ以外のパラメータはない
    
    def forward(self, x):
        self.input_shape = x.shape
        out = np.array([elem.flatten() for elem in x_train])
        return out
    
    def backward(self, dout):
        dout = dout.reshape(self.input_shape)
        
        return dout

In [26]:
x_train[:, :, :, np.newaxis].shape

(80, 28, 28, 1)

In [123]:
flat = Flatten()
forward_val = flat.forward(x_train[:, :, :, np.newaxis])
forward_val.shape

(80, 784)

In [125]:
flat.backward(forward_val).shape

(80, 28, 28, 1)

### 出力が変わらないパディング

フィルターサイズが奇数であることを条件に、
フィルターサイズを２で割った商をパディング数とすればよい。

In [30]:
filter_size = 3

pad = filter_size // 2

print(x_train_cifar10[0].shape)
      
print(np.pad(x_train_cifar10,[(0,0),(pad, pad),(pad, pad),(0,0)],'constant').shape)

(32, 32, 3)
(100, 34, 34, 3)


### 同じように'edge'にすればパディングは隣接値をそのまま使う

In [31]:
print(np.pad(x_train_cifar10,[(0,0),(pad, pad),(pad, pad),(0,0)],'edge').shape)

(100, 34, 34, 3)


### im2colの作成

### これをim2colの反対のcol2imで元のデータ形状に戻してやれば良い

In [224]:
def im2col(x, filter_size=4, stride=4, pad=0, padding='constant'):
    # 4次元データを2次元データに変形する
    # XXX: もしかしたらできているかも？？　今の所パディングして数が変わるとエラーになる
    # TODO: 上記を修正して、畳み込み層にも使えるように修正する
    
    # パディングして外堀を埋める
    if pad > 0: 
        x = np.pad(x,[(0,0),(pad, pad),(pad, pad),(0,0)], padding)

    # データ数　高さ　幅　チャネルを取得
    N, H, W, C = x.shape
    
    # それぞれのブロックごとにflatten
    return np.array([x[n, h:h+filter_size, w:w+filter_size, c].flatten() for c in range(C) for n in range(N) \
            for h in range(0, H-filter_size+1, stride) for w in range(0, W-filter_size+1, stride)])

In [225]:
# im2colの逆をやる関数
def col2im(x, input_shape, filter_size=4, stride=4, pad=0, padding='constant'):
    
    # 戻すデータの形状
    N, H, W, C = input_shape
    
    # リターン箱を作る
    img = np.zeros(input_shape)
    # ブロック数
    block_num = (pad * 2 + W - filter_size)/ stride + 1 # 本当は高さ　幅別々に計算する

    # １チャネルに何行あるか
    c_vol = int(ret.shape[0] / C)
    n_vol = int(c_vol / N)

    for i, line in enumerate(ret):
        # 帯からブロックにする
        block = line.reshape(4, 4)

        channel_i = i//c_vol

        data_i = i % c_vol // n_vol

        start_h, start_w = divmod(i % n_vol, block_num) # 本当はblock_h
        start_h, start_w = int(start_h), int(start_w)

        end_h = start_h + 4 # filter_size or filter_height
        end_w = start_w + 4 # filter_size or filter_weight

        img[data_i, start_h:end_h, start_w:end_w, channel_i] = block
        
    return img[:, pad:pad+H, pad:pad+W, :]                

In [286]:
ret = im2col(x_train_cifar10[:10], pad=1)
x_train_cifar10[:10].shape

(10, 32, 32, 3)

In [287]:
ret.shape

(1920, 16)

In [228]:
col2im(ret, x_train_cifar10[:10].shape).shape

(10, 32, 32, 3)

### col2im im2colが完成！

### MaxPoolingクラス

In [248]:
class MaxPooling(Layer):
    
    def __init__(self, pool_size=4, stride=-1, pad=0, params={}):
        super(MaxPooling, self).__init__(params)
        self.pool_size = pool_size
        self.stride = stride
        self.pad = pad
        self.stride = self.pool_size if self.stride == -1 else stride
        # パディング方法をしてできたほうがよい？　今はゼロ埋め固定
        # 最大値の場所を保存して、backwardでその場所以外は伝播しない
        self.max_index = None
        self.input_shape = None # 親クラスで定義するのでここには後々必要なくなる
        
    def forward(self, x):
        
        self.input_shape = x.shape
        N, H, W, C = x.shape
        
        col = im2col(x, self.pool_size, self.pool_size, self.pad)
        col_max = np.max(col, axis=1)
        self.max_index = np.argmax(col, axis=1)
#       print(col_max.shape)
#       col_max = np.max(im2col(x, self.pool_size, self.stride, self.pad, 'constant'), axis=1)
        
        # 出力サイズを確認
        out_h = (H + 2*self.pad - self.pool_size)// self.stride + 1 
        out_w = (W + 2*self.pad - self.pool_size)// self.stride + 1 
        
        # 整形
        return col_max.reshape(C, N, out_h, out_w).transpose(1, 2, 3, 0)
    
    def backward(self, dout):
        dout_line = dout.transpose(3, 0, 1, 2).reshape(-1)
        filter_size = 4
        
        # 返り値の箱を作る
        ret = np.zeros([dout_line.shape[0],self.pool_size* self.pool_size])
        
        # 最大値の場所にdoutを流し込む
        for i, max_i in enumerate(poollayer.max_index):
            ret[i, max_i] = dout_line[i]
            
        # 元の形状に戻してリターン
        return col2im(ret, self.input_shape)

In [413]:
class Convolution(Layer):
    
    def __init__(self, out_channel = 1, filter_size=3, stride=1, pad=0, bias=True, params={}):
        # biasなしに対応していない。 dbを更新しなければよい？
        self.out_channel =out_channel
        self.filter_size = filter_size
        self.stride = stride
        self.pad = pad
        self.bias = bias
        self.W = None
        self.b = None
        ##### Affineからパクリ ########
        self.x = None
        # パラメータの微分値
        self.dW = None
        self.db = None
        self.optimize = None
        # 学習率のセット
        if 'lr' in params:
            self.lr = params['lr']
        else:
            self.lr = 0.01
        self.x_2dim = None # im2col後のxの値を保持
        
        
        
        
    def initialize(self, in_shape, params={}):
        self.in_shape = in_shape # N H W Cで来ると想定
        N, H, W, C = in_shape
        # 出力の高さと幅を計算する
        out_h = 1 + int((H + 2*self.pad - self.filter_size) / self.stride)
        out_w = 1 + int((W + 2*self.pad - self.filter_size) / self.stride)
        
        self.out_shape = (N, out_h, out_w, self.out_channel)
        
        # 重みの初期化
        # im2colを見越して２次元の重みとして実装
        self.W = np.random.randn(self.filter_size * self.filter_size * self.in_shape[3], self.out_channel)
        self.b = np.zeros([self.out_channel, 1])
        # TODO 初期化を選択できるように設定する　ひとまずはガウス初期化
#         if params['init'] == 'gauss':
        self.W *= 0.01
#         elif params['init'] == 'xavier':
#             # 入力層のユニット数 N * out_h * out_w ? 
#             self.W /= np.sqrt(self.out_shape[0] * self.out_shape[1] * self.out_shape[2])
#         else: # He
#             self.W = self.W / np.sqrt(self.out_shape[0] * self.out_shape[1] * self.out_shape[2]) * np.sqrt(2) 
        
        # 更新式のスイッチング
        # optimizeメソッドをoptimizerによって切り替える。
#         if params['optimizer']=='sgd':
#             self.optimize = self.update_sgd
#         elif params['optimizer'] == 'adagrad':
#             self.h = np.zeros_like(W)
#             self.optimize = self.update_adagrad
#         else: # params['optimizer'] == 'adam':
#             self.m = np.zeros_like(W)
#             self.v = np.zeros_like(W)
#             self.beta1 = 0.9
#             self.beta2 = 0.999
#             self.optimize = self.update_adam
        
    def forward(self, x):
        in_C = self.in_shape[3]
        out_C = self.out_shape[3]
        N = self.in_shape[0]
        
        # 出力の高さと幅を計算する
        out_h = 1 + int((H + 2*self.pad - self.filter_size) / self.stride)
        out_w = 1 + int((W + 2*self.pad - self.filter_size) / self.stride)
        
        print("IN {} ".format(x.shape))
        # ２次元配列に変換する
        # (out_h * out_w * N * C, filter_size*filter_size)
        x_2dim = im2col(x, self.filter_size, self.stride, self.pad)
        self.x_2dim = x_2dim
        print("after im2col {} ".format(x_2dim.shape))
        
        # 各色（チャネル）ごとに
        W_color = self.W.reshape(in_C, -1, out_C)
        print("W {} ".format(self.W.shape))
        print("W_color {} ".format(W_color.shape))
        x_2dim_color = x_2dim.reshape(in_C, -1, out_h * out_w * N)
        print("x_2dim_color {} ".format(x_2dim_color.shape))
        
        out = np.zeros((in_C, out_C, out_h * out_w * N))
        for c in range(in_C):
            out[c] = np.dot(W_color[c].T, x_2dim_color[c])
        
        print("out before sum {} ".format(out.shape))
        # RGBごとの結果を合計する
        out = np.sum(out, axis=0)
        print("out after sum {} ".format(out.shape))
        
        out = out + self.b
        
        # 整形する　out_c N H W
        return out.reshape(out_C, N, out_h, out_w).transpose(1, 2 ,3, 0)
        
    
    def backward(self, dout):
        out_c = self.out_shape[3]
        print("dout {} ".format(self.out_shape))
        dout = dout.reshape(-1, out_c)
        print("dout after reshape{} ".format(dout.shape))
        
        self.db = np.sum(dout, axis=0)[:, np.newaxis]
        print("db after sum{} ".format(self.db.shape))
        print("d {}".format(self.b.shape))
        print("W {} ".format(self.W.shape))
        print("X shape {} ".format(self.x.shape))
        #self.dW = 
    
    
    #####affineからパクリ
    def update_sgd(self):
        self.W -= self.lr * self.dW
        self.b -= self.lr * self.db
    
    
    # adagrad 少しずつ更新量が減っていく
    def update_adagrad(self, lr = 0.01):
        self.h += self.dW ** 2
        self.W -= self.lr * self.dW / (np.sqrt(self.h) + 1e-7)
        self.b -= self.lr * self.db
        
    
    def update_adam(self, lr = 0.01):
        self.m = self.beta1 * self.m + (1- self.beta1) * self.dW
        self.v = self.beta2 * self.v + (1- self.beta2) * (self.dW * self.dW)
        
        m_hat = self.m / (1 - self.beta1)
        v_hat = self.v / (1 - self.beta2)
        
        self.W -= self.lr * m_hat / (np.sqrt(v_hat) + 1e-8)
        self.b -= self.lr * self.db

#### forwardメソッドの確認

In [419]:
a = np.arange(6).reshape(2, 3)

In [423]:
np.tile(a, (3,1))

array([[0, 1, 2],
       [3, 4, 5],
       [0, 1, 2],
       [3, 4, 5],
       [0, 1, 2],
       [3, 4, 5]])

In [415]:
conv = Convolution(2, 3, 1, 0)
data = x_train_cifar10[:10]
conv.initialize(data.shape)
out = conv.forward(data)

IN (10, 32, 32, 3) 
after im2col (27000, 9) 
W (27, 2) 
W_color (3, 9, 2) 
x_2dim_color (3, 9, 9000) 
out before sum (3, 2, 9000) 
out after sum (2, 9000) 


In [416]:
conv.backward(out)

dout (10, 30, 30, 2) 
dout after reshape(9000, 2) 
db after sum(2, 1) 
d (2, 1)
W (27, 2) 


AttributeError: 'NoneType' object has no attribute 'shape'

In [379]:
conv.W.shape

(27, 2)

In [385]:
conv.in_shape

(10, 32, 32, 3)

In [389]:
data.shape

(10, 32, 32, 3)

In [381]:
out.shape

(10, 30, 30, 2)

In [383]:
out.reshape(-1, 2).shape

(9000, 2)

In [329]:
col = np.arange(60)
data = col.reshape(2, -1)
data

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
        46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59]])

In [341]:
# out_c N H W
data.reshape(2, 5, 2, 3).transpose(1, 2 ,3, 0)[0]

array([[[ 0, 30],
        [ 1, 31],
        [ 2, 32]],

       [[ 3, 33],
        [ 4, 34],
        [ 5, 35]]])

In [323]:
def im2col_v2(x, filter_size=4, stride=4, pad=0, padding='constant'):
    # 4次元データを2次元データに変形する
    # XXX: もしかしたらできているかも？？　今の所パディングして数が変わるとエラーになる
    # TODO: 上記を修正して、畳み込み層にも使えるように修正する
    
    # パディングして外堀を埋める
    if pad > 0: 
        x = np.pad(x,[(0,0),(pad, pad),(pad, pad),(0,0)], padding)

    # データ数　高さ　幅　チャネルを取得
    N, H, W, C = x.shape
    
    # それぞれのブロックごとにflatten
    return np.array([x[n, h:h+filter_size, w:w+filter_size, c].flatten() for n in range(N) for c in range(C)  \
            for h in range(0, H-filter_size+1, stride) for w in range(0, W-filter_size+1, stride)])

In [327]:
data = x_train_cifar10[:1]
im2col_v2(data).shape

(192, 16)

In [250]:
col_max = np.max(im2col(x_train_cifar10[:10], 4, 4, 0, 'constant'), axis=1)
col_max.reshape(3, 10, 8, 8).transpose(1, 2, 3, 0).shape

(10, 8, 8, 3)

In [251]:

poollayer = MaxPooling()
poollayer.forward(x_train_cifar10[:10]).shape

(10, 8, 8, 3)

In [259]:
poollayer = MaxPooling(pad=0)
print(x_train_cifar10[:10].shape)
dout = poollayer.forward(x_train_cifar10[:10])
dout.shape

(10, 32, 32, 3)


(10, 8, 8, 3)

In [260]:
d = poollayer.backward(dout)
d.shape

(10, 32, 32, 3)

### MaxPoolingの確認

In [261]:
data = np.arange(16).reshape(1,4,4,1)

In [262]:
data.shape

(1, 4, 4, 1)

In [263]:
poollayer.forward(data)

array([[[[15]]]])

#### MaxPoolingのbackward関数

In [264]:
dout = poollayer.forward(x_train_cifar10[:10])



In [265]:
dout.shape

(10, 8, 8, 3)

In [266]:
# col_max.reshape(C, N, out_h, out_w).transpose(1, 2, 3, 0)

In [267]:
dout.transpose(3, 0, 1, 2).reshape(-1, 64).shape

(30, 64)

In [268]:
poollayer = MaxPooling()
dout = poollayer.forward(x_train_cifar10[:10])
print(dout.shape)

dout_line = dout.transpose(3, 0, 1, 2).reshape(-1)


filter_size = 4


(10, 8, 8, 3)


In [269]:
np.ones([dout.transpose(3, 0, 1, 2).reshape(-1).shape[0],filter_size]) * dout.transpose(3, 0, 1, 2).reshape(-1)[:, np.newaxis]

array([[106., 106., 106., 106.],
       [145., 145., 145., 145.],
       [149., 149., 149., 149.],
       ...,
       [ 62.,  62.,  62.,  62.],
       [ 79.,  79.,  79.,  79.],
       [ 86.,  86.,  86.,  86.]])

In [270]:
modosu = np.ones([dout.transpose(3, 0, 1, 2).reshape(-1).shape[0],filter_size*filter_size]) * dout.transpose(3, 0, 1, 2).reshape(-1)[:, np.newaxis]
print(modosu.shape)

(1920, 16)


In [271]:
modosu.size

30720

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_1 (Conv2D)            (None, 28, 28, 2)         20        
_________________________________________________________________
batch_normalization_1 (Batch (None, 28, 28, 2)         8         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 2)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 2)         38        
_________________________________________________________________
batch_normalization_2 (Batch (None, 14, 14, 2)         8         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 2)           0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 2)           38        
_________________________________________________________________
batch_normalization_3 (Batch (None, 7, 7, 2)           8         
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 3, 3, 2)           0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 18)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               2432      
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290  

In [None]:
from collections import OrderedDict

class LeNetLayers:
    def __init__(self, params):
        unit_size_list = [params['input_size']]
        unit_size_list.extend(params['hidden_layer_list'])
        unit_size_list.append(params['output_size'])
        
        self.params = {}
        
        # レイヤの生成
        self.layers = OrderedDict()
        
        # とりあえずここにべた書きで書けるようにする
        # self.layer['Conv1'] = Convolution() # 2 (3,3) (1,1) same
        # self.layer['BatchNorm1'] = BatchNorm(params)
        # self.layer['Active1'] = Activation(params) # relu
        # self.layer['Pool1'] = MaxPooling(params, 2, 2, 0) 
        
        # self.layer['Conv2'] = Convolution() # 2 (3,3) (1,1) same
        # self.layer['BatchNorm2'] = BatchNorm(params)
        # self.layer['Active2'] = Activation(params) # relu
        # self.layer['Pool2'] = MaxPooling(params, 2, 2, 0) 
        
        # self.layer['Conv3'] = Convolution() # 2 (3,3) (1,1) same
        # self.layer['BatchNorm3'] = BatchNorm(params)
        # self.layer['Active3'] = Activation(params) # relu
        # self.layer['Pool3'] = MaxPooling(params, 2, 2, 0) 
        
        
#         for i in range(1, len(unit_size_list)):
#             # 重みの初期化
#             init_W = np.random.randn(unit_size_list[i-1], unit_size_list[i])
#             init_b = np.zeros([1, unit_size_list[i]])
#             if params['init'] == 'gauss':
#                 init_W *= 0.01
#             elif params['init'] == 'xavier':
#                 init_W /= np.sqrt(unit_size_list[i-1])
#             else: # He
#                 init_W = init_W / np.sqrt(unit_size_list[i-1]) * np.sqrt(2) 
                
#             # アフィン変換層（Wx + b）を追加する
#             self.layers['Affine' + str(i)] = Affine(init_W, init_b, params)
            
#             # 最終層以外はバッチノーマリゼーション層と活性化関数層を追加する
#             if i < (len(unit_size_list)-1):
#                 if params['batch_norm'] == True:
#                     self.layers['BatchNorm' + str(i)] = BatchNorm(params)
#                 self.layers['Active' + str(i)] = Activation(params)
# #                 if params['dropout_ratio'] > 0:
# #                     self.layers['Dropout' + str(i)] = Dropout(params)
        
        self.lastLayer = SoftmaxWithLoss()
        
        # self.params['hidden_layer_num'] = len(unit_size_list)-1
        
    def predict(self, x):
        # forwardを繰り返す
        # ソフトマックスを通さなくても答えは出るのでこれで予測とする 
        # argmaxでラベルを取れる
        for layer in self.layers.values():
            x =layer.forward(x)

        return x

    def accuracy(self, x, t):
        # 正答率を小数点第二桁で出力する
        y_pred = self.predict(x)
        y_pred = np.argmax(y_pred, axis=1)
        y_true = np.argmax(t, axis=1)
        data_size = x.shape[0]

        correct_count = np.sum([y_true == y_pred]) 
        score = correct_count / data_size * 100

        return round(score, 2)
    
    def loss(self, x, t):
        y = self.predict(x)
        
        return self.lastLayer.forward(y, t)
    
    
    def optimize(self, x, t):
        
        # forward 
        self.loss(x, t)
        
        # backward
        dout = self.lastLayer.backward(1)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
            
        # optimizeメソッドがある層は更新を行う
        # AffineとBatchNorm層のみ行うはず
        for layer in self.layers.values():
            if hasattr(layer, "optimize"):
                layer.optimize()
            