In [None]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import OrderedDict
from layers import Convolution, MaxPooling, ReLU, Affine, SoftmaxWithLoss,im2col,col2im,BatchNormalization,Adam,Dropout,GlobalAveragePooling,LeakyReLU
# from optimizer import RMSProp
from PIL import Image
import os,shutil
from sklearn.model_selection import train_test_split

In [None]:
class SimpleConvNet:
    """Three convolution stages followed by a four-layer fully connected head.

    Forward order of ``self.layers``:
        Conv1 -> BatchNorm_1 -> ReLU1 -> Pool1
        Conv2 -> BatchNorm_2 -> ReLU2 -> Pool2
        Conv3 -> BatchNorm_3 -> ReLU3 -> Pool3
        Affine1 -> BatchNorm_4 -> ReLU6
        Affine3 -> BatchNorm_5 -> ReLU4
        Affine4 -> BatchNorm_6 -> ReLU5
        Affine2
    The loss is computed by ``self.last_layer`` (SoftmaxWithLoss).
    """

    def __init__(self, input_dim=(1, 28, 28), 
                 conv_param={'filter_num':32, 'filter_size':5, 'pad':1, 'stride':1},
                 pool_param={'pool_size':2, 'pad':0, 'stride':2},
                 hidden_size=80, output_size=15, weight_init_std=0.01):
        """
        input_dim : tuple, input shape (channels, height, width). Only the
                    height is used for the size computation, so the input is
                    treated as square.
        conv_param : dict, convolution settings ('filter_num', 'filter_size',
                     'pad', 'stride'); shared by all three convolution layers.
                     Only read, never mutated.
        pool_param : dict, pooling settings ('pool_size', 'pad', 'stride');
                     shared by all three pooling layers. Only read.
        hidden_size : int, number of units in each hidden affine layer
        output_size : int, number of output classes
        weight_init_std : float, standard deviation used to initialise weights
        """
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        pool_size = pool_param['pool_size']
        pool_pad = pool_param['pad']
        pool_stride = pool_param['stride']

        # Conv2 and Conv3 both produce this many channels.
        deep_channels = 64

        # Track the spatial size (H == W) through the three conv+pool stages so
        # that the first affine layer matches the flattened feature map.
        # NOTE(review): pool_pad enters this formula but is not passed to
        # MaxPooling below, so a non-zero pool pad would presumably desynchronise
        # the computed size from the real one - confirm against layers.MaxPooling.
        feature_size = input_dim[1]
        for _ in range(3):
            feature_size = (feature_size + 2*filter_pad - filter_size) // filter_stride + 1
            feature_size = (feature_size + 2*pool_pad - pool_size) // pool_stride + 1
        pool_output_pixel = deep_channels * feature_size * feature_size

        # Misclassified samples collected by accuracy().
        self.wrong_image_array_list=[]
        self.wrong_image_label_list=[]

        # Weight initialisation. The order of the randn calls is kept as
        # W1, W2, W3, W6, W7, W4, W5 so seeded runs draw the same values.
        self.params = {}
        std = weight_init_std
        self.params['W1'] = std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)  # Conv1 filters
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = std *  np.random.randn(pool_output_pixel, hidden_size)  # Affine1
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = std *  np.random.randn(hidden_size, output_size)  # Affine2 (output layer)
        self.params['b3'] = np.zeros(output_size)
        self.params['W6'] = std *  np.random.randn(hidden_size, hidden_size)  # Affine3
        self.params['b6'] = np.zeros(hidden_size)
        self.params['W7'] = std *  np.random.randn(hidden_size, hidden_size)  # Affine4
        self.params['b7'] = np.zeros(hidden_size)
        # (out channels, in channels, filter height, filter width)
        self.params['W4'] = std * np.random.randn(deep_channels, filter_num, filter_size, filter_size)  # Conv2
        self.params['b4'] = np.zeros(deep_channels)
        self.params['W5'] = std * np.random.randn(deep_channels, deep_channels, filter_size, filter_size)  # Conv3
        self.params['b5'] = np.zeros(deep_channels)

        # Batch-normalisation scale (gamma) and shift (beta): one value per
        # channel for the conv stages, one per unit for the dense stages.
        bn_widths = (filter_num, deep_channels, deep_channels,
                     hidden_size, hidden_size, hidden_size)
        for i, width in enumerate(bn_widths, start=1):
            self.params[f'gamma{i}'] = np.ones(width)
            self.params[f'beta{i}'] = np.zeros(width)

        # Layer construction (insertion order == forward order).
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['BatchNorm_1']=BatchNormalization(self.params['gamma1'],self.params['beta1'])
        self.layers['ReLU1'] = ReLU()
        self.layers['Pool1'] = MaxPooling(pool_h=pool_size, pool_w=pool_size, stride=pool_stride)

        self.layers['Conv2']=Convolution(self.params['W4'],self.params['b4'],
                                         conv_param['stride'],conv_param['pad'])
        self.layers['BatchNorm_2']=BatchNormalization(self.params['gamma2'],self.params['beta2'])
        self.layers['ReLU2']=ReLU()
        self.layers['Pool2']=MaxPooling(pool_h=pool_size,pool_w=pool_size,stride=pool_stride)

        self.layers['Conv3']=Convolution(self.params['W5'],self.params['b5'],
                                         conv_param['stride'],conv_param['pad'])
        self.layers['BatchNorm_3']=BatchNormalization(self.params['gamma3'],self.params['beta3'])
        self.layers['ReLU3']=ReLU()
        self.layers['Pool3']=MaxPooling(pool_h=pool_size,pool_w=pool_size,stride=pool_stride)

        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['BatchNorm_4']=BatchNormalization(self.params['gamma4'],self.params['beta4'])
        # This key used to be 'ReLU2', which already exists above. Re-assigning
        # an existing OrderedDict key only replaces the value at its old
        # position, so no activation was applied after BatchNorm_4. A unique key
        # puts the ReLU where it belongs. Parameters trained with the old
        # (ReLU-less) topology will not reproduce their scores with this one.
        self.layers['ReLU6'] = ReLU()
        self.layers['Affine3'] = Affine(self.params['W6'], self.params['b6'])
        self.layers['BatchNorm_5']=BatchNormalization(self.params['gamma5'],self.params['beta5'])
        self.layers['ReLU4'] = ReLU()
        self.layers['Affine4'] = Affine(self.params['W7'], self.params['b7'])
        self.layers['BatchNorm_6']=BatchNormalization(self.params['gamma6'],self.params['beta6'])
        self.layers['ReLU5'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        # forward(y, t) applies softmax to y and returns the cross entropy with t.
        self.last_layer = SoftmaxWithLoss()
    
    def predict(self, x,train_flg=False):
        """Run x through every layer in order and return the final scores.

        Layers whose key starts with 'BatchNorm' or 'Dropout' behave
        differently in training and inference, so they receive train_flg;
        every other layer is called with the input only.
        """
        for key, layer in self.layers.items():
            if key.startswith(('BatchNorm', 'Dropout')):
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x
    
    def loss(self, x, t,train_flg=False):
        """
            x : input data
            t : target labels
            train_flg : forwarded to predict()
            Returns the value produced by the SoftmaxWithLoss layer.
        """
        y = self.predict(x,train_flg)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100, wrong_image_dir="../../2_notebook/wrong"):
        """Return the fraction of samples in x that are classified correctly.

        Parameters
        ----------
        x : input data, indexed by sample along axis 0
        t : labels, either 1-D class indices or 2-D one-hot rows
        batch_size : number of samples predicted per forward pass
        wrong_image_dir : directory that receives one PNG per misclassified
            sample; pass None to skip writing image files.

        Every misclassified sample and its label are appended to
        self.wrong_image_array_list / self.wrong_image_label_list. The lists
        keep growing across calls. All samples are evaluated, including a
        trailing batch smaller than batch_size.
        """
        if t.ndim != 1 : t = np.argmax(t, axis=1)
        n_samples = x.shape[0]
        if n_samples == 0:
            return 0.0

        correct = 0
        for start in range(0, n_samples, batch_size):
            tx = x[start:start + batch_size]
            tt = t[start:start + batch_size]
            y = np.argmax(self.predict(tx, train_flg=False), axis=1)
            correct += int(np.sum(y == tt))
            # Compare sample by sample: `if y != tt` on whole arrays is
            # ambiguous and raises ValueError for batches larger than one.
            for j in range(len(tt)):
                if y[j] != tt[j]:
                    self._record_wrong_sample(tx[j], tt[j], wrong_image_dir)
        return correct / n_samples

    def _record_wrong_sample(self, image, label, wrong_image_dir):
        """Store one misclassified sample and optionally write it as a PNG."""
        # The file name is the sample's index in the wrong-sample lists, so the
        # PNGs line up with the arrays written by save_wrong_list().
        index = len(self.wrong_image_array_list)
        self.wrong_image_array_list.append(image)
        self.wrong_image_label_list.append(label)
        if wrong_image_dir is None:
            return
        os.makedirs(wrong_image_dir, exist_ok=True)
        # Inputs are scaled to 0..1 by the caller, so scale back to 8-bit.
        pixels = (np.squeeze(image) * 255).astype('uint8')
        img_converted = Image.fromarray(pixels).convert('L')
        img_converted.save(os.path.join(wrong_image_dir, f"{index}.png"))
    
    def save_wrong_list(self):
        """Write the collected misclassified images and labels to .npy files."""
        wrong_images=np.array(self.wrong_image_array_list)
        wrong_labels=np.array(self.wrong_image_label_list)
        np.save('wrong_images.npy',wrong_images)
        np.save('wrong_labels.npy',wrong_labels)
        print('images and labels have been saved')

    def gradient(self, x, t):

        """Compute the gradients by backpropagation.
        Parameters
        ----------
        x : input data
        t : target labels
        Returns
        -------
        dict with the gradient of every entry created in __init__:
            grads['W1'], grads['W2'], ... weights of each layer
            grads['b1'], grads['b2'], ... biases of each layer
            grads['gamma1'], grads['beta1'], ... batch-normalisation parameters
        """
        # forward (training mode so BatchNorm layers run in their training path)
        self.loss(x, t,train_flg=True)
        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients each layer stored during backward().
        grads = {}
        weight_layers = (('Conv1', '1'), ('Affine1', '2'), ('Affine2', '3'),
                         ('Conv2', '4'), ('Conv3', '5'),
                         ('Affine3', '6'), ('Affine4', '7'))
        for layer_name, suffix in weight_layers:
            layer = self.layers[layer_name]
            grads['W' + suffix], grads['b' + suffix] = layer.dW, layer.db
        for i in range(1, 7):
            bn = self.layers[f'BatchNorm_{i}']
            grads[f'gamma{i}'], grads[f'beta{i}'] = bn.dgamma, bn.dbeta

        return grads

In [None]:
# Original training set: images are reshaped to (N, 1, 28, 28).
train_data_original = np.load("../../1_data/train_data.npy").reshape(-1, 1, 28, 28)
train_label_original = np.load("../../1_data/train_label.npy")
print(train_data_original.shape)
print(train_label_original.shape)

# Second training set, loaded as stored (file names suggest it was produced by
# an image data generator).
train_data = np.load('../../1_data/train_data_image_data_generator.npy')
train_label = np.load('../../1_data/train_label_image_data_generator.npy')
print(train_data.shape)
print(train_label.shape)

# Training pool = first 3000 original samples + first 21000 samples of the
# second set, stacked along the sample axis.
train_data = np.concatenate((train_data_original[:3000], train_data[:21000]), axis=0)
train_label = np.concatenate((train_label_original[:3000], train_label[:21000]), axis=0)

In [None]:
# Per-epoch learning curves.
train_loss = []
test_loss = []
train_accuracy = []
test_accuracy = []

train_data=train_data.astype(np.float32)
train_label=train_label.astype(np.float32)

# Build the CNN.
snet = SimpleConvNet(input_dim=(1, 28, 28), 
                     conv_param={'filter_num':32, 'filter_size':3, 'pad':1, 'stride':1},
                     pool_param={'pool_size':2, 'pad':0, 'stride':2},
                     hidden_size=100, output_size=15, weight_init_std=0.01)  
# Hold out 20% of the pool for validation. No random_state is set, so the split
# differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(train_data, train_label, 
                                                        test_size=0.2,
                                                        shuffle=True
                                                        )
# Divide by 255 and lay the samples out as (N, 1, 28, 28).
train = (X_train/255).reshape(-1, 1, 28, 28)
test = (X_test/255).reshape(-1, 1, 28, 28)

x=train
t=y_train  # training labels
test_labels=y_test
epochs =1 
batch_size =128
# optimizer = RMSProp(lr=0.0001, rho=0.9)
optimizer=Adam(lr=0.001)
# Number of mini-batches per epoch. `np.int` was removed in NumPy 1.24, so the
# built-in int is used for the conversion.
xsize = x.shape[0]
iter_num = int(np.ceil(xsize / batch_size))

for epoch in range(epochs):
    print("epoch=%s"%epoch)
    # Visit the training samples in a fresh random order every epoch.
    idx = np.arange(xsize)
    np.random.shuffle(idx)
    for it in range(iter_num):
        print("it=", it)
        mask = idx[batch_size*it : batch_size*(it+1)]
        # Build the mini-batch.
        x_train = x[mask]
        t_train = t[mask]
        # Gradients via backpropagation.
        grads = snet.gradient(x_train, t_train)
        # Parameter update.
        optimizer.update(snet.params, grads)
    ## Record the learning progress
    train_loss.append(snet.loss(x,  t))
    test_loss.append(snet.loss(test, test_labels))
    # accuracy() (not loss()) is what records the misclassified samples.
    train_accuracy.append(snet.accuracy(x, t))
    test_accuracy.append(snet.accuracy(test, test_labels))
    
    # Early stop once both accuracies of the current epoch exceed 99.5%.
    # The former condition also had a "previous epoch" clause, but it was OR-ed
    # with this test and implied by it, so it never changed the outcome.
    if train_accuracy[epoch] > 0.995 and test_accuracy[epoch] > 0.995:
        # Snapshot into a copy: adding the moving statistics to snet.params
        # itself would mix non-trainable entries (which have no gradients)
        # into the dict handed to the optimizer.
        params = dict(snet.params)
        for i in range(1, 7):
            bn = snet.layers[f'BatchNorm_{i}']
            params[f'mm{i}'] = bn.moving_mean
            params[f'mv{i}'] = bn.moving_var
        break

In [None]:
# Plot the per-epoch loss and accuracy curves on one set of axes.
df_log = pd.DataFrame(dict(
    train_loss=train_loss,
    test_loss=test_loss,
    train_accuracy=train_accuracy,
    test_accuracy=test_accuracy,
))

# Red = loss, blue = accuracy; solid = train, dashed = test.
df_log.plot(style=['r-', 'r--', 'b-', 'b--'])
plt.xlabel("epochs")
plt.ylabel("Accuracy or loss")
plt.ylim(0, 3)
plt.show()

In [None]:
# Load the evaluation images and labels.
test_data = np.load('../../1_data/test_data.npy')
test_label = np.load('../../1_data/test_label.npy')
# Divide by 255, cast to float32 and lay the samples out as (N, 1, 28, 28).
test_data = (test_data / 255).astype(np.float32).reshape(-1, 1, 28, 28)

In [None]:
# Save the trained parameters together with the batch-normalisation moving
# statistics as one pickled dict.
# Work on a shallow copy so the extra 'mm*'/'mv*' entries are not injected into
# snet.params, the dict the optimizer updates.
params = dict(snet.params)
for i in range(1, 7):
    bn = snet.layers[f'BatchNorm_{i}']
    params[f'mm{i}'] = bn.moving_mean  # moving mean of BatchNorm_i
    params[f'mv{i}'] = bn.moving_var   # moving variance of BatchNorm_i

model_path = "../../2_notebook/submit/katakana_model.pickle"
# Create the target directory if it is missing so the write cannot fail on it.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, "wb") as f:
    pickle.dump(params, f)

In [None]:
# Load the saved parameters and evaluate the restored model.
# The unpickled dict is what gets applied below. Previously it was discarded
# and the weights were read from a `params` variable left over from an earlier
# cell, so the cell did not work on a fresh kernel.
# NOTE: pickle.load can execute arbitrary code - only open trusted files.
with open("katakana_model.pickle", "rb") as f:
    params = pickle.load(f)

# Same architecture as used for training.
model=SimpleConvNet(input_dim=(1, 28, 28), 
                    conv_param={'filter_num':32, 'filter_size':3, 'pad':1, 'stride':1},
                    pool_param={'pool_size':2, 'pad':0, 'stride':2},
                    hidden_size=100, output_size=15, weight_init_std=0.01)

# Layer name -> suffix of its 'W<suffix>' / 'b<suffix>' entries in params.
weight_layers = (('Conv1', '1'), ('Conv2', '4'), ('Conv3', '5'),
                 ('Affine1', '2'), ('Affine2', '3'),
                 ('Affine3', '6'), ('Affine4', '7'))
for layer_name, suffix in weight_layers:
    model.layers[layer_name].W = params['W' + suffix]
    model.layers[layer_name].b = params['b' + suffix]

# Batch normalisation: learned scale/shift plus the saved moving statistics.
for i in range(1, 7):
    bn = model.layers[f'BatchNorm_{i}']
    bn.gamma = params[f'gamma{i}']
    bn.beta = params[f'beta{i}']
    bn.moving_mean = params[f'mm{i}']
    bn.moving_var = params[f'mv{i}']

# Predict on test_data and compare with the labels.
accuracy = model.accuracy(test_data, test_label)
loss  = model.loss(test_data, test_label)
print(accuracy)  # in the real setting this is sent to the server
print(loss)