In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
 #   for filename in filenames:
    #      print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
pip install natsort

In [None]:
pip install tqdm

In [None]:
# 環境変数
%matplotlib inline
import keras
from keras.datasets import cifar10
from keras.layers import Activation, Conv2D, Dense, Dropout, Flatten, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Sequential, load_model
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import glob
import cv2
from natsort import natsorted
import re
import pathlib
import zipfile
from tqdm import tqdm

# **cifar10のデータ読み込み**

In [None]:
# Load the CIFAR-10 data and prepare the labels.
num_classes = 10  # number of classes

train_split, test_split = cifar10.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Only the test images are scaled to 0.0-1.0 here. X_train is left unscaled
# because the ImageDataGenerator(rescale=1.0 / 255) objects built later in this
# notebook rescale every batch they yield.
X_test = X_test.astype('float32')
X_test = X_test / 255.0

# One-hot encode the class labels.
y_train, y_test = (to_categorical(y, num_classes) for y in (y_train, y_test))

# **ハイパーパラメータ**

In [None]:
# Learning-rate decay schedule.
# The rate stays at its base value up to and including epoch 30 and is then
# multiplied by 0.98 once for every epoch past 30.
def lr_decay(epoch):
    """Return the learning rate to use for the given epoch index.

    Args:
        epoch: Integer epoch index.

    Returns:
        0.001 for ``epoch <= 30``; otherwise 0.001 multiplied by 0.98
        ``epoch - 30`` times.
    """
    base_lr, decay_factor, hold_epochs = 0.001, 0.98, 30

    rate = base_lr
    remaining = epoch - hold_epochs
    # Repeated multiplication (rather than a power) keeps the exact float result.
    while remaining > 0:
        rate *= decay_factor
        remaining -= 1

    return rate

In [None]:
# Training hyperparameters
EPOCHS = 1000              # maximum number of training epochs
hidden_nodes1 = 128        # number of filters in convolution block 1
hidden_nodes2 = 256        # number of filters in convolution block 2
hidden_nodes3 = 512        # number of filters in convolution block 3
output_nodes  = 1024       # number of units in the fully connected layer
validation_rate = 0.2      # fraction of the train data held out as validation data
IMAGE_SIZE = 32            # input image size
BATCH_SIZE = 500           # number of images per batch

In [None]:
# Inspect the loaded data: the first pixel and the label of sample 0,
# followed by a 2x5 grid of the first ten training images.
print('X_train[0][0][0] ->', X_train[0][0][0])
print('y_train[0]       ->', y_train[0])

fig, axes = plt.subplots(2, 5)
for ax, sample_image in zip(axes.flat, X_train[:10]):
    ax.imshow(sample_image)
    ax.axis("off")
fig.suptitle('The original image', fontsize=18)
plt.show()

In [None]:
# Split the original train data into train and validation subsets.
# test_size: fraction of the train data that becomes validation data.
# NOTE: this cell reassigns X_train / y_train, so re-running it shrinks them again.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=validation_rate)

# Number of batches per epoch for each generator.
# validation_steps is derived from X_val, the array the validation generator
# iterates over (not from the separate test set).
steps_per_epoch = X_train.shape[0] // BATCH_SIZE
validation_steps = X_val.shape[0] // BATCH_SIZE

In [None]:
# Print the shape of every array.
# X_*: image arrays (first axis is the number of images)
# y_*: label arrays (number of images, number of labels)
for shape_caption, array in (
    ("X_train.shape ->", X_train),
    ("y_train.shape ->", y_train),
    ("X_val.shape   ->", X_val),
    ("y_val.shape   ->", y_val),
    ("X_test.shape  ->", X_test),
    ("y_test.shape  ->", y_test),
):
    print(shape_caption, array.shape)

# **データの水増し条件**

In [None]:
# Data augmentation for the train data
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,       # scale pixel values to 0.0-1.0
    rotation_range=20,       # int: degree range for random rotations
    width_shift_range=0.2,   # float (fraction of the width): range for random horizontal shifts
    height_shift_range=0.2,  # float (fraction of the height): range for random vertical shifts
    shear_range=0.2,         # float: shear intensity (shear angle, counter-clockwise)
    zoom_range=0.2,          # float or [lower, upper]: range for random zoom
    horizontal_flip=True,    # bool: randomly flip inputs horizontally (left-right)
    channel_shift_range=0.2  # range for random channel shifts
    # NOTE(review): the shift is presumably applied before `rescale`, i.e. on the
    # 0-255 scale, where 0.2 would be almost invisible - confirm the intended value.
    #samplewise_center=True,           # set each sample's mean to 0
    #samplewise_std_normalization=True,# set each sample's variance to 1
    #zca_epsilon=1e-6,                 # epsilon for ZCA whitening
    #zca_whitening=True,                # ZCA whitening
)

# Compute the data-dependent statistics from sample data.
# Only needed when featurewise_center, featurewise_std_normalization or zca_whitening is set.
# train_datagen.fit(X_train, augment=True, rounds=1, seed=None)

# Take the image and label arrays and generate batches of augmented data
train_generator = train_datagen.flow(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
)

# Validation data: only rescaled to 0.0-1.0, no augmentation
validation_datagen = ImageDataGenerator(
   rescale=1.0 / 255,
)

# Batches of (X_val, y_val) used as validation data during training
validation_generator = validation_datagen.flow(
    X_val,
    y_val,
    batch_size=BATCH_SIZE,
)


# Preview: draw one augmented batch from the training generator.
# The built-in next() is used instead of the iterator's .next() method, which
# is not available on every Keras version.
X_batch, y_batch = next(train_generator)

for i in range(10):  # show the first 10 augmented images of the batch
    plt.subplot(2, 5, i+1)
    plt.imshow(X_batch[i])
    plt.axis("off")


plt.suptitle('Standardization result', fontsize=18)
plt.show()

# **識別器の構築**

In [None]:
# Build the CNN: three convolution blocks followed by a dense classifier head.
model = Sequential()

# Each block is Conv-ReLU-Conv-ReLU-MaxPool-Dropout; only the filter count
# differs between blocks, so the blocks are generated in a loop.
for block_index, n_filters in enumerate((hidden_nodes1, hidden_nodes2, hidden_nodes3)):
    # Only the very first layer of the model is given the input shape.
    first_layer_kwargs = {'input_shape': X_train.shape[1:]} if block_index == 0 else {}
    model.add(Conv2D(n_filters, (3, 3), padding='same', **first_layer_kwargs))
    model.add(Activation('relu'))
    model.add(Conv2D(n_filters, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

# Fully connected part
model.add(GlobalAveragePooling2D())
model.add(Dense(output_nodes))
model.add(Activation('relu'))
model.add(Dropout(0.50))

# Classification layer: one softmax output per class. The width comes from
# num_classes (the value also used for one-hot encoding) instead of a literal 10.
model.add(Dense(num_classes))
model.add(Activation('softmax'))

In [None]:
# Show the layers of the model as currently built
model.summary()

# **学習過程の設定**

In [None]:
# Configure the training process
model.compile(
    loss='categorical_crossentropy', # loss function; used when the labels are one-hot encoded
    optimizer='Adam',                # optimization algorithm
    metrics=['accuracy']
)

In [None]:
# EarlyStopping configuration
# NOTE(review): restore_best_weights is not set, so the model presumably keeps the
# weights of the last epoch rather than the best one - confirm this is intended.
es = keras.callbacks.EarlyStopping(
    monitor='val_loss', # quantity to monitor
    patience=20,        # number of epochs without improvement of `monitor` that is tolerated
    verbose=0,
    mode='min'
)

# Apply the learning rate returned by lr_decay during training.
lr_change = LearningRateScheduler(lr_decay)

# **学習**

In [None]:
# Train the model on augmented batches from train_generator and validate on
# validation_generator. model.fit accepts generators directly; fit_generator is
# deprecated, and this notebook already passes a generator to model.evaluate.
# Training ends after EPOCHS epochs or earlier if the EarlyStopping callback fires.
history = model.fit(
    train_generator,                       # train data
    epochs=EPOCHS,                         # number of epochs
    steps_per_epoch=steps_per_epoch,       # batches drawn per epoch
    validation_data=validation_generator,  # validation data
    validation_steps=validation_steps,     # validation batches per epoch
    verbose=1,
    callbacks=[lr_change, es],
)

# **識別器の評価**

In [None]:
# Evaluate the trained model.
# Test set: the rescaled X_test / y_test arrays.
print('＜Test_data＞')
scores = model.evaluate(X_test, y_test, verbose=1)
test_loss, test_accuracy = scores[0], scores[1]
print('Test_loss   ：', test_loss)
print('Test_accuray：', test_accuracy)


# Train set: evaluated through train_generator, i.e. on augmented batches.
print('\n＜Train_data＞')
scores = model.evaluate(train_generator, verbose=1)
train_loss, train_accuracy = scores[0], scores[1]
print('Train_loss   ：', train_loss)
print('Train_accuray：', train_accuracy)

In [None]:
# Accuracy per epoch during training (train vs. validation).
fig, ax = plt.subplots()
for history_key in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[history_key])
ax.set_title('Accuracy')
ax.set_xlabel('epoch')
ax.set_ylabel('accuracy')
ax.set_ylim([0.00, 1.00])
ax.legend(['acc', 'val_acc'], loc='lower right')
plt.show()

In [None]:
# Loss per epoch during training (train vs. validation).
fig, ax = plt.subplots()
for history_key in ('loss', 'val_loss'):
    ax.plot(history.history[history_key])
ax.set_title('Loss')
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.legend(['loss', 'val_loss'])
plt.show()

# **識別器の保存**

In [None]:
# Create the results directory (no error if it already exists).
result_dir = 'results'
os.makedirs(result_dir, exist_ok=True)

# Save the model architecture as JSON and YAML. The files are written inside
# `with` blocks so the handles are flushed and closed deterministically.
print('save the architecture of a model')
json_string = model.to_json()
with open(os.path.join(result_dir, 'cnn_model.json'), 'w') as json_file:
    json_file.write(json_string)
# NOTE(review): model.to_yaml() may be unavailable on newer Keras/TensorFlow
# releases - confirm against the installed version.
yaml_string = model.to_yaml()
with open(os.path.join(result_dir, 'cnn_model.yaml'), 'w') as yaml_file:
    yaml_file.write(yaml_string)

# Save the trained weights.
print('save weights')
model.save_weights(os.path.join(result_dir, 'cnn_model_weights.hdf5'))

# **提出用ファイルの構築**

In [None]:
# Paths of the submission test images, named 1.png ... 300000.png, in numeric order.
test_image_dir = '../input/cifar10-submit-ds/test/'
files = [test_image_dir + str(image_id) + '.png' for image_id in range(1, 300001)]

print(files[:5])

# **提出形式に変換**

In [None]:
# Load the sample submission; its 'label' column is overwritten with the
# model's predictions further down. read_csv already returns a DataFrame.
sample_submission_path = '../input/cifar-10/sampleSubmission.csv'
df = pd.read_csv(sample_submission_path)

df.head()

In [None]:
# Class names, indexed by the position of the model's output unit.
labels = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

In [None]:
# Predict a class name for every submission image.
# Images are read and predicted in chunks of BATCH_SIZE instead of one
# model.predict call per image; pred2 ends up with one label per entry of
# `files`, in the same order.
pred2 = []
n_files = len(files)

for start in range(0, n_files, BATCH_SIZE):
    batch_files = files[start:start + BATCH_SIZE]

    batch_images = []
    for file in batch_files:
        image = cv2.imread(file)
        if image is None:
            # cv2.imread returns None instead of raising when a file is missing
            # or unreadable; fail here with the offending path.
            raise FileNotFoundError("could not read image: " + file)
        # OpenCV loads images as BGR; convert to RGB.
        batch_images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    if start == 0:
        # Show the very first image as a sanity check.
        plt.subplot(1, 1, 1)
        plt.imshow(batch_images[0])
        plt.axis("off")

    # Scale pixel values to the 0-1 range (same scaling as X_test).
    x = np.asarray(batch_images, dtype='float32')
    x /= 255.0

    pred = model.predict(x, batch_size=BATCH_SIZE, verbose=0)
    pred2.extend(labels[class_index] for class_index in np.argmax(pred, axis=1))

    if start == 0:
        print(batch_files[0], "\n")
        print(x[0][0][0], "\n")

    # Progress in percent. The previous expression i+1/len(files) evaluated as
    # i + (1/len(files)) and was never scaled to a percentage.
    done = start + len(batch_files)
    print("\r{:.10f}[%]".format(100.0 * done / n_files), end="")

In [None]:
# Put the predicted class names into the 'label' column of the submission
# frame and preview the first 20 rows.
df['label'] = pred2
df.head(20)

In [None]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV.
df.to_csv('submission.csv', index=False)