In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, entry)
    for folder, _, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os, cv2, random
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns
%matplotlib inline 

from keras.models import Sequential
from keras.layers import Input, Dropout, Flatten, Dense, Activation, Conv2D, BatchNormalization, MaxPooling2D
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.utils import np_utils
%env KERAS_BACKEND=theano

import gc

In [None]:
#zipファイルの解凍
! unzip "../input/dogs-vs-cats-redux-kernels-edition/train.zip" -d train
! unzip "../input/dogs-vs-cats-redux-kernels-edition/test.zip" -d test

In [None]:
#1. Data preparation
# Directories the images are read from (the archives are unzipped into ./train and ./test).
TRAIN_DIR = './train/train/'
TEST_DIR = './test/test/'

# Image size = ROWS x COLS; every image is resized to this before being stored.
ROWS = 64
COLS = 64
CHANNELS = 3

# Number of training images and validation images taken PER CLASS (dogs and cats each).
# Validation images are sliced from index 7500 onwards of each shuffled class list,
# which is why TRAIN_NO must not exceed 7500.
TRAIN_NO = 7500  #(max:7500)
VALID_NO = 5000  #(max:5000)
#TEST_NO = 25


def read_image(file_path):
    """Read an image file as a colour image and resize it to the configured size.

    Args:
        file_path: Path of the image file to load.

    Returns:
        The array returned by ``cv2.resize`` for the decoded image, resized with
        bicubic interpolation using ``(ROWS, COLS)`` as the target size.

    Raises:
        IOError: If OpenCV cannot read or decode ``file_path``.
    """
    img = cv2.imread(file_path, cv2.IMREAD_COLOR) #cv2.IMREAD_GRAYSCALE
    # cv2.imread signals failure by returning None instead of raising; fail here with
    # the offending path rather than with an opaque error inside cv2.resize.
    if img is None:
        raise IOError('Could not read image file: {}'.format(file_path))

    # NOTE: OpenCV interprets the size tuple as (width, height). Callers transpose the
    # result, which yields (CHANNELS, ROWS, COLS); ROWS == COLS here in any case.
    return cv2.resize(img, (ROWS, COLS), interpolation=cv2.INTER_CUBIC)

def ex_data(data,ex_type):
    """Create three augmented copies of every image in ``data``.

    Each image is transposed, passed through an OpenCV transform and transposed back
    before being stored in a ``(CHANNELS, ROWS, COLS)`` slot.

    Args:
        data: Sequence of images, each laid out as ``(CHANNELS, ROWS, COLS)``
            (the layout produced by ``prep_data``).
        ex_type: ``0`` applies ``cv2.flip`` with flip codes 0, 1 and -1;
            ``1`` applies ``cv2.rotate`` by 90 degrees clockwise, 180 degrees and
            90 degrees counter-clockwise.

    Returns:
        A list with ``3 * len(data)`` nested-list images: all results of the first
        transform, then all of the second, then all of the third.

    Raises:
        ValueError: If ``ex_type`` is neither 0 nor 1. (Previously an unknown value
            silently returned uninitialised memory.)
    """
    if ex_type == 0:
        transforms = (
            lambda im: cv2.flip(im, 0),
            lambda im: cv2.flip(im, 1),
            lambda im: cv2.flip(im, -1),
        )
    elif ex_type == 1:
        transforms = (
            lambda im: cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE),
            lambda im: cv2.rotate(im, cv2.ROTATE_180),
            lambda im: cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE),
        )
    else:
        raise ValueError('ex_type must be 0 (flip) or 1 (rotate), got {!r}'.format(ex_type))

    count = len(data)
    augmented = np.empty((len(transforms), count, CHANNELS, ROWS, COLS), dtype=np.uint8)
    for j, image in enumerate(data):
        # OpenCV wants a contiguous buffer; a bare .T is only a strided view.
        source = np.ascontiguousarray(np.asarray(image, dtype=np.uint8).T)
        for k, transform in enumerate(transforms):
            # The 90-degree rotations swap the two spatial axes, so this assignment
            # only fits when ROWS == COLS.
            augmented[k][j] = transform(source).T

    # Collapsing the leading (transform, image) axes reproduces the order
    # "all of transform 0, all of transform 1, all of transform 2".
    return augmented.reshape((-1, CHANNELS, ROWS, COLS)).tolist()

def prep_data(images):
    """Load every image path in ``images`` into a single uint8 array.

    Args:
        images: Sequence of image file paths.

    Returns:
        Array of shape ``(len(images), CHANNELS, ROWS, COLS)`` with dtype uint8, where
        entry ``i`` is the transpose of ``read_image(images[i])``.
    """
    batch = np.ndarray((len(images), CHANNELS, ROWS, COLS), dtype=np.uint8)
    # Iterating over `batch` yields one writable view per image slot.
    for slot, path in zip(batch, images):
        slot[...] = read_image(path).T

    return batch

def make_labels(images):
    """Derive binary labels from image file names.

    Only the file name is inspected, not the directories leading to it, so a path
    such as ``.../dogs-vs-cats/train/cat.1.jpg`` is not mistaken for a dog.

    Args:
        images: Iterable of image paths.

    Returns:
        A list with 1 for every path whose file name contains ``'dog'`` and 0 for
        every other path, in input order.
    """
    return [1 if 'dog' in os.path.basename(path) else 0 for path in images]

# Collect the image paths. os.listdir() returns entries in arbitrary order, so the
# names are sorted first; otherwise the seeded shuffles below would not select the
# same images on every run / machine.
train_files = sorted(os.listdir(TRAIN_DIR))
train_dogs = [TRAIN_DIR + name for name in train_files if 'dog' in name]
train_cats = [TRAIN_DIR + name for name in train_files if 'cat' in name]
test_images = [TEST_DIR + name for name in sorted(os.listdir(TEST_DIR))]

# Randomly pick TRAIN_NO dogs and TRAIN_NO cats for training.
random.seed(123456)
random.shuffle(train_dogs)
random.shuffle(train_cats)
train_images = train_dogs[:TRAIN_NO] + train_cats[:TRAIN_NO]

# Validation data, taken from a fixed offset of each shuffled class list so that it
# never overlaps the training data and stays the same when TRAIN_NO is varied.
VALID_START = 7500
if TRAIN_NO > VALID_START:
    raise ValueError('TRAIN_NO ({}) must not exceed {}: training and validation '
                     'images would overlap'.format(TRAIN_NO, VALID_START))
valid_images = (train_dogs[VALID_START:VALID_START + VALID_NO]
                + train_cats[VALID_START:VALID_START + VALID_NO])

# Release the per-class path lists.
del train_files
del train_dogs
del train_cats
gc.collect()

# Pre-process the training data. It is kept as a uint8 ndarray throughout: going
# through Python lists and back would turn the pixels into int64 (8x the memory).
train = prep_data(train_images)    #original
# Optional augmentation - uncomment ONE line here and the matching label line below.
#train = np.concatenate([train, np.array(ex_data(train, 0), dtype=np.uint8)])    #original + flips
#train = np.concatenate([train, np.array(ex_data(train, 1), dtype=np.uint8)])    #original + rotations
#train = np.concatenate([train, np.array(ex_data(train, 0), dtype=np.uint8), np.array(ex_data(train, 1), dtype=np.uint8)])    #original + flips + rotations

# Training labels (model.fit() expects an ndarray).
labels_train = np.array(make_labels(train_images))    #original
#labels_train = np.array(make_labels(train_images)*4)    #original + flips (or rotations)
#labels_train = np.array(make_labels(train_images)*7)    #original + flips + rotations

# Shuffle images and labels together with ONE shared permutation so that the pairing
# cannot drift apart.
if len(train) != len(labels_train):
    raise ValueError('train has {} images but labels_train has {} labels'.format(
        len(train), len(labels_train)))
np.random.seed(123)
shuffle_order = np.random.permutation(len(train))
train = train[shuffle_order]
labels_train = labels_train[shuffle_order]

# Validation data and its labels.
valid = prep_data(valid_images)
labels_valid = np.array(make_labels(valid_images))

# Test data (the images to predict for the submission).
test = prep_data(test_images)

# Numeric id of every test image, parsed from its file name (e.g. '123.jpg' -> 123).
test_id = [int(os.path.splitext(os.path.basename(path))[0]) for path in test_images]

print('(data_no,channels,cols.row)')
print("Train shape: {}".format(train.shape))
print("Valid shape: {}".format(valid.shape))
print("labels_train shape: {}".format(len(labels_train)))
print("labels_valid shape: {}".format(len(labels_valid)))
print('train:{}'.format(type(train)))
print('labels_train:{}'.format(type(labels_train)))
print("Test shape: {}".format(test.shape))
print("Test_id: {}".format(len(test_id)))

In [None]:
#2. Building the neural network
# Optimizer and loss function passed to model.compile() inside catdog().
optimizer = RMSprop(lr=1e-4)
objective = 'binary_crossentropy'

def catdog():
    """Build and compile the cat/dog CNN.

    The network takes channels-first input of shape ``(CHANNELS, ROWS, COLS)``. It has
    four stages of two 3x3 ReLU convolutions followed by 2x2 max pooling (32, 64, 128
    and 256 filters), then two 256-unit dense layers with 50% dropout and a single
    sigmoid output unit.

    Keras layers follow the backend's configured image data format. When that format
    is ``channels_last`` the input is permuted to ``(ROWS, COLS, CHANNELS)`` first;
    without this the convolutions would treat the data as a CHANNELS x ROWS image
    with COLS channels.

    Returns:
        The compiled ``Sequential`` model, using the module-level ``optimizer`` and
        ``objective`` and tracking accuracy.
    """
    # Local imports: these names are not part of the notebook's import cell.
    from keras import backend as K
    from keras.layers import Permute

    model = Sequential()

    input_shape = (CHANNELS, ROWS, COLS)
    if K.image_data_format() == 'channels_last':
        # (C, H, W) -> (H, W, C); Permute indices are 1-based and skip the batch axis.
        model.add(Permute((2, 3, 1), input_shape=input_shape))
        first_layer_kwargs = {}
    else:
        first_layer_kwargs = {'input_shape': input_shape}

    for stage, filters in enumerate((32, 64, 128, 256)):
        extra = first_layer_kwargs if stage == 0 else {}
        model.add(Conv2D(filters, (3, 3), activation='relu', strides=(1, 1), padding='same', **extra))
        model.add(Conv2D(filters, (3, 3), activation='relu', strides=(1, 1), padding='same'))
        model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))

    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(0.5))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])

    return model


# Module-level model instance; run_catdog() trains it and later cells evaluate/predict with it.
model = catdog()


In [None]:
#3. Training, 4. Validation
# Upper bound on epochs (the EarlyStopping callback in run_catdog may stop sooner)
# and the mini-batch size passed to model.fit().
nb_epoch = 100
batch_size = 16

# File the ModelCheckpoint callback saves the best (lowest val_loss) model to.
checkpoint_path = 'XXX.h5'

## Callback for loss logging per epoch
class LossHistory(Callback):
    """Keras callback that records loss and accuracy after every epoch.

    Attributes (reset at the start of each training run):
        losses: Training loss per epoch.
        accuracy: Training accuracy per epoch.
        val_losses: Validation loss per epoch.
        val_accuracy: Validation accuracy per epoch.

    An entry is ``None`` when the corresponding metric is absent from ``logs``.
    """

    def on_train_begin(self, logs=None):
        """Start a fresh set of per-epoch histories."""
        self.losses = []
        self.accuracy = []
        self.val_losses = []
        self.val_accuracy = []

    def on_epoch_end(self, batch, logs=None):
        """Append this epoch's metrics.

        Args:
            batch: Index passed by Keras to ``on_epoch_end`` (the epoch number; the
                parameter name is kept for backward compatibility). Unused.
            logs: Metric dictionary for the finished epoch; may be ``None``.
        """
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        # Older Keras releases report accuracy under 'acc' / 'val_acc'.
        self.accuracy.append(logs.get('accuracy', logs.get('acc')))
        self.val_losses.append(logs.get('val_loss'))
        self.val_accuracy.append(logs.get('val_accuracy', logs.get('val_acc')))
        
def run_catdog():
    """Train the module-level ``model`` and return the recorded per-epoch history.

    Training uses ``train``/``labels_train`` with ``valid``/``labels_valid`` as
    validation data, for at most ``nb_epoch`` epochs with ``batch_size`` samples per
    batch. Three callbacks are attached:

    * ``LossHistory`` - records loss/accuracy per epoch (this is the return value).
    * ``EarlyStopping`` - stops after 10 epochs without ``val_loss`` improvement.
    * ``ModelCheckpoint`` - saves the full model to ``checkpoint_path`` whenever
      ``val_loss`` reaches a new minimum.

    Returns:
        The ``LossHistory`` callback instance holding the recorded metrics.
    """
    history = LossHistory()
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='auto')
    # The deprecated `period` argument is intentionally not passed: checking every
    # epoch is already the default, and newer Keras releases warn about or reject it.
    checkpoint = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1,
                                 save_best_only=True, save_weights_only=False, mode='min')
    model.fit(train, labels_train, batch_size=batch_size, epochs=nb_epoch,
              validation_data=(valid, labels_valid), verbose=1, shuffle=True,
              callbacks=[history, early_stopping, checkpoint])

    return history

# Run the training; `history` is the LossHistory callback with the per-epoch metrics.
history = run_catdog()

In [None]:
# Save the training results

# Base file name encoding the training-set and validation-set sizes
fname = 'T{}_V{}'.format(len(train), len(valid))

# Per-epoch curves recorded by the LossHistory callback
loss, val_loss = history.losses, history.val_losses
accuracy, val_accuracy = history.accuracy, history.val_accuracy

# Store the curves as a table (CSV content; the batch size is appended to the file name)
df = pd.DataFrame(dict(loss=loss, val_loss=val_loss,
                       accuracy=accuracy, val_accuracy=val_accuracy))
df.to_csv('{}_b{}.txt'.format(fname, batch_size))

In [None]:
# Load the checkpoint with the lowest val_loss

# Evaluate on the validation data to confirm the checkpoint was saved correctly
model.load_weights(checkpoint_path)
loss,acc = model.evaluate(valid,  labels_valid, verbose=2)

# Predict the test data (`test` is already an ndarray, so no extra copy is made)
predictions = model.predict(test, verbose=1)

# Build the submission directly from the ids and predictions.
# model.predict returns one column per output unit, so flatten it to one value per
# image and make sure it lines up with the ids before writing.
flat_predictions = np.ravel(predictions)
if len(flat_predictions) != len(test_id):
    raise ValueError('{} predictions for {} test ids'.format(len(flat_predictions), len(test_id)))
submission = pd.DataFrame({'id': test_id, 'label': flat_predictions})
# Test files are read in directory order; sort by id for a stable, readable file.
submission = submission.sort_values('id').reset_index(drop=True)
submission.to_csv('submission.csv', index=False)

In [None]:
# Tidy up the working directory
import shutil

# Remove the checkpoint written during training and the extracted image folders.
# Each removal is guarded so that re-running this cell after a cleanup is a no-op
# instead of an error.
if os.path.exists(checkpoint_path):
    os.remove(checkpoint_path)
for extracted_dir in ('./train/', './test/'):
    if os.path.isdir(extracted_dir):
        shutil.rmtree(extracted_dir)