# 画像から犬か猫に分類

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
all_input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in all_input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
## Unpack the train and test zip archives into the current working directory
import os, shutil, zipfile

data = ['train', 'test1']

for archive_name in data:
    archive_path = '../input/dogs-vs-cats/' + archive_name + '.zip'
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(".")

In [None]:
## ライブラリ読み込み
import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator,load_img
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random
import os

In [None]:
# Input resolution and channel count used by the model and the data generators.
Image_Width = Image_Height = 128
Image_Channels = 3
Image_Size = (Image_Width, Image_Height)

In [None]:
## Derive the ground-truth label from each training image file name
import pandas as pd

filenames = os.listdir('./train/')

# The text before the first '.' decides the label: 'dog' -> 1, anything else -> 0.
categories = [1 if name.split('.')[0] == 'dog' else 0 for name in filenames]

df = pd.DataFrame({'filename': filenames, 'category': categories})

df.head()

In [None]:
## Build the model
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,\
     Dropout,Flatten,Dense,Activation,\
     BatchNormalization


def _conv_block(filters, **conv_kwargs):
    """Return one Conv2D -> BatchNormalization -> MaxPooling2D -> Dropout stage."""
    return [
        Conv2D(filters, (3, 3), activation='relu', **conv_kwargs),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
    ]


# Three convolutional stages with 32, 64 and 128 filters; only the first one
# declares the input shape.
feature_layers = (
    _conv_block(32, input_shape=(Image_Width, Image_Height, Image_Channels))
    + _conv_block(64)
    + _conv_block(128)
)

# Dense head ending in a 2-unit softmax.
classifier_layers = [
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(2, activation='softmax'),
]

model = Sequential(feature_layers + classifier_layers)

model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
## Print the number of labelled samples and draw a bar chart of the count per category
category_column = df['category']
print(len(category_column))
category_column.value_counts().plot.bar()

In [None]:
## Print a summary of the model
model.summary()

### ランダムに選択した画像を出力

In [None]:
from tensorflow.keras.preprocessing.image import load_img
import matplotlib.pyplot as plt
import random
%matplotlib inline

# Pick 16 training images at random and show them in a 4x4 grid without axes.
sample = random.sample(filenames, 16)
plt.figure(figsize=(12, 12))

for i, fname in enumerate(sample):
    plt.subplot(4, 4, i + 1)
    image = load_img('./train/' + fname)
    plt.imshow(image)
    plt.axis('off')

plt.tight_layout()
plt.show()

### 回転・反転・引き伸ばしした画像を出力

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import array_to_img, img_to_array, load_img

input_path = './train/cat.477.jpg'
count = 10

# Augmentation settings: rescaling plus random rotation, shear, zoom,
# horizontal flip and width/height shifts.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

# Load the sample picture as an array and prepend a batch axis of length 1.
image = img_to_array(load_img(input_path))
image = image.reshape(1, *image.shape)

plt.figure(figsize=(16, 6))
images_flow = train_datagen.flow(image, batch_size=1)

# Draw eight augmented variants of the same picture in a 2x4 grid.
for i in range(8):
    batches = next(images_flow)

    # Drop the batch axis (4-D -> 3-D) and convert the array back to an image.
    gen_img = array_to_img(batches[0])

    plt.subplot(2, 4, i + 1)
    plt.imshow(gen_img)
    plt.axis('off')

plt.show()

In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
# Stop training once the monitored quantity (Keras' default for EarlyStopping
# is `val_loss`) has not improved for 10 consecutive epochs.
earlystop = EarlyStopping(patience=10)

# Halve the learning rate (never going below 1e-5) when validation accuracy has
# not improved for 2 epochs. The model is compiled with metrics=['accuracy'],
# which Keras 2.3+/tf.keras 2.x log as `val_accuracy`; monitoring `val_acc`
# only produces a "metric not available" warning and the callback never fires.
# NOTE(review): Keras releases older than 2.3 used the key `val_acc` - confirm
# the installed version.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    patience=2,
    verbose=1,
    factor=0.5,
    min_lr=0.00001,
)

callbacks = [earlystop, learning_rate_reduction]

In [None]:
# Turn the numeric labels into class-name strings.
label_names = {0: 'cat', 1: 'dog'}
df["category"] = df["category"].replace(label_names)

# Hold out 20% of the rows for validation, with a fixed seed, and renumber
# both parts from zero.
train_part, validate_part = train_test_split(df, test_size=0.20, random_state=42)
train_df = train_part.reset_index(drop=True)
validate_df = validate_part.reset_index(drop=True)

total_train = len(train_df)
total_validate = len(validate_df)
batch_size = 15

In [None]:
# Training batches: images listed in train_df, read from ./train/, resized to
# Image_Size and passed through the augmenting train_datagen.
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    "./train/",
    x_col='filename',
    y_col='category',
    target_size=Image_Size,
    class_mode='categorical',
    batch_size=batch_size,
)

# Validation batches: same source directory, but only rescaled (no random
# augmentation).
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    "./train/",
    x_col='filename',
    y_col='category',
    target_size=Image_Size,
    class_mode='categorical',
    batch_size=batch_size,
)

# Test-time preprocessing: rescale only. The previous definition copied the
# training augmentation (rotation, shear, zoom, flips, shifts), which would
# randomly distort the images being predicted.
test_datagen = ImageDataGenerator(rescale=1./255)

# The former `test_generator` built here was removed: it fed train_df through
# train_datagen against "./test/", a directory this notebook never creates
# (the archives unpack to ./train/ and ./test1/), so it could not match any
# file. The generator actually used for prediction is created further down,
# once test_df has been built from ./test1/.


In [None]:
# Train for a fixed number of epochs. The step counts use floor division of the
# sample counts by the batch size.
epochs = 10
train_steps = total_train // batch_size
validation_steps = total_validate // batch_size

history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    epochs=epochs,
    callbacks=callbacks,
)

In [None]:
# Save the trained model to a .h5 file in the current working directory.
model.save("model1_catsVSdogs_10epoch.h5")

In [None]:
# Collect the test image file names into a one-column DataFrame and show how
# many there are.
test_filenames = os.listdir("./test1/")
test_df = pd.DataFrame({'filename': test_filenames})
nb_samples = len(test_df)
nb_samples

In [None]:
# Iterator over the unlabelled test images used for prediction.
# - A rescale-only ImageDataGenerator is used instead of train_datagen so the
#   images are not randomly rotated/flipped/zoomed at inference time.
# - shuffle=False keeps the batches in test_df row order; with the default
#   shuffle=True the predictions come back in a random order and no longer
#   line up with test_df['filename'].
test_generator = ImageDataGenerator(rescale=1./255).flow_from_dataframe(
    test_df,
    "./test1/",
    x_col='filename',
    y_col=None,
    target_size=Image_Size,
    class_mode=None,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
## Run the model on the test generator, using enough steps to cover every sample
prediction_steps = np.ceil(nb_samples/batch_size)
predict = model.predict_generator(test_generator, steps=prediction_steps)

In [None]:
# `predict` holds one softmax row per image with one column per class (the
# model ends in a 2-unit softmax layer), so it cannot be stored in a single
# DataFrame column as-is. Keep only the "dog" probability: the plotting cell
# below labels an image 'cat' when this value is < 0.5 and 'dog' otherwise.
# The column position is looked up from the training generator's class mapping
# rather than hard-coded.
dog_index = train_generator.class_indices['dog']
test_df['category'] = predict[:, dog_index]
test_df.head()

### 学習結果を画像で出力

In [None]:
from tensorflow.keras.preprocessing.image import load_img
import matplotlib.pyplot as plt
import random
%matplotlib inline

# Show 16 random test images in a 4x4 grid, each titled with the predicted
# label and the stored score.
sample = random.sample(test_filenames, 16)
fig = plt.figure(figsize=(12, 12))

for i, fname in enumerate(sample):
    ax = fig.add_subplot(4, 4, i + 1)
    image = load_img('./test1/' + fname)

    # Look up the score stored for this file; below 0.5 is shown as 'cat'.
    valu = test_df.loc[test_df['filename'] == fname, 'category']
    score = valu.iloc[0]
    label = 'cat' if score < 0.5 else 'dog'
    ax.set_title(label + '  ' + str(score))

    ax.imshow(image)
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
#test_df['filename'] = test_df['filename'].str.extract('(\d+)', expand=True)
#test_df = test_df.rename(columns={'filename': 'id', 'category': 'label'})

#test_df.head()

In [None]:
#test_df.to_csv('submission.csv', index=False)