# Contents
 1. [Load Packages](#1.-Load-Packages)
 2. [Check Datasets](#2.-Check-Datasets)
 3. [Hyperparameter](#3.-Hyperparameter)
 4. [Data Preprocessing](#4.-Data-Preprocessing) 
 5. [Model](#5.-Model)
 6. [Train](#6.-Train)
 7. [Evaluate](#7.-Evaluate)
 8. [Save Submission](#8.-Save-Submission)
 ---

# 1. Load Packages

In [None]:
from tqdm import tqdm_notebook
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from PIL import Image

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

---
# 2. Check Datasets

> ---
> ## 2-1. Dataset unzip

In [None]:
!unzip ../input/aerial-cactus-identification/train.zip
!unzip ../input/aerial-cactus-identification/test.zip

> ---
> ## 2-2. train dataframe merge

In [None]:
# [ train data ]

# Locations of the extracted data.
file_path = '/kaggle/working/'
train_dir = '/kaggle/working/train/'

# train labels
csv_path = '../input/aerial-cactus-identification/train.csv'
df = pd.read_csv(csv_path)

# Build the image paths from the CSV rows rather than from os.listdir():
# listdir returns files in arbitrary order, so pairing its output with
# df['has_cactus'] row-by-row would attach labels to the wrong images.
# NOTE(review): assumes train.csv has an `id` column holding each image's
# file name (including extension) — confirm against the CSV.
train_fnames = df['id'].tolist()
train_fpaths = [os.path.join(train_dir, fname) for fname in train_fnames]

# merge train image paths with labels
train_df = pd.DataFrame(data={'id': train_fpaths, 'has_cactus': df['has_cactus']})
train_df = train_df.astype(str)  # labels must be strings for the generators

print('classes : ', set(train_df['has_cactus']))
print('total train images : ', len(train_df))
print(train_df.head())

# sample image
sample = train_df['id'][0]
with Image.open(sample) as img_sample:  # close the file handle once decoded
    image = np.array(img_sample)

print(image.shape)
plt.imshow(image)


> ---
> ## 2-3. check sample submission

In [None]:
# Load the sample submission and report how many rows it contains.
sub_sample = '../input/aerial-cactus-identification/sample_submission.csv'
sample_df = pd.read_csv(sub_sample)
print('submission sample 수 : ', sample_df.shape[0])

# List the extracted test images and report how many there are, so the two
# counts can be compared by eye.
test_dir = '/kaggle/working/test/'
test_names = os.listdir(test_dir)
test_paths = [os.path.join(test_dir, name) for name in test_names]
print('테스트셋 수 : ', len(test_names))

# Last expression of the cell: shows the first rows of the sample submission.
sample_df.head()

> ---
> ## 2-4. train / validation split (hold out the last 500 training rows)

In [None]:
# Hold out the last 500 labelled rows; they are used later as validation data.
# `test_df` is taken first because `train_df` is rebound on the next line.
holdout_size = 500
test_df = train_df.iloc[-holdout_size:]
train_df = train_df.iloc[:-holdout_size]

---
# 3. Hyperparameter

In [None]:
# Image geometry: height, width, channels. The first two entries are used as
# the generators' target size, the full tuple as the model input shape.
image_side, channels = 32, 3
input_shape = (image_side, image_side, channels)

batch_size = 32       # images per batch produced by the generators
num_classes = 2       # width of the final softmax layer
num_epochs = 5        # passes over the training generator
learning_rate = 1e-2  # step size handed to the Adam optimizer

---
# 4. Data Preprocessing

In [None]:
# Both generators scale pixel values by 1/255; only the training generator
# additionally applies random augmentation.
pixel_scale = 1./255.
augmentation = dict(width_shift_range=0.3,
                    zoom_range=0.2,
                    horizontal_flip=True)

train_datagen = ImageDataGenerator(rescale=pixel_scale, **augmentation)
test_datagen = ImageDataGenerator(rescale=pixel_scale)

In [None]:
# Settings shared by both iterators: read the file path from the `id` column
# and the label from `has_cactus`, resize to the model's height/width, and
# emit integer class indices (`sparse`).
flow_kwargs = dict(x_col='id',
                   y_col='has_cactus',
                   target_size=input_shape[:2],
                   batch_size=batch_size,
                   class_mode='sparse')

# Augmented batches for training.
train_generator = train_datagen.flow_from_dataframe(train_df, **flow_kwargs)

# Rescale-only batches from the held-out rows, used for validation.
test_generator = test_datagen.flow_from_dataframe(test_df, **flow_kwargs)

---
# 5. Model

In [None]:
# Alternatives that were tried and left disabled: the stock ImageNet models
#   tf.keras.applications.ResNet101(include_top=True, weights='imagenet',
#       input_tensor=None, input_shape=None, pooling=None, classes=1000)
#   tf.keras.applications.VGG16(include_top=True, weights='imagenet',
#       input_tensor=None, input_shape=None, pooling=None, classes=1000,
#       classifier_activation='softmax')
# The network below is a hand-built stack of convolution stages instead.

# One entry per stage: (number of filters, whether the stage ends in Dropout).
# The first stage is the only one without dropout.
stage_specs = [
    (64, False),
    (128, True),
    (256, True),
    (512, True),
    (512, True),
]

inputs = layers.Input(input_shape)
net = inputs
for filters, use_dropout in stage_specs:
    # Three 3x3 'same' convolutions, then batch norm, ReLU and 2x2 max pooling.
    # NOTE(review): the Conv2D layers are created without an `activation`
    # argument, so the only nonlinearity in a stage is the ReLU after batch
    # norm — confirm this is intended.
    for _ in range(3):
        net = layers.Conv2D(filters, (3, 3), padding='same')(net)
    net = layers.BatchNormalization()(net)
    net = layers.Activation('relu')(net)
    net = layers.MaxPooling2D(pool_size=(2, 2))(net)
    if use_dropout:
        net = layers.Dropout(0.25)(net)

# Classifier head: flatten, one hidden dense layer, then a softmax over the
# classes.
net = layers.Flatten()(net)
net = layers.Dense(512)(net)
net = layers.Activation('relu')(net)
net = layers.Dropout(0.5)(net)
net = layers.Dense(num_classes)(net)
net = layers.Activation('softmax')(net)

model = tf.keras.Model(inputs=inputs, outputs=net)

model.summary()

---
# 6. Train

In [None]:
# Adam with the configured learning rate. The sparse categorical loss matches
# the integer labels produced by the generators (class_mode='sparse').
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train on the augmented generator and validate on the held-out generator.
# `Model.fit` accepts generators directly; `fit_generator` is deprecated in
# TF 2.x and absent from newer Keras releases.
# len(generator) is the number of batches per pass, so every epoch and every
# validation run covers its data exactly once.
model.fit(train_generator,
          steps_per_epoch=len(train_generator),
          epochs=num_epochs,
          validation_data=test_generator,
          validation_steps=len(test_generator))

In [None]:
# Write the trained model to disk, then rebind `model` to the copy read back
# from that same location.
model_dir = './model_cactus'
model.save(filepath=model_dir)

model = keras.models.load_model(filepath=model_dir)

In [None]:
# !rm -r train
# !rm -r test

---
# 7. Evaluate

In [None]:
test_dir = '/kaggle/working/test/'

# Predict in the row order of the sample submission so that preds[i] belongs
# to sample_df['id'][i]; os.listdir() returns files in arbitrary order.
# NOTE(review): assumes sample_df['id'] holds the test image file names
# (including extension) — confirm against sample_submission.csv.
test_names = sample_df['id'].tolist()
test_paths = [os.path.join(test_dir, fname) for fname in test_names]

# Decode every test image, closing each file handle as soon as it is read.
images = []
for test_path in tqdm_notebook(test_paths):
    with Image.open(test_path) as img_pil:
        images.append(np.array(img_pil))

# Apply the same 1/255 scaling the generators used during training; feeding
# raw 0-255 pixels would not match what the model was trained on.
batch = np.stack(images).astype('float32') / 255.

# One batched predict call instead of one call per image.
probs = model.predict(batch)
preds = np.argmax(probs, axis=1).tolist()

# Show the last test image together with its predicted class.
image = images[-1]
pred = preds[-1]
plt.imshow(image)
print('sample pred : ', pred)

---
# 8. Save Submission

In [None]:
# Take each id from the path its prediction was computed for, so every row
# pairs an image with its own prediction regardless of the order in which
# `test_paths` was built.
submission_ids = [os.path.basename(path) for path in test_paths]
submission_df = pd.DataFrame(data={'id': submission_ids, 'has_cactus': preds})
submission_df.to_csv('submission.csv', index=False)
submission_df.head()