# Load Packages

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math

import keras
from keras.models import Sequential
from keras.layers import *
from keras_preprocessing.image import ImageDataGenerator

import zipfile 

import seaborn as sns

import matplotlib.pyplot as plt
import seaborn as sns

from keras.optimizers import RMSprop,Adam


# Load CSV Files

In [None]:
# Read the training labels and the sample submission. Every column is loaded
# as a string (dtype=str), so the has_cactus values are '0' / '1' text.
train_data, test_data = (
    pd.read_csv(csv_path, dtype=str)
    for csv_path in (
        "../input/aerial-cactus-identification/train.csv",
        "../input/aerial-cactus-identification/sample_submission.csv",
    )
)

In [None]:
# Preview the first rows of the training labels table.
train_data.head()

In [None]:
# Preview the first rows of test_data (loaded from sample_submission.csv).
test_data.head()

In [None]:
# Preview the last rows of test_data.
test_data.tail()

In [None]:
# Print a structural summary of test_data (columns, dtypes, non-null counts).
test_data.info()

In [None]:
# Count how many training rows carry each has_cactus value.
train_data['has_cactus'].value_counts()

Find the percentage of each label

In [None]:
# Same counts as relative frequencies, scaled to percentages of all rows.
train_data['has_cactus'].value_counts(normalize=True) * 100

Visualize Data

In [None]:
# Pie chart of the class balance in the training labels.
color_palette_list = ['#009ACD', '#ADD8E6']

# Configure the global matplotlib style *before* the figure is created so the
# settings are in effect for every artist of this chart.
# NOTE: rcParams are process-wide, so later figures inherit these values too.
plt.rcParams['font.sans-serif'] = 'Arial'
plt.rcParams['text.color'] = '#909090'
plt.rcParams['axes.labelcolor']= '#909090'
plt.rcParams['xtick.color'] = '#909090'
plt.rcParams['ytick.color'] = '#909090'
plt.rcParams['font.size']=12

# Derive the shares from the data instead of hard-coding them, so the chart
# cannot drift from the table. The column is cast to str so the lookup works
# whether or not the CSV was read with dtype=str.
# Assumes has_cactus == '1' marks an image that contains a cactus.
label_share = train_data['has_cactus'].astype(str).value_counts(normalize=True) * 100

labels = ['cactus',
         'Non cactus']
percentages = [label_share.get('1', 0.0), label_share.get('0', 0.0)]
explode=(0.1,0)  # pull the first wedge slightly out of the pie

fig, ax = plt.subplots()
ax.pie(percentages, explode=explode, labels=labels,
        autopct='%1.0f%%',
       shadow=False, colors=color_palette_list[0:2],startangle=0,
       pctdistance=1.2,labeldistance=1.4)
ax.axis('equal')  # equal aspect ratio keeps the pie circular
ax.set_title("Percentage Of Cactus and Non Cactus Images")
ax.legend(frameon=False, bbox_to_anchor=(1.5,0.8));

# Extract Zip Files

In [None]:
# Extract the test archive into the current working directory. The context
# manager closes the archive handle as soon as extraction finishes (the
# original left the file open for the lifetime of the kernel).
with zipfile.ZipFile('/kaggle/input/aerial-cactus-identification/test.zip') as zip_ref_1:
    zip_ref_1.extractall()

In [None]:
# Extract the training archive into the current working directory. The context
# manager closes the archive handle as soon as extraction finishes (the
# original left the file open for the lifetime of the kernel).
with zipfile.ZipFile('/kaggle/input/aerial-cactus-identification/train.zip') as zip_ref_2:
    zip_ref_2.extractall()

Count the number of images in each directory.

In [None]:
# Directories the image files are read from (relative to the working directory).
train_path = "train/"
test_path = "test/"

# Report how many directory entries each image folder holds.
for caption, folder in (('Training Images:', train_path), ('Testing Images: ', test_path)):
    print(caption, len(os.listdir(folder)))

# Data Generators

[Tutorial for Keras flow_from_dataframe](https://medium.com/@vijayabhaskar96/tutorial-on-keras-flow-from-dataframe-1fd4493d237c)


In [None]:
# Create two ImageDataGenerator objects. 
# You could include image augmentations here. We will discuss those in a later video. 

In [None]:
# Generator for the labelled images: random augmentations plus pixel rescaling.
# validation_split reserves a fraction of the rows for the "validation" subset.
augmentation_settings = dict(
    rescale=1./255,                # multiply every pixel value by 1/255
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=15,
    shear_range=0.2,
    zoom_range=0.2,
    brightness_range=[0.5, 1.5],
    fill_mode='reflect',
    validation_split=0.2,
)
train_datagen = ImageDataGenerator(**augmentation_settings)

In [None]:
# Generator for the unlabelled test images: no augmentation is applied, pixel
# values are only multiplied by 1/255 (the same rescaling used for training).
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
bs = 64  # batch size shared by all three iterators

# Validation images must not be randomly distorted, otherwise the validation
# metrics measure performance on augmented data. This generator only rescales.
# Its validation_split MUST match train_datagen's (0.2): the split is taken
# positionally from the dataframe, so equal fractions give disjoint subsets.
valid_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Arguments common to the two labelled (training / validation) iterators.
labelled_flow_options = dict(
    dataframe = train_data,
    directory = train_path,
    x_col = "id",
    y_col = "has_cactus",
    batch_size = bs,
    seed = 1,
    shuffle = True,
    class_mode = "categorical",   # labels are yielded as one-hot vectors
    target_size = (32,32))

train_generator = train_datagen.flow_from_dataframe(
    subset = "training", **labelled_flow_options)

valid_generator = valid_datagen.flow_from_dataframe(
    subset = "validation", **labelled_flow_options)

# Unlabelled test images: no targets, and a fixed order (shuffle=False) so the
# predictions can later be matched to test_generator.filenames.
test_generator = test_datagen.flow_from_dataframe(
    dataframe = test_data,
    directory = test_path,
    x_col = "id",
    y_col = None,
    batch_size = bs,
    seed = 1,
    shuffle = False,
    class_mode = None,
    target_size = (32,32))

# Take the subset sizes from the iterators themselves. The previous hard-coded
# tr_size of 17500 was the size of the whole table, not of the training subset
# left after the validation split, so steps_per_epoch asked for more batches
# than one pass over the training data provides.
tr_size = train_generator.n
va_size = valid_generator.n

# math.ceil rounds up to the nearest integer, so a final partial batch still
# counts as one step.
tr_steps = math.ceil(tr_size / bs)
va_steps = math.ceil(va_size / bs)

# View Sample of Images

In [None]:
def training_images(seed):
    """Show up to 36 images from one training batch in a 6x6 grid.

    Each image is captioned 'Negative' (red) when its one-hot label selects
    column 0 and 'Positive' (blue) otherwise.
    NOTE(review): presumably column 0 corresponds to has_cactus == '0' via the
    generator's class_indices -- confirm.

    Parameters
    ----------
    seed : int
        Value passed to np.random.seed before the batch is drawn.
    """
    np.random.seed(seed)
    train_generator.reset()
    imgs, labels = next(train_generator)
    tr_labels = np.argmax(labels, axis=1)  # one-hot rows -> class indices

    # Never index past the end of the batch: the grid holds 36 cells, but a
    # batch may contain fewer images than that.
    n_shown = min(36, len(imgs))

    plt.figure(figsize=(14,14))
    for i in range(n_shown):
        plt.subplot(6,6,i+1)
        plt.imshow(imgs[i,:,:,:])
        if tr_labels[i] == 0:
            plt.text(0, -2, 'Negative', color='r')
        else:
            plt.text(0, -2, 'Positive', color='b')
        plt.axis('off')
    plt.show()


training_images(2)

# Build Network

In [None]:
# Convolutional classifier for 32x32 RGB images with a two-class output.
model = Sequential()

# Each stage is: three 3x3 'same'-padded ReLU convolutions -> dropout ->
# (optional) 3x3 max pooling -> batch normalisation.
# Tuples are (filters, dropout rate, apply max pooling). The last stage is not
# pooled.
conv_stages = [
    (16, 0.2, True),
    (32, 0.2, True),
    (64, 0.2, True),
    (128, 0.3, False),
]

for stage_index, (n_filters, drop_rate, use_pool) in enumerate(conv_stages):
    for conv_index in range(3):
        # Only the very first layer of the network declares the input shape.
        first_layer_kwargs = (
            {'input_shape': (32, 32, 3)}
            if stage_index == 0 and conv_index == 0 else {}
        )
        model.add(Conv2D(filters = n_filters, kernel_size = 3, padding = 'same',
                         activation = 'relu', **first_layer_kwargs))
    model.add(Dropout(drop_rate))
    if use_pool:
        model.add(MaxPooling2D(pool_size = 3))
    model.add(BatchNormalization())

# Classification head.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
# The two classes are mutually exclusive and the labels are one-hot vectors,
# so the outputs should form a probability distribution: softmax rather than
# two independent sigmoids.
model.add(Dense(2, activation = 'softmax'))
model.summary()

# Train Network

In [None]:
# Configure the optimiser (learning rate set explicitly), compile the model
# and train it on the generators.


optimizer=Adam(learning_rate=0.001,beta_1=0.9,beta_2=0.999,epsilon=1e-08)

# The generators yield one-hot targets over two mutually exclusive classes
# (class_mode="categorical"), so categorical cross-entropy is the matching
# loss; with it the 'accuracy' metric compares predicted and true class.
model.compile(optimizer=optimizer,loss='categorical_crossentropy',metrics=['accuracy'])

# Model.fit accepts generators directly; fit_generator is deprecated.
h1 = model.fit(train_generator, steps_per_epoch=tr_steps, epochs=18, validation_data=valid_generator, validation_steps=va_steps, verbose=1)

In [None]:
# Plot the training curves, skipping the first `start` epochs.
start = 1
n_epochs = len(h1.history['accuracy'])

# history[k] holds the metrics of epoch k+1 (Keras reports epochs 1-based), so
# the slice history[start:] covers epochs start+1 .. n_epochs. The previous
# range started at `start`, labelling every point one epoch too early.
ep_rng = np.arange(start + 1, n_epochs + 1)

fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=[12,6])

acc_ax.plot(ep_rng, h1.history['accuracy'][start:], label='Training Accuracy')
acc_ax.plot(ep_rng, h1.history['val_accuracy'][start:], label='Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Accuracy per Epoch')
acc_ax.legend()

loss_ax.plot(ep_rng, h1.history['loss'][start:], label='Training Loss')
loss_ax.plot(ep_rng, h1.history['val_loss'][start:], label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Loss per Epoch')
loss_ax.legend()

plt.show()

In [None]:
# Consider training for additional epochs, perhaps with a lower learning rate. 

# Prepare Submission

In [None]:
# Number of test images as seen by the iterator (instead of a hard-coded
# 4000), and the number of batches needed to cover all of them; math.ceil
# keeps the final partial batch.
test_size = test_generator.n
test_steps = math.ceil(test_size / bs)

In [None]:
# Rewind the iterator first so prediction starts at the first file; otherwise
# a previously consumed batch would shift the rows relative to
# test_generator.filenames. Model.predict accepts generators directly;
# predict_generator is deprecated.
test_generator.reset()
test_pred = model.predict(test_generator, steps=test_steps, verbose=1)

In [None]:
# File names in the order the test iterator yields them.
test_fnames = test_generator.filenames
# Index of the highest-scoring output unit for every test image.
pred_classes = test_pred.argmax(axis=1)

# Print how many images were assigned to class index 0, then to class index 1.
for class_index in (0, 1):
    print(np.count_nonzero(pred_classes == class_index))


In [None]:
# Assemble the submission table: one row per test file with its predicted
# class index, columns in the order id, has_cactus.
submission = pd.DataFrame({'id': test_fnames}).assign(has_cactus=pred_classes)

# Write it without the dataframe index, then preview the first rows.
submission.to_csv('submission.csv', index=False)

submission.head()

