In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as img
import tensorflow as tf
import os

# Apply matplotlib's 'ggplot' style sheet to every figure drawn in this notebook.
plt.style.use('ggplot')

In [None]:
# Load the two CSV tables shipped in ../input.
# `train` carries the 'id' and 'has_cactus' columns used by the generators below;
# `test` (the sample submission) is used later as the list of test image ids.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/train.csv', '../input/sample_submission.csv')
)

In [None]:
!unzip '../input/train.zip'
!unzip '../input/test.zip'

In [None]:
# Store the label column as strings. The generators further down are created
# with class_mode='binary' on this column; presumably that mode wants string
# labels -- confirm against the Keras flow_from_dataframe docs.
label_as_str = train['has_cactus'].astype('str')
train['has_cactus'] = label_as_str

# Cell output: number of rows per label value.
train['has_cactus'].value_counts()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 10% of the labelled rows for validation. The split is stratified on
# 'has_cactus' and seeded (random_state=42) so it is the same on every run.
train_df, valid_df = train_test_split(
    train,
    test_size=0.1,
    random_state=42,
    stratify=train['has_cactus'],
)

In [None]:
# Preview the first rows * cols training samples as an image grid.
rows, cols = 2, 5

fig, ax = plt.subplots(rows, cols, figsize=(20, 5))

for j in range(rows):
    # Grid row j shows samples j*cols .. (j+1)*cols - 1 of train_df.
    row_samples = train_df[j * cols:(j + 1) * cols].values
    for i, sample in enumerate(row_samples):
        panel = ax[j][i]
        # sample[0] is used as the image file name, sample[1] as its label.
        path = os.path.join('./train/', sample[0])
        panel.imshow(img.imread(path))
        panel.set_title('Label: ' + str(sample[1]))
        # Hide the grid lines and tick labels: only the picture matters here.
        panel.grid(False)
        panel.set_xticklabels([])
        panel.set_yticklabels([])

In [None]:
# Input preprocessing shared by every generator in this notebook.
#
# The VGG16 and ResNet50 backbones created below do not pass `weights`, so they
# load Keras' default ImageNet weights. Those weights expect the "caffe"
# preprocessing (RGB -> BGR, per-channel ImageNet mean subtracted, no scaling),
# not pixels rescaled to [0, 1]. vgg16.preprocess_input and
# resnet50.preprocess_input apply that same transform, so a single function
# serves both models.
backbone_preprocess = tf.keras.applications.vgg16.preprocess_input

# Training generator: backbone preprocessing plus random flips as augmentation.
datagen_train = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=backbone_preprocess,
    vertical_flip=True,
    horizontal_flip=True
)

# Validation / test generator: same preprocessing, no augmentation.
datagen_valid = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=backbone_preprocess
)

In [None]:
# Side length in pixels: the generators resize images to (img_size, img_size)
# and the backbones are built with input_shape (img_size, img_size, 3).
img_size = 224

In [None]:
# Settings shared by the training and validation iterators: both read image
# files from ./train, take the file name from 'id' and the binary label from
# 'has_cactus', and yield batches of 64 images resized to img_size x img_size.
labelled_flow_kwargs = dict(
    directory='./train',
    x_col='id',
    y_col='has_cactus',
    batch_size=64,
    class_mode='binary',
    target_size=(img_size, img_size),
)

# Augmented batches for training.
train_data = datagen_train.flow_from_dataframe(
    dataframe=train_df, **labelled_flow_kwargs
)

# Non-augmented batches for validation.
validation_data = datagen_valid.flow_from_dataframe(
    dataframe=valid_df, **labelled_flow_kwargs
)

In [None]:
# Two convolutional backbones without their classification heads
# (include_top=False), both sized for img_size x img_size RGB input.
backbone_input_shape = (img_size, img_size, 3)

model_vgg16 = tf.keras.applications.VGG16(
    include_top=False,
    input_shape=backbone_input_shape,
)
model_resnet50 = tf.keras.applications.ResNet50(
    include_top=False,
    input_shape=backbone_input_shape,
)

In [None]:
# Freeze every VGG16 layer so only the dense head added on top gets trained.
for vgg_layer in model_vgg16.layers:
    vgg_layer.trainable = False

In [None]:
# Binary classifier: VGG16 backbone -> flatten -> two (Dense 256 + Dropout 0.5)
# stages -> a single sigmoid output unit.
vgg_classifier = tf.keras.Sequential()
vgg_classifier.add(model_vgg16)
vgg_classifier.add(tf.keras.layers.Flatten())
vgg_classifier.add(tf.keras.layers.Dense(256, activation='relu'))
vgg_classifier.add(tf.keras.layers.Dropout(0.5))
vgg_classifier.add(tf.keras.layers.Dense(256, activation='relu'))
vgg_classifier.add(tf.keras.layers.Dropout(0.5))
vgg_classifier.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Print the layer-by-layer summary of the assembled model.
vgg_classifier.summary()

In [None]:
# Freeze every ResNet50 layer so only the dense head added on top gets trained.
for resnet_layer in model_resnet50.layers:
    resnet_layer.trainable = False

In [None]:
# Binary classifier: ResNet50 backbone -> flatten -> two (Dense 256 + Dropout 0.5)
# stages -> a single sigmoid output unit (same head as the VGG16 classifier).
resnet_classifier = tf.keras.Sequential()
resnet_classifier.add(model_resnet50)
resnet_classifier.add(tf.keras.layers.Flatten())
resnet_classifier.add(tf.keras.layers.Dense(256, activation='relu'))
resnet_classifier.add(tf.keras.layers.Dropout(0.5))
resnet_classifier.add(tf.keras.layers.Dense(256, activation='relu'))
resnet_classifier.add(tf.keras.layers.Dropout(0.5))
resnet_classifier.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Print the layer-by-layer summary of the assembled model.
resnet_classifier.summary()

In [None]:
# Each classifier gets its own Adam optimizer (default hyper-parameters) and is
# compiled for binary classification: cross-entropy loss, accuracy metric.
vgg_opt = tf.keras.optimizers.Adam()
vgg_classifier.compile(
    optimizer=vgg_opt,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

resnet_opt = tf.keras.optimizers.Adam()
resnet_classifier.compile(
    optimizer=resnet_opt,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Upper bound on the number of training epochs for both classifiers.
EPOCHS = 10

# Stop training once the validation loss has not improved for `patience`
# consecutive epochs and roll the model back to its best weights.
# patience must be smaller than EPOCHS: the first epoch always counts as an
# improvement, so with patience == EPOCHS the callback could never fire.
# NOTE(review): depending on the Keras version, restore_best_weights may only
# take effect when training is actually stopped early -- confirm for the
# installed version.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

In [None]:
# Train the VGG16-based classifier for at most EPOCHS epochs, validating on
# validation_data and letting the early-stopping callback end the run sooner.
vgg_history = vgg_classifier.fit(
    x=train_data,
    validation_data=validation_data,
    epochs=EPOCHS,
    callbacks=[early_stopping_cb],
)

In [None]:
# Plot every quantity recorded during VGG training (one line per history key)
# against the epoch index, with the y-axis clamped to [0, 1].
vgg_history_df = pd.DataFrame(vgg_history.history)
vgg_history_axes = vgg_history_df.plot(figsize=(13, 10))
vgg_history_axes.grid(True)
vgg_history_axes.set_ylim(0, 1)
plt.show()

In [None]:
# Evaluate the trained VGG classifier on the validation iterator. The return
# value is bound to `_` so it is not echoed as the cell's output.
_ = vgg_classifier.evaluate(validation_data)

In [None]:
# Train the ResNet50-based classifier for at most EPOCHS epochs, validating on
# validation_data and letting the early-stopping callback end the run sooner.
resnet_history = resnet_classifier.fit(
    x=train_data,
    validation_data=validation_data,
    epochs=EPOCHS,
    callbacks=[early_stopping_cb],
)

In [None]:
# Plot every quantity recorded during ResNet training (one line per history key)
# against the epoch index, with the y-axis clamped to [0, 1].
resnet_history_df = pd.DataFrame(resnet_history.history)
resnet_history_axes = resnet_history_df.plot(figsize=(13, 10))
resnet_history_axes.grid(True)
resnet_history_axes.set_ylim(0, 1)
plt.show()

In [None]:
# Evaluate the trained ResNet classifier on the validation iterator. The return
# value is bound to `_` so it is not echoed as the cell's output.
_ = resnet_classifier.evaluate(validation_data)

In [None]:
# Model used to produce the test-set predictions below.
best_model = vgg_classifier

In [None]:
# Unlabelled iterator over the test images listed in `test`
# (y_col=None / class_mode=None). shuffle=False keeps the batches in the same
# order as the rows of `test`, so predictions line up with the ids.
test_data = datagen_valid.flow_from_dataframe(
    dataframe=test,
    directory="./test",
    x_col="id",
    y_col=None,
    class_mode=None,
    target_size=(img_size, img_size),
    shuffle=False,
)

In [None]:
# Start the submission table with the test image ids; predictions are added next.
answer = pd.DataFrame({'id': test['id']})

In [None]:
# Predict on the test iterator (built with shuffle=False, so the outputs follow
# the row order of `test`). The model ends in a single sigmoid unit, so predict
# returns one column per sample; .ravel() flattens it to 1-D so the column
# assignment does not rely on pandas implicitly squeezing a 2-D array.
answer['has_cactus'] = best_model.predict(test_data, verbose=True).ravel()

In [None]:
# Show the first rows of the submission table as a quick sanity check.
answer.head()

In [None]:
# Write the submission file without the index column.
# The `line_terminator` keyword was renamed to `lineterminator` in pandas 1.5
# and the old spelling was removed in pandas 2.0, where it raises TypeError.
# Both it and sep=',' are omitted so the cell runs on any pandas version: ','
# is the default separator and the default line ending is os.linesep, which is
# '\n' on Linux.
answer.to_csv('submission.csv', index=False)