In [None]:
# Optional dependency pin, left disabled:
#!pip install tensorflow-gpu==2.0
import os
# Show what is available under the input directory before loading anything.
print(os.listdir("../input"))

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import os
from tqdm import tqdm, tqdm_notebook
from keras.models import Sequential
from keras.layers import Activation, Dropout, Flatten, Dense, SpatialDropout2D, Conv2D, MaxPooling2D
from keras.applications import VGG16, VGG19, ResNet50
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, LearningRateScheduler

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras import regularizers

import tensorflow as tf
# Any results you write to the current directory are saved as output.

In [None]:
# Directories holding the training and test images (paths are relative to the notebook).
train_dir = "../input/train/train/"
test_dir = "../input/test/test/"
# Training metadata; later cells read its 'id' and 'has_cactus' columns.
train_df = pd.read_csv('../input/train.csv')

In [None]:

# Load every training image listed in train_df into memory, then hold out the
# last 10% of the samples for validation.
dim_x = 32
dim_y = 32
dim_ch = 3

imges = train_df['id'].values
# Labels are taken in row order so they stay aligned with `imges` without a
# per-image DataFrame scan.
y_train = np.asarray(train_df['has_cactus'].values)

x_train = []
for img_id in tqdm_notebook(imges):
    img = cv2.imread(train_dir + img_id)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # fail here with the file name instead of a dtype error further down.
        raise FileNotFoundError("Could not read training image: " + train_dir + img_id)
    x_train.append(img)

x_train = np.asarray(x_train).astype('float32')
# Pixel values are left unscaled here; the ImageDataGenerator objects apply
# rescale=1./255 when batches are drawn.

# Split by an explicit index so that the training part is everything BEFORE the
# validation tail (and so that nb_valid == 0 yields an empty validation set
# rather than the whole array).
nb_valid = int(0.1 * len(x_train))
split_at = len(x_train) - nb_valid
x_valid = x_train[split_at:, ...]
y_valid = y_train[split_at:, ...]
x_train = x_train[:split_at, ...]
y_train = y_train[:split_at, ...]



In [None]:
# Mini-batch size handed to the flow() calls that build the train/validation generators.
batch_size = 32

In [None]:
# Training pipeline: multiply pixel values by 1/255 and apply random
# geometric and colour augmentation to every batch.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    # geometric augmentation
    horizontal_flip=True,
    rotation_range=90.0,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    # colour augmentation
    brightness_range=[0.9, 1.1],
    # brightness_range=(0.9995,1.0005)  # alternative setting, left commented out
    channel_shift_range=0.15,
)
train_generator = train_datagen.flow(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    shuffle=True,
)

# Validation pipeline: same 1/255 rescaling, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)
valid_generator = test_datagen.flow(
    x=x_valid,
    y=y_valid,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Import the applications subpackage so models can be referenced as
# keras.applications.<Name>, and list the names it exposes.
import keras.applications
dir(keras.applications)

In [None]:
# Backbones that can be selected, mapped to the `weights` argument each one is
# built with (None means randomly initialised).
backbone_weights = {
    'VGG19': 'imagenet',
    'ResNet50': 'imagenet',      # author's note: 0.9768, no spatial dropout, dense dropout 0.25
    'VGG16': 'imagenet',
    'NASNetMobile': None,
    'MobileNetV2': 'imagenet',
    'DenseNet121': 'imagenet',
}

# Change this single name to switch backbone.
backbone_name = 'DenseNet121'

if backbone_name not in backbone_weights:
    # Fail loudly instead of leaving `my_net` undefined for the next cell.
    raise ValueError("Unknown backbone %r; choose one of %s"
                     % (backbone_name, sorted(backbone_weights)))

# Look the constructor up lazily so that only the selected architecture has to
# exist in the installed keras.applications.
my_net = getattr(keras.applications, backbone_name)(
    weights=backbone_weights[backbone_name],
    include_top=False,
    input_shape=(dim_x, dim_y, 3))
    

In [None]:
# Fine-tune the whole backbone rather than freezing it.
my_net.trainable = True

# Classifier: backbone features -> flatten -> heavy dropout -> single sigmoid unit.
model = Sequential([
    my_net,
    Flatten(),
    Dropout(rate=0.95),
    Dense(1),
    Activation('sigmoid'),
])
model.summary()

In [None]:
# Binary classification set-up: cross-entropy loss, accuracy reported per epoch,
# Adam created with a learning rate of 1e-4.
model.compile(
    optimizer=Adam(lr=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# callbacks
def learning_schedule(epoch):
    """Return the learning rate to use for the given epoch index.

    Piecewise-constant schedule:
        epoch <= 1   -> 3e-4
        epoch <= 10  -> 1e-4
        epoch <= 50  -> 1e-5
        otherwise    -> 1e-6
    """
    # (last epoch the rate applies to, rate), in increasing epoch order.
    steps = ((1, 3e-4), (10, 1e-4), (50, 1e-5))
    for last_epoch, rate in steps:
        if epoch <= last_epoch:
            return rate
    return 1e-6

# Callback that applies learning_schedule(epoch) at the start of each epoch.
lrate = LearningRateScheduler(learning_schedule)

# Callback that stops training once val_loss has gone 100 epochs without improving.
early = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=100,
    verbose=1,
    mode='auto',
)

In [None]:
%%time
# Train model
batch_size = 32
nb_epochs = 2048

history = model.fit_generator(generator=train_generator,\
                                steps_per_epoch=int(17500/batch_size),\
                                validation_data=valid_generator,\
                                validation_steps=50,\
                                epochs=nb_epochs,\
                                callbacks = [early, lrate],\
                                verbose=2)

In [None]:
# Plot training vs. validation accuracy (top) and loss (bottom) per epoch.

# Keras releases differ in the history key used for metrics=['accuracy']
# ('acc' in older ones, 'accuracy' in newer ones), so resolve it at run time.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(15, 12))

ax_acc.plot(history.history[acc_key])
ax_acc.plot(history.history['val_' + acc_key])
ax_acc.set_title("Accuracy and Loss", fontsize=28)
ax_acc.set_ylabel('accuracy', fontsize=24)
ax_acc.legend(['Train', 'Val'], fontsize=18)

ax_loss.plot(history.history['loss'])
ax_loss.plot(history.history['val_loss'])
ax_loss.set_xlabel('epoch', fontsize=24)
ax_loss.set_ylabel('loss', fontsize=24)
ax_loss.legend(['Train', 'Val'], fontsize=18)

plt.show()

In [None]:
%%time
x_test = []
test_imgs = []
for img_id in tqdm_notebook(os.listdir(test_dir)):
    x_test.append(cv2.imread(test_dir + img_id))     
    test_imgs.append(img_id)
x_test = np.asarray(x_test)
x_test = x_test.astype('float32')
x_test /= 255

In [None]:
# Prediction
# Run the trained model on the already-rescaled test array; the network ends in
# a single sigmoid unit, so there is one output value per image.
test_predictions = model.predict(x_test)

In [None]:
# Build the submission table in one step: one row per test image, with the
# sigmoid output turned into a hard 0/1 label (1 only when the score is > 0.75).
# Constructing the frame from aligned sequences avoids the row-by-row
# DataFrame.set_value calls, which no longer exist in current pandas.
sub_df = pd.DataFrame(
    {
        'id': test_imgs,
        'has_cactus': (np.ravel(test_predictions) > 0.75).astype(int),
    },
    columns=['id', 'has_cactus'],
)

sub_df.to_csv('./submission.csv', index=False)

# Last expression of the cell so the preview is actually displayed.
sub_df.head()