In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os

# List the entries of the ../input directory so the paths used below can be checked.
input_entries = os.listdir("../input")
print(input_entries)

# Any results you write to the current directory are saved as output.

## Unzipping train dataset

In [None]:
!unzip ../input/train.zip

## Visualizing an image from train dataset

In [None]:
from skimage import io

# Use a dedicated name for the preview array: the import cell later binds `image`
# to the keras.preprocessing.image module, so reusing that name here would make
# the two cells overwrite each other depending on execution order.
sample_image = io.imread("train/0011485b40695e9138e92d0b3fb55128.jpg")
io.imshow(sample_image)

## Import necessary libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

import keras.backend as K
from keras import layers
from keras.layers import Input, Add, Dense, Dropout, MaxPooling2D, Flatten
from keras.models import Model
from keras import optimizers
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import layer_utils
from keras.applications import ResNet50
from keras.applications.resnet50 import preprocess_input
from keras.callbacks import History, ModelCheckpoint, Callback
from sklearn.metrics import roc_auc_score

## Read the data from train.csv

In [None]:
# Load the training labels table and preview its first rows.
train_csv_path = "../input/train.csv"
train_df = pd.read_csv(train_csv_path)
train_df.head()

### Exploratory Data Analysis

In [None]:
# Number of rows for each distinct value of the has_cactus label.
train_df.has_cactus.value_counts()

## Create Image Data Generators for train and validation

In [None]:
# The generator only rescales pixel values by 1/255; no augmentation is configured.
datagen = ImageDataGenerator(rescale=1/255.0)
train_dir = "train/"
batch_size = 64
image_size = 32
# Rows before this index are used for training, the remaining rows for validation.
train_rows = 14001

# The label column is cast to str before being handed to flow_from_dataframe
# with class_mode='binary'.
train_df.has_cactus = train_df.has_cactus.astype(str)

# Arguments shared by the training and validation generators.
flow_kwargs = dict(directory=train_dir, x_col='id', y_col='has_cactus',
                   class_mode='binary', batch_size=batch_size,
                   target_size=(image_size, image_size))

train_generator = datagen.flow_from_dataframe(dataframe=train_df[:train_rows],
                                              **flow_kwargs)

# shuffle=False keeps the validation batches in a fixed order. fit_generator is
# called with validation_steps = n // batch_size, so a reshuffling generator would
# score a different subset of the held-out rows every epoch and make val_loss
# (the quantity ModelCheckpoint monitors) incomparable across epochs.
validation_generator = datagen.flow_from_dataframe(dataframe=train_df[train_rows:],
                                                   shuffle=False,
                                                   **flow_kwargs)

## Model creation using ResNet50 with all base layers trainable

In [None]:
num_classes=1

def get_model(input_shape=(32, 32, 3), trainable=True):
    """Build a ResNet50-based classifier with a small dense head.

    Args:
        input_shape: Shape of one input image passed to ResNet50
            (defaults to the 32x32 RGB shape used in this notebook).
        trainable: Value assigned to ``trainable`` on every layer of the
            ResNet50 base. The default ``True`` fine-tunes the whole base;
            pass ``False`` to freeze it.

    Returns:
        An uncompiled ``Model`` mapping the ResNet50 input to a
        ``num_classes``-unit sigmoid output layer named ``'fcnew'``.
    """
    # ImageNet-initialised ResNet50 without its original classification top.
    base_model = ResNet50(weights='imagenet', include_top=False,
                          input_shape=input_shape)

    # Apply the same trainable flag to every layer of the base network.
    for layer in base_model.layers:
        layer.trainable = trainable

    # New head: flatten -> 512-unit ReLU layer -> sigmoid output.
    head = Flatten()(base_model.output)
    head = Dense(512, activation='relu')(head)
    head = Dense(num_classes, activation='sigmoid', name='fcnew')(head)

    return Model(inputs=base_model.input, outputs=head)

## Compiling the model

In [None]:
model = get_model()
# Use the documented Adam class; the lowercase `adam` alias is not exposed by
# every Keras distribution (e.g. tf.keras).
optimizer = optimizers.Adam(lr=0.0001)
# Single sigmoid output -> binary cross-entropy loss; accuracy is tracked as a metric.
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()

## Add a callback that records training loss and validation AUC

In [None]:
class Loss(Callback):
    """Callback that records the training loss and validation ROC AUC per epoch.

    At the end of every epoch the model predicts on each batch of the
    module-level ``validation_generator``; the ROC AUC over all of those
    batches is printed and written to ``logs['val_auc']``.

    Attributes set during training:
        losses: list with the ``'loss'`` entry of ``logs`` for each finished
            epoch (``None`` when the entry is absent).
    """

    def on_train_begin(self, logs=None):
        # Avoid a shared mutable default: a `{}` default would be mutated below
        # and leak state between calls.
        logs = {} if logs is None else logs
        self.losses = []
        logs['val_auc'] = 0

    def on_epoch_begin(self, epoch, logs=None):
        return

    def on_epoch_end(self, epoch, logs=None):
        logs = {} if logs is None else logs
        self.losses.append(logs.get('loss'))

        predictions = []
        targets = []
        # Index the generator batch by batch so that each batch's predictions
        # are paired with the labels returned alongside it.
        for batch_index in range(len(validation_generator)):
            x_val, y_val = validation_generator[batch_index]
            # Flatten the per-sample predictions so scores and labels are both 1-D.
            predictions.append(self.model.predict(x_val).ravel())
            targets.append(y_val)
        predictions = np.concatenate(predictions)
        targets = np.concatenate(targets)

        roc_auc = roc_auc_score(targets, predictions)
        print ('\nVal AUC for epoch{}: {}'.format(epoch, roc_auc))
        # Expose the score through logs so callbacks listed after this one can read it.
        logs['val_auc']=roc_auc

## Fit Generator

In [None]:
epochs = 10

loss = Loss()

# Keep only the weights of the epoch with the lowest val_loss.
checkpoint = ModelCheckpoint(
    "best_model.hdf5",
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    period=1,
)

# Whole batches only: integer division leaves out any final partial batch.
train_steps = train_generator.n // batch_size
val_steps = validation_generator.n // batch_size

history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=val_steps,
    callbacks=[loss, checkpoint],
)


## Plot accuracy for every epoch

In [None]:
# Keras records the accuracy metric under 'acc' in older releases and under
# 'accuracy' in newer ones; use whichever key this run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

fig, ax = plt.subplots()
ax.plot(history.history[acc_key])
ax.plot(history.history['val_' + acc_key])
ax.set_title("Accuracy for every epoch")
ax.set_xlabel('epochs')
ax.set_ylabel('Accuracy')
ax.legend(['train','validation'],loc='lower right')
plt.show()

In [None]:
# Build a second model with the same architecture and load the checkpointed weights into it.
best_weights_path = 'best_model.hdf5'
val_model = get_model()
val_model.load_weights(best_weights_path)

## Predict on test data and submit predictions

In [None]:
import os
test_dir = "../input/test/test/"
test_df=pd.read_csv("../input/sample_submission.csv")

# Score the files that sit directly inside test_dir, in sorted order so the
# submission rows are deterministic. (Walking sub-directories would pair file
# names with the wrong directory when joined to test_dir.)
test_files = sorted(
    name for name in os.listdir(test_dir)
    if os.path.isfile(os.path.join(test_dir, name))
)

if test_files:
    # Same preprocessing as the training generator: float pixels scaled by 1/255.
    # Assumes every test image decodes to the model's input shape -- np.stack
    # raises if the shapes differ.
    test_images = np.stack([
        io.imread(os.path.join(test_dir, name)).astype(np.float32) / 255.0
        for name in test_files
    ])

    # One batched predict call instead of one call per image; column 0 of the
    # output holds the single sigmoid score per image.
    test_preds = val_model.predict(test_images, batch_size=batch_size)[:, 0]

    # Size the table to the files actually found, so neither leftover
    # sample-submission rows nor an out-of-range row index can occur; then fill
    # the first column with file names and the second with predictions.
    test_df = test_df.reindex(range(len(test_files)))
    test_df[test_df.columns[0]] = test_files
    test_df[test_df.columns[1]] = test_preds

In [None]:
# Write the predictions table to the working directory, without the index column.
submission_path = "sample_submission.csv"
test_df.to_csv(submission_path, index=False)