In [None]:
import tensorflow as tf
import numpy as np # math (array ; .expand_dims ; .squeeze Remove 1-dimensional entries of the shape ; )
import pandas as pd # import dataset (.read_csv ; )
import cv2
#from keras.preprocessing.image import ImageDataGenerator

import matplotlib.pyplot as plt # plotting (.imshow to render images ; )
import matplotlib.image as mpimg
%matplotlib inline

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, Conv2D, Dropout, MaxPooling2D, LSTM, CuDNNLSTM, Flatten, Reshape, ZeroPadding2D, Convolution2D, BatchNormalization, Activation  # CuDNNLSTM only on GPU
from tensorflow.keras.callbacks import EarlyStopping

from tqdm import tqdm, tqdm_notebook
import random as rn

In [None]:
!ls ../input/aerial-cactus-identification

In [None]:
# Root folder of the competition data. File names are appended to it by plain
# string concatenation, hence the trailing slash.
PATH = '../input/aerial-cactus-identification/'

# Load Data

In [None]:
# Read the sample submission; its 'id' column is used further down to load
# the images that have to be scored.
submission_csv = PATH + 'sample_submission.csv'
submissionDf = pd.read_csv(submission_csv)

submissionDf.head()

In [None]:
# Read the labelled set: one row per image with its 'id' (file name) and
# 'has_cactus' label.
train_csv = PATH + 'train.csv'
trainDf = pd.read_csv(train_csv)

trainDf.head()

In [None]:
# Image folders, derived from PATH so the dataset root is written in one place
# only. Both end with a slash because image file names are appended directly.
TRAINPATH = PATH + 'train/train/'
TESTPATH = PATH + 'test/test/'

In [None]:
# Load the labelled images and their labels, both in the row order of trainDf.

imgs = trainDf['id'].values

X_tr = []
for img_id in tqdm_notebook(imgs):
    img = cv2.imread(TRAINPATH + img_id)
    if img is None:
        # cv2.imread signals an unreadable or missing file by returning None
        # instead of raising; fail here with the offending path.
        raise FileNotFoundError('Could not read image: ' + TRAINPATH + img_id)
    X_tr.append(img)

# Pixel values scaled to [0, 1] as float32.
X_tr = np.asarray(X_tr, dtype='float32') / 255

# Images were read in row order, so the label column already lines up with
# X_tr; no per-image lookup in the frame is needed.
Y_tr = np.asarray(trainDf['has_cactus'].values)

## Data separation:
### * 12000 images for training
### * 3000 images for validation (split made in model.fit())
### * 2500 images for testing (held out from the labelled training set)
### * Submission set of 4000 images for competition scoring

In [None]:
# Split the labelled data: the first 15000 samples are used for fitting
# (model.fit takes the validation part out of them via validation_split),
# everything after that is kept aside as a local test set.

x_tr = X_tr[:15000]
y_tr = Y_tr[:15000]

x_te = X_tr[15000:]
y_te = Y_tr[15000:]  # labels matching x_te, sliced at the same boundary

# Build Model

## Model 1

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Callbacks for model 1:
#  - es stops training once val_loss has not improved for 40 epochs;
#  - mc writes 'best_model.h5' only when val_acc reaches a new maximum.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=40,
    verbose=1,
)
mc = ModelCheckpoint(
    'best_model.h5',
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Model 1: five identical 2x2 convolutions (32 filters, 'same' padding, ReLU)
# on 32x32x3 inputs, flattened into a single sigmoid output unit.
conv_kwargs = dict(strides=(1, 1), padding='same', activation='relu')

model = Sequential()
model.add(Conv2D(32, (2, 2), input_shape=(32, 32, 3), **conv_kwargs))
for _ in range(4):
    model.add(Conv2D(32, (2, 2), **conv_kwargs))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

model.summary()

In [None]:
# Train model 1 with Adam on binary cross-entropy; 20% of x_tr / y_tr is used
# for validation, which is what the es and mc callbacks monitor.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc'],
)

history = model.fit(
    x_tr,
    y_tr,
    batch_size=20,
    epochs=100,
    validation_split=0.2,
    callbacks=[es, mc],
)

In [None]:
print (history.history.keys())

# Training vs. validation loss per epoch for model 1. The second curve is the
# validation split made by model.fit, not the held-out x_te test set, so it is
# labelled 'validation'.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Binarise the sigmoid outputs of model 1 at 0.75 and flatten them to 1-D.
# reshape(-1) keeps exactly one prediction per test image; np.resize to a fixed
# length would silently repeat or drop values if the count ever differed.
preds = (model.predict(x_te) > 0.75).astype('int64').reshape(-1)

## Loading ground truth for the test set

In [None]:
# Ground-truth labels of the held-out images, taken with the same slice
# boundary as x_te so that predictions and labels always line up.
trues = Y_tr[15000:]

## Score : Model 1

In [None]:
# Accuracy of model 1 on the held-out set: fraction of matching predictions.
np.mean(preds == trues)

## Model 2

In [None]:
# Model 2: four 3x3 convolution + 2x2 max-pooling stages (50 filters each),
# with 20% dropout between consecutive stages, then a single sigmoid unit.
conv2_kwargs = dict(kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')

model2 = Sequential()

# First stage carries the input shape.
model2.add(Conv2D(50, input_shape=x_tr.shape[1:], **conv2_kwargs))
model2.add(MaxPooling2D(pool_size=(2, 2)))

# Remaining three stages, each preceded by dropout.
for _ in range(3):
    model2.add(Dropout(0.2))
    model2.add(Conv2D(50, **conv2_kwargs))
    model2.add(MaxPooling2D(pool_size=(2, 2)))

model2.add(Flatten())
model2.add(Dense(1, activation='sigmoid'))

model2.summary()

In [None]:
# Callbacks for model 2:
#  - es2 stops training once val_loss has not improved for 40 epochs;
#  - mc2 writes 'best_model2.h5' only when val_acc reaches a new maximum.
es2 = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=40,
    verbose=1,
)
mc2 = ModelCheckpoint(
    'best_model2.h5',
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train model 2 with Adam on binary cross-entropy; 20% of x_tr / y_tr is used
# for validation, which is what the es2 and mc2 callbacks monitor.
model2.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc'],
)

history2 = model2.fit(
    x_tr,
    y_tr,
    batch_size=20,
    epochs=100,
    validation_split=0.2,
    callbacks=[es2, mc2],
)

In [None]:
print (history2.history.keys())

# Training vs. validation loss per epoch for model 2. The second curve is the
# validation split made by model2.fit, not the held-out x_te test set, so it
# is labelled 'validation'.
plt.plot(history2.history['loss'])
plt.plot(history2.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Binarise the sigmoid outputs of model 2 at 0.75 and flatten them to 1-D.
# reshape(-1) keeps exactly one prediction per test image; np.resize to a fixed
# length would silently repeat or drop values if the count ever differed.
preds2 = (model2.predict(x_te) > 0.75).astype('int64').reshape(-1)

## Score : Model 2

In [None]:
# Accuracy of model 2 on the held-out set: fraction of matching predictions.
np.mean(preds2 == trues)

In [None]:
from tensorflow.keras.models import load_model

## Score : Saved Model 1

In [None]:
# Reload the checkpoint written by the mc callback and score it on the
# held-out set, using the same 0.75 threshold as for the in-memory model.
saved_model = load_model('best_model.h5')

# One 0/1 prediction per test image, flattened to 1-D.
preds11 = (saved_model.predict(x_te) > 0.75).astype('int64').reshape(-1)

# Accuracy: fraction of predictions equal to the ground truth.
np.mean(preds11 == trues)

## Score : Saved Model 2

In [None]:
# Reload the checkpoint written by the mc2 callback and score it on the
# held-out set, using the same 0.75 threshold as for the in-memory model.
saved_model2 = load_model('best_model2.h5')

# One 0/1 prediction per test image, flattened to 1-D.
preds22 = (saved_model2.predict(x_te) > 0.75).astype('int64').reshape(-1)

# Accuracy: fraction of predictions equal to the ground truth.
np.mean(preds22 == trues)

## Model with batch normalization

In [None]:
def _add_conv_bn_relu(net, filters, **conv_kwargs):
    """Append a 3x3 convolution, batch normalisation and ReLU to `net`.

    Extra keyword arguments (e.g. input_shape) are forwarded to Conv2D.
    """
    net.add(Conv2D(filters, (3, 3), **conv_kwargs))
    net.add(BatchNormalization())
    net.add(Activation('relu'))


# Model 3: conv/batch-norm/ReLU stacks with 32, 64 and 128 filters, followed by
# two dense layers with heavy dropout and a single sigmoid output.
model3 = Sequential()

# Stack 1: three 32-filter units; the first one carries the input shape.
_add_conv_bn_relu(model3, 32, input_shape=x_tr.shape[1:])
for _ in range(2):
    _add_conv_bn_relu(model3, 32)
model3.add(MaxPooling2D(pool_size=(2, 2)))

# Stack 2: three 64-filter units.
for _ in range(3):
    _add_conv_bn_relu(model3, 64)
model3.add(MaxPooling2D(pool_size=(2, 2)))

# Stack 3: a single 128-filter unit.
_add_conv_bn_relu(model3, 128)

# Classifier head: Dense -> ReLU -> Dropout(0.6), for 1024 then 256 units.
model3.add(Flatten())
for units in (1024, 256):
    model3.add(Dense(units))
    model3.add(Activation('relu'))
    model3.add(Dropout(0.6))

model3.add(Dense(1))
model3.add(Activation('sigmoid'))

model3.summary()

In [None]:
# Callbacks for model 3:
#  - es3 stops training once val_loss has not improved for 40 epochs;
#  - mc3 writes 'best_model3.h5' only when val_acc reaches a new maximum.
es3 = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=40,
    verbose=1,
)
mc3 = ModelCheckpoint(
    'best_model3.h5',
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train model 3 with Adam on binary cross-entropy; 20% of x_tr / y_tr is used
# for validation, which is what the es3 and mc3 callbacks monitor.
model3.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc'],
)

history3 = model3.fit(
    x_tr,
    y_tr,
    batch_size=20,
    epochs=100,
    validation_split=0.2,
    callbacks=[es3, mc3],
)

In [None]:
print (history3.history.keys())

# Training vs. validation loss per epoch for model 3. The second curve is the
# validation split made by model3.fit, not the held-out x_te test set, so it
# is labelled 'validation'.
plt.plot(history3.history['loss'])
plt.plot(history3.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

## Score : Model 3

In [None]:
# Binarise the sigmoid outputs of model 3 at 0.75 and flatten them to 1-D.
# reshape(-1) keeps exactly one prediction per test image; np.resize to a fixed
# length would silently repeat or drop values if the count ever differed.
preds3 = (model3.predict(x_te) > 0.75).astype('int64').reshape(-1)

# Accuracy on the held-out set: fraction of predictions equal to the truth.
np.mean(preds3 == trues)

In [None]:
!ls -l

In [None]:
# Reload the checkpoint written by the mc3 callback and score it on the
# held-out set, using the same 0.75 threshold as for the in-memory model.
saved_model3 = load_model('best_model3.h5')

# One 0/1 prediction per test image, flattened to 1-D.
preds33 = (saved_model3.predict(x_te) > 0.75).astype('int64').reshape(-1)

# Accuracy: fraction of predictions equal to the ground truth.
np.mean(preds33 == trues)

In [None]:
# Model used to produce the competition submission: the reloaded checkpoint of
# model 3 (best_model3.h5).
bestModel = saved_model3

In [None]:
%%time
X_tst = []
Test_imgs = []

imgs = submissionDf['id'].values

for img_id in tqdm_notebook(imgs):
    X_tst.append(cv2.imread(TESTPATH + img_id))     
    Test_imgs.append(img_id)
    
X_tst = np.asarray(X_tst)
X_tst = X_tst.astype('float32')
X_tst /= 255



In [None]:
# Raw outputs of the selected model for the submission images; they are
# thresholded when the submission frame is built.
test_predictions = bestModel.predict(X_tst)

In [None]:
# Put the raw model outputs in a one-column frame, then turn them into 0/1
# labels using the same 0.75 threshold as in the local evaluation.
sub_df = pd.DataFrame(test_predictions, columns=['has_cactus'])
is_cactus = sub_df['has_cactus'] > 0.75
sub_df['has_cactus'] = is_cactus.astype('int64')

In [None]:
# Add an empty 'id' column (filled in afterwards) and rotate the column order
# so that the last column comes first.
sub_df = sub_df.assign(id='')
column_names = list(sub_df.columns)
cols = [column_names[-1]] + column_names[:-1]
sub_df = sub_df[cols]

In [None]:
# Fill the 'id' column with the image file names, in the order the images were
# loaded and scored. This is a single column assignment: DataFrame.set_value no
# longer exists in current pandas, and a per-row loop is not needed. assign()
# returns a new frame and raises if the number of ids does not match the number
# of rows.
sub_df = sub_df.assign(id=Test_imgs)

In [None]:
# Preview the first rows of the submission frame before writing it out.
sub_df.head()

In [None]:
# Write the submission file to the working directory, without the row index.
sub_df.to_csv('submission.csv',index=False)

In [None]:
!ls