**This Python File is Based on The Aerial Cactus Identification Challenge by Kaggle and it is executed in the Google Colab code editor using the TPU for computational requirement.**

In [0]:
# Mounting the Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
#Importing Keras
from keras.models import Sequential

from keras_preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers, optimizers
import pandas as pd
import numpy as np

Using TensorFlow backend.


In [0]:
import os

In [0]:
#Relocating to a custom created directory

os.chdir('/content/drive/My Drive/fastai/CactusAerial')

In [0]:
#Loading the datasets using Pandas

train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('sample_submission.csv')

In [0]:
# Converting the target entry to String due to the requirement given by the flow_from_dataframe method by Keras

train_df['has_cactus'] = train_df['has_cactus'].astype(str)

In [0]:
#Creating the Datagenerators

train_datagen=ImageDataGenerator(rescale=1./255,validation_split=0.2,shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

In [0]:
test_datagen = ImageDataGenerator(rescale=1./255)


In [0]:
train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory='./train',
        x_col="id",
        y_col="has_cactus",
        subset="training",
        shuffle=True,
        target_size=(32, 32),
        batch_size=20,
        class_mode='binary')

Found 14000 validated image filenames belonging to 2 classes.


In [0]:
valid_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory='./train',
        x_col="id",
        y_col="has_cactus",
        subset="validation",
        shuffle=True,
        target_size=(32, 32),
        batch_size=20,
        class_mode='binary')

Found 3500 validated image filenames belonging to 2 classes.


In [0]:
test_generator=test_datagen.flow_from_dataframe(
dataframe=test_df,
directory='./test',
x_col="id",
y_col=None,
shuffle=False,
target_size=(32,32),
batch_size=20,
class_mode=None)

Found 4000 validated image filenames.


In [0]:
#Specifying and Creating the CNN Model for Classification

model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(32,32,3)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizers.rmsprop(lr=0.0001, decay=1e-6),loss="binary_crossentropy",metrics=["accuracy"])






Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [0]:
#Tried to use this for specifying steps per epoch but this was too heavy for the TPU

STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

In [0]:
#Fitting the model to the generators and based on trial and error, Training on only 4 epochs to avoid overfitting

model.fit_generator(generator=train_generator,
                    steps_per_epoch=100,
                    validation_data=valid_generator,
                    validation_steps=50,
                    epochs=4
)




Epoch 1/4





Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7fca4d37bc18>

In [0]:
#For Saving the weights

from keras.models import model_from_json

In [0]:
# serialize model to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")

Saved model to disk


The below given text snippet is used for loading the saved weights and model and to test it

In [0]:
# load json and create model
#json_file = open('model.json', 'r')
#loaded_model_json = json_file.read()
#json_file.close()
#loaded_model = model_from_json(loaded_model_json)
# load weights into new model
#loaded_model.load_weights("model.h5")
#print("Loaded model from disk")
 
# evaluate loaded model on test data
#loaded_model.compile(optimizers.rmsprop(lr=0.0001, decay=1e-6),loss="binary_crossentropy",metrics=["accuracy"])

In [0]:
#Do this  everytime you run predictions on test set to avoid confusions in the results

test_generator.reset()


In [0]:
#Training it for 200 steps per epoch for getting the complete result

pred=model.predict_generator(test_generator, steps=200, verbose=1)



In [0]:
#We get result in the form of an array

pred

array([[0.96717846],
       [0.99908125],
       [0.23617807],
       ...,
       [0.99461937],
       [0.9812589 ],
       [0.8877604 ]], dtype=float32)

In [0]:
#predicted_class_indices=np.argmax(pred,axis=1)

In [0]:
#labels = (train_generator.class_indices)
#labels = dict((v,k) for k,v in labels.items())
#predictions = [labels[k] for k in predicted_class_indices]

In [0]:
#Converting the prediction array to list

newp = pred.tolist()

In [0]:
#Creating the final DataFrame

filenames=test_generator.filenames
final_results=pd.DataFrame({"Filename":filenames,
                      "Prediction":newp})

In [0]:
final_results

Unnamed: 0,Filename,Prediction
0,000940378805c44108d287872b2f04ce.jpg,[0.967178463935852]
1,0017242f54ececa4512b4d7937d1e21e.jpg,[0.9990812540054321]
2,001ee6d8564003107853118ab87df407.jpg,[0.23617807030677795]
3,002e175c3c1e060769475f52182583d0.jpg,[0.3778188228607178]
4,0036e44a7e8f7218e9bc7bf8137e4943.jpg,[0.9408766031265259]
...,...,...
3995,ffaafd0c9f2f0e73172848463bc2e523.jpg,[0.9778838753700256]
3996,ffae37344310a1549162493237d25d3f.jpg,[0.9992281198501587]
3997,ffbd469c56873d064326204aac546e0d.jpg,[0.9946193695068359]
3998,ffcb76b7d47f29ece11c751e5f763f52.jpg,[0.9812589287757874]


In [0]:
#Saving it in the form of CSV

final_results.to_csv("results.csv",index=False)