# Plant Seedlings Classification 



Data Description:
You are provided with a training set and a test set of images of plant seedlings at various stages of growth.
Each image has a filename that is its unique id.
The dataset comprises 12 plant species.

The goal of the competition is to create a classifier capable of determining a plant's species from a photo.


# Steps and tasks:


1. Import the libraries, load dataset, print shape of data, visualize the images in dataset.  
2. Data Pre-processing:  
a. Normalization.
b. Gaussian Blurring.
c. Masking
d. Visualize data after pre-processing.
3. Make data compatible:  
a. Split the dataset into training, testing, and validation set.
b. Reshape data into shapes compatible with Keras models.
c. Convert labels from digits to one hot vectors.
d. Print the label for y_train[0].
4. Building CNN:  
a. Define layers.
b. Set optimizer and loss function. (Use Adam optimizer and categorical crossentropy.)
5. Fit and evaluate model and print confusion matrix.  
6. Submit predictions on the test image on Kaggle. 



# Code
Import libraries, load the dataset, pre-process the data, and make the data compatible

In [None]:
# Mount Google Drive so the dataset archive can be kept on it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import cv2
from google.colab.patches import cv2_imshow
import numpy as np
import pandas as pd
from glob import glob
from matplotlib import pyplot as plt
import itertools

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D,Activation,GlobalMaxPool2D,GlobalAveragePooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam,RMSprop
from keras.utils.np_utils import to_categorical  
from keras.utils import np_utils
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score 

In [None]:
 !wget --header="Host: storage.googleapis.com" --header="User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36" --header="Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9" --header="Accept-Language: en-US,en;q=0.9" --header="Referer: https://www.kaggle.com/" "https://storage.googleapis.com/kaggle-competitions-data/kaggle-v2/7880/862031/bundle/archive.zip?GoogleAccessId=web-data@kaggle-161607.iam.gserviceaccount.com&Expires=1610181297&Signature=p0zAvDLq8TOzzlt8IbXGsVqY7PbTzyWcn6CgRU571bqMeuYIn3OYkVrj0udTXuetlfCJPkkmwom%2B%2FMyO0hi3rUSEQzVaeQ537DULzChhPScD5PoEhtPC0fK40DkGmIxc9D8N8FFpXVtCd5uNYxgHg35y2cwbZId%2FSdbj0rHpaBbKCDy5o%2BBNkIAXZsUsvERLUPv8xg2tXbqfozp%2FjCoa4hszkNTVSRA6%2FlVSbBHOhIGGtBdTvZUAqZwdCY5m86NZOElQOwCNFJy3XJXZKhfXHx3oN1loI8JBhXEMMc6BhKhJymCEEI%2BlkYPU5%2F%2FLZ3DftPHyvtehkerOexzT6f6dhg%3D%3D&response-content-disposition=attachment%3B+filename%3Dplant-seedlings-classification.zip" -c -O 'plant-seedlings-classification.zip'

In [None]:
!ls

In [None]:
# move the downloaded zip to my drive - Data Folder
!mv plant-seedlings-classification.zip "/content/drive/My Drive/Colab Notebooks/Data/plant-seedlings-classification.zip"

In [None]:
# Path to the dataset archive (a zip file, not a folder) on Google Drive,
# where the download is moved after wget. The archive contains the "train" image folder.
train_path = "/content/drive/My Drive/Colab Notebooks/Data/plant-seedlings-classification.zip"

# Alternative: read the archive from /content, without moving it to Drive first.
#train_path = "/content/plant-seedlings-classification.zip"

In [None]:
!mkdir temp_train

In [None]:
# Extract the dataset archive into ./temp_train; later cells read the
# "train" and "test" image folders from there.
from zipfile import ZipFile

# The handle is deliberately NOT named ``zip``: at notebook scope that name
# would replace the built-in ``zip`` for every cell that runs afterwards.
with ZipFile(train_path, 'r') as archive:
    archive.extractall('./temp_train')

In [None]:
import os

path = "./temp_train/train/*/*.png"  # every PNG inside every class folder of the training set
files = glob(path)

trainImg = []    # resized images as pixel arrays
trainLabel = []  # class name of each image, in the same order as trainImg
num = len(files)

# Read every image, resize it to 128x128 and record its label.
# The glob order is kept as-is because a later cell indexes trainImg by position.
for j, img in enumerate(files, start=1):
    print(str(j) + "/" + str(num), end="\r")  # in-place progress counter

    image = cv2.imread(img)
    if image is None:
        # cv2.imread reports an unreadable file by returning None; fail with
        # the offending path instead of an opaque error inside cv2.resize.
        raise IOError("Could not read image: " + img)

    trainImg.append(cv2.resize(image, (128, 128)))
    # The parent folder name is the class the image belongs to.
    trainLabel.append(os.path.basename(os.path.dirname(img)))

trainImg = np.asarray(trainImg)        # train images set
trainLabel = pd.DataFrame(trainLabel)  # train labels set (single column 0)

In [None]:
print(trainImg.shape)
print(trainLabel.shape)

In [None]:
trainLabel[0].value_counts()

In [None]:
# Show twelve hand-picked training images on a 2 x 6 grid, titled with their labels.
# NOTE(review): the indices depend on the order glob returned the files in — confirm
# they still select one image per class on the current machine.
sample_indices = [0, 496, 759, 1370, 1755, 1685,
                  2409, 2630, 3020, 3251, 3538, 4749]

f = plt.figure(figsize=(20, 20))

for position, index in enumerate(sample_indices, start=1):
    # Positions 1-12 fill both rows of the same 2 x 6 grid. The original used a
    # 1 x 6 grid for the last six images, which drew them over the first row.
    f.add_subplot(2, 6, position)
    # cv2.imread yields BGR channel order; convert so matplotlib shows true colours.
    plt.imshow(cv2.cvtColor(trainImg[index], cv2.COLOR_BGR2RGB))
    plt.title(trainLabel[0][index])

In [None]:
# Keep only the green (plant) pixels of every training image; everything else becomes black.
trainImg_new = []
getEx = True  # plot the intermediate stages for the first image only

# Green parameters and the morphology kernel are the same for every image,
# so they are built once instead of on every iteration.
sensitivity = 35
lower = np.array([60 - sensitivity, 100, 50])
upper = np.array([60 + sensitivity, 255, 255])
struc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

for i in trainImg:
    # Blurred image
    blurr = cv2.GaussianBlur(i, (5, 5), 0)
    # HSV image
    hsv = cv2.cvtColor(blurr, cv2.COLOR_BGR2HSV)

    # Masked image: pixels inside the green range, then closed to fill small holes
    mask = cv2.inRange(hsv, lower, upper)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, struc)

    # Boolean image: copy the original pixels where the mask is set
    boolean = mask > 0
    new = np.zeros_like(i, np.uint8)
    new[boolean] = i[boolean]
    trainImg_new.append(new)

    if getEx:
        f = plt.figure(figsize=(20, 20))

        # BGR images are converted to RGB for display only; the stored data is unchanged.
        f.add_subplot(1, 6, 1)
        plt.imshow(cv2.cvtColor(i, cv2.COLOR_BGR2RGB))
        plt.title('Original image')

        f.add_subplot(1, 6, 2)
        plt.imshow(cv2.cvtColor(blurr, cv2.COLOR_BGR2RGB))
        plt.title('Blurred image')

        # HSV channels are drawn as if they were RGB, purely as an illustration.
        f.add_subplot(1, 6, 3)
        plt.imshow(hsv)
        plt.title('HSV image')

        f.add_subplot(1, 6, 4)
        plt.imshow(mask)
        plt.title('Masked image')

        f.add_subplot(1, 6, 5)
        plt.imshow(boolean)
        plt.title('BOOLEAN Masked image')

        f.add_subplot(1, 6, 6)
        plt.imshow(cv2.cvtColor(new, cv2.COLOR_BGR2RGB))
        plt.title('New Processed image')

        getEx = False

trainImg_new = np.asarray(trainImg_new)


In [None]:
trainImg_new.shape

In [None]:
# Learn the mapping from species name to integer class id.
labels = preprocessing.LabelEncoder().fit(trainLabel[0])
print('Classes' + str(labels.classes_))

In [None]:
# Species names -> integer ids -> one-hot vectors.
encodedlabel = labels.transform(trainLabel[0])
convertedlabels = to_categorical(encodedlabel)

# The width of the one-hot matrix is the number of classes.
_, classes = convertedlabels.shape
print(classes)

In [None]:
# Convert the uint8 pixel values to float32 and divide by 255.
trainImg_new = np.divide(trainImg_new.astype(np.float32), 255.0)

In [None]:
# Hold out 30% of the samples (stratified on the labels, fixed seed);
# a later cell splits this hold-out into validation and test halves.
X_train, X_test, y_train, y_test = train_test_split(
    trainImg_new,
    convertedlabels,
    test_size=0.3,
    random_state=38,
    stratify=convertedlabels,
)

In [None]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

In [None]:
# Split the hold-out set in half: validation data and the final test data.
X_val, X_test_new, y_val, y_test_new = train_test_split(
    X_test,
    y_test,
    test_size=0.5,
    random_state=38,
    stratify=y_test,
)

In [None]:
print(X_val.shape)
print(y_val.shape)
print(X_test_new.shape)
print(y_test_new.shape)

In [None]:
y_train[0]

In [None]:
# Decode the one-hot vector of the first training sample back to its species name.
# The index is read from y_train[0] rather than hard-coded, so the cell stays
# correct if the split or the data changes.
labels.classes_[np.argmax(y_train[0])]

# CNN Model 1


*   2 convolution layers ( filters=64 / 128 , kernel_size=(3, 3) activation='relu')
*   MaxPool2D((2, 2))
*   Dropout(0.25)
*   Flatten
*   2 dense layers (128 / 64, activation='relu')
*   Dropout(0.25)
*   loss='categorical_crossentropy', optimizer='adam'
*   Model trained on ImageDataGenerator-augmented batches to reduce overfitting.
*   shuffle = True 








In [None]:
# Random augmentations applied to the training batches.
augmentation = dict(
    rotation_range=180,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
generator = ImageDataGenerator(**augmentation)
generator.fit(X_train)

In [None]:
# CNN Model 1: two conv/pool/dropout stages, two dense layers, softmax output.
model = Sequential([
    Conv2D(filters=64, kernel_size=(3, 3), input_shape=(128, 128, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Dropout(0.25),

    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Dropout(0.25),

    Flatten(),

    Dense(128, activation='relu'),
    Dropout(0.25),

    Dense(64, activation='relu'),
    Dropout(0.25),

    # One output unit per species.
    Dense(classes, activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

In [None]:
# Train on augmented batches of 64 and validate on the untouched validation split.
train_batches = generator.flow(X_train, y_train, batch_size=64)
history = model.fit(
    train_batches,
    epochs=200,
    verbose=2,
    shuffle=True,
    validation_data=(X_val, y_val),
)
# Per-epoch metrics as a table (displayed as the cell output).
pd.DataFrame(history.history)

In [None]:
# Training and validation accuracy per epoch, scaled by 100.
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(np.array(history.history[curve]) * 100)

plt.title('Accuracy over epochs')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend(['train', 'validation'])
plt.show()

In [None]:
# Loss and accuracy on the held-out test split.
scores = model.evaluate(X_test_new, y_test_new)
test_loss, test_accuracy = scores[0], scores[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)


In [None]:
# confusion matrix function

def plot_confusion_matrix(cm, classes, normalize=False,title='Confusion matrix',cmap=plt.cm.Greens):
    """Draw a confusion matrix as an annotated heat map on a new 10x10 figure.

    Args:
        cm: 2-D array; rows are the actual labels, columns the predicted labels.
        classes: tick labels, one per row/column.
        normalize: when True, divide every row by its sum before drawing.
        title: figure title.
        cmap: matplotlib colour map for the cells.
    """
    if normalize:
        # Normalize BEFORE drawing so the cell colours and the printed
        # numbers describe the same values.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=(10, 10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text so it stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Ratios are shortened to two decimals; raw values are printed unchanged.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')

In [None]:
# Class probabilities for the held-out test split, reduced to class indices.
y_pred = model.predict(X_test_new)
y_pred_Classes = y_pred.argmax(axis=1)
trueY = y_test_new.argmax(axis=1)  # one-hot ground truth -> class indices

# Build the confusion matrix and draw it with the species names as tick labels.
confusionMTX = confusion_matrix(trueY, y_pred_Classes)
plot_confusion_matrix(confusionMTX, classes=labels.classes_)

In [None]:
# Final loss ("score") and accuracy of the model on the test and training splits.
test_metrics = model.evaluate(X_test_new, y_test_new)
train_metrics = model.evaluate(X_train, y_train)

score, acc = test_metrics
score1, acc1 = train_metrics

print('Test score:', score, '   Test accuracy:', acc)
print('Train score:', score1, '   Train accuracy:', acc1)

Model 1 reaches a training accuracy of 86% and a testing accuracy of 84%.


In [None]:
import os

test_images_path = "./temp_train/test/*.png"

test_images = glob(test_images_path)
test_images_arr = []
test_files = []

# The model was trained on blurred, green-masked images, so the competition
# test images must go through the same pipeline before prediction.
sensitivity = 35
lower = np.array([60 - sensitivity, 100, 50])
upper = np.array([60 + sensitivity, 255, 255])
struc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

for img in test_images:
    raw = cv2.imread(img)
    if raw is None:
        # cv2.imread reports an unreadable file by returning None.
        raise IOError("Could not read image: " + img)
    i = cv2.resize(raw, (128, 128))
    test_files.append(os.path.basename(img))  # file name goes into the submission

    # Blur -> HSV -> green mask -> morphological closing
    blurr = cv2.GaussianBlur(i, (5, 5), 0)
    hsv = cv2.cvtColor(blurr, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, struc)

    # Keep the original pixels only where the mask is set
    boolean = mask > 0
    new = np.zeros_like(i, np.uint8)
    new[boolean] = i[boolean]
    test_images_arr.append(new)

test_X = np.asarray(test_images_arr)

# Normalization of the image data
test_X = test_X.astype('float32') / 255

In [None]:
 test_X

In [None]:
# Most probable class per test image, mapped back to the species name.
predictions = model.predict(test_X)
preds = predictions.argmax(axis=1)
pred_str = labels.classes_[preds]

In [None]:
# Write the submission file: one row per test image (file name, predicted species).
final_predictions = pd.DataFrame({'file': test_files, 'species': pred_str})
final_predictions.to_csv("./temp_train/submission.csv", index=False)

# CNN Model 2 

*   3 convolution layers (filters=64/128/128 , kernel_size=(3, 3) activation='relu')
*   MaxPool2D((2, 2))
*   Dropout(0.25)
*   Flatten
*   1 dense layer (256, activation='relu')
*   Dropout(0.5)
*   loss='categorical_crossentropy', optimizer='adam'
*   Model trained on ImageDataGenerator-augmented batches to reduce overfitting.
*   shuffle = True 


In [None]:
# Random augmentations applied to the training batches.
augmentation = dict(
    rotation_range=180,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
generator = ImageDataGenerator(**augmentation)
generator.fit(X_train)

In [None]:
# CNN Model 2: three conv/pool/dropout stages, one dense layer, softmax output.
model = Sequential([
    Conv2D(filters=64, kernel_size=(3, 3), input_shape=(128, 128, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Dropout(0.25),

    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Dropout(0.25),

    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Dropout(0.25),

    Flatten(),

    Dense(256, activation='relu'),
    Dropout(0.5),

    # One output unit per species.
    Dense(classes, activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

In [None]:
# Train on augmented batches of 64 and validate on the untouched validation split.
train_batches = generator.flow(X_train, y_train, batch_size=64)
history = model.fit(
    train_batches,
    epochs=100,
    verbose=2,
    shuffle=True,
    validation_data=(X_val, y_val),
)
# Per-epoch metrics as a table (displayed as the cell output).
pd.DataFrame(history.history)

In [None]:
# Training and validation accuracy per epoch, scaled by 100.
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(np.array(history.history[curve]) * 100)

plt.title('Accuracy over epochs')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend(['train', 'validation'])
plt.show()

In [None]:
# Loss and accuracy on the held-out test split.
scores = model.evaluate(X_test_new, y_test_new)
test_loss, test_accuracy = scores[0], scores[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)


In [None]:
# confusion matrix function

def plot_confusion_matrix(cm, classes, normalize=False,title='Confusion matrix',cmap=plt.cm.Greens):
    """Draw a confusion matrix as an annotated heat map on a new 10x10 figure.

    Args:
        cm: 2-D array; rows are the actual labels, columns the predicted labels.
        classes: tick labels, one per row/column.
        normalize: when True, divide every row by its sum before drawing.
        title: figure title.
        cmap: matplotlib colour map for the cells.
    """
    if normalize:
        # Normalize BEFORE drawing so the cell colours and the printed
        # numbers describe the same values.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=(10, 10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text so it stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Ratios are shortened to two decimals; raw values are printed unchanged.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')

In [None]:
# Class probabilities for the held-out test split, reduced to class indices.
y_pred = model.predict(X_test_new)
y_pred_Classes = y_pred.argmax(axis=1)
trueY = y_test_new.argmax(axis=1)  # one-hot ground truth -> class indices

# Build the confusion matrix and draw it with the species names as tick labels.
confusionMTX = confusion_matrix(trueY, y_pred_Classes)
plot_confusion_matrix(confusionMTX, classes=labels.classes_)

In [None]:
# Final loss ("score") and accuracy of the model on the test and training splits.
test_metrics = model.evaluate(X_test_new, y_test_new)
train_metrics = model.evaluate(X_train, y_train)

score, acc = test_metrics
score1, acc1 = train_metrics

print('Test score:', score, '   Test accuracy:', acc)
print('Train score:', score1, '   Train accuracy:', acc1)

In [None]:
import os

test_images_path = "./temp_train/test/*.png"

test_images = glob(test_images_path)
test_images_arr = []
test_files = []

# Green parameters and the morphology kernel are identical for every image,
# so they are built once instead of on every iteration.
sensitivity = 35
lower = np.array([60 - sensitivity, 100, 50])
upper = np.array([60 + sensitivity, 255, 255])
struc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

for img in test_images:
    raw = cv2.imread(img)
    if raw is None:
        # cv2.imread reports an unreadable file by returning None.
        raise IOError("Could not read image: " + img)
    i = cv2.resize(raw, (128, 128))
    test_files.append(os.path.basename(img))  # file name goes into the submission

    # Blur -> HSV -> green mask -> morphological closing (same steps as the training images)
    blurr = cv2.GaussianBlur(i, (5, 5), 0)
    hsv = cv2.cvtColor(blurr, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, struc)

    # Keep the original pixels only where the mask is set
    boolean = mask > 0
    new = np.zeros_like(i, np.uint8)
    new[boolean] = i[boolean]
    test_images_arr.append(new)

test_X = np.asarray(test_images_arr)

# Normalization of the image data
test_X = test_X.astype('float32') / 255

In [None]:
# Most probable class per test image, mapped back to the species name.
predictions = model.predict(test_X)
preds = predictions.argmax(axis=1)
pred_str = labels.classes_[preds]

In [None]:
# Write the submission file: one row per test image (file name, predicted species).
final_predictions = pd.DataFrame({'file': test_files, 'species': pred_str})
final_predictions.to_csv("./temp_train/submission.csv", index=False)

Model 2 reaches 90% training accuracy and 87% testing accuracy.




# CNN Model 3 - VGG16
*   Flatten
*   1 dense layer (256, activation='relu') plus a 12-unit softmax output layer
*   Dropout(0.5)
*   loss='categorical_crossentropy', optimizer='adam'
*   Model trained on ImageDataGenerator-augmented batches to reduce overfitting.
*   shuffle = True 


In [None]:
# Random augmentations applied to the training batches.
augmentation = dict(
    rotation_range=180,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
generator = ImageDataGenerator(**augmentation)
generator.fit(X_train)

In [None]:
# Import the base model from tensorflow.keras, the same package that provides
# Sequential and the layers used here, rather than from standalone keras.
from tensorflow.keras.applications.vgg16 import VGG16

# Initialize VGG16 with ImageNet weights and without its top classifier
# (include_top=False); a head for the plant seedling classes is added in a later cell.
# Input images: 128px by 128px, 3 channels.
# NOTE(review): the images fed to this model are only scaled by 1/255 and are in
# BGR order; confirm this matches what the pre-trained weights expect.
prior_model = VGG16(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

# Create our model with the whole VGG16 base as its first layer.
model = Sequential()
model.add(prior_model)



In [None]:
model.summary()

In [None]:
model.layers[0].summary()

In [None]:
# Classification head on top of the VGG16 base.
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
# Size the output layer from the encoded labels (`classes`) instead of a
# hard-coded 12, as every other model in this notebook does.
model.add(Dense(classes, activation='softmax'))

In [None]:
model.summary()

In [None]:
# Freeze the pre-trained base (layer 0): first each of its layers,
# then the wrapping layer itself for good measure.
base = model.layers[0]
for inner_layer in base.layers:
    inner_layer.trainable = False

base.trainable = False

In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train on augmented batches of 64 and validate on the untouched validation split.
train_batches = generator.flow(X_train, y_train, batch_size=64)
history = model.fit(
    train_batches,
    epochs=200,
    verbose=2,
    shuffle=True,
    validation_data=(X_val, y_val),
)
# Per-epoch metrics as a table (displayed as the cell output).
pd.DataFrame(history.history)

In [None]:
# Training and validation accuracy per epoch, scaled by 100.
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(np.array(history.history[curve]) * 100)

plt.title('Accuracy over epochs')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend(['train', 'validation'])
plt.show()

In [None]:
# Loss and accuracy on the held-out test split.
scores = model.evaluate(X_test_new, y_test_new)
test_loss, test_accuracy = scores[0], scores[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:
# confusion matrix function

def plot_confusion_matrix(cm, classes, normalize=False,title='Confusion matrix',cmap=plt.cm.Greens):
    """Draw a confusion matrix as an annotated heat map on a new 10x10 figure.

    Args:
        cm: 2-D array; rows are the actual labels, columns the predicted labels.
        classes: tick labels, one per row/column.
        normalize: when True, divide every row by its sum before drawing.
        title: figure title.
        cmap: matplotlib colour map for the cells.
    """
    if normalize:
        # Normalize BEFORE drawing so the cell colours and the printed
        # numbers describe the same values.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=(10, 10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text so it stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Ratios are shortened to two decimals; raw values are printed unchanged.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')

In [None]:
# Class probabilities for the held-out test split, reduced to class indices.
y_pred = model.predict(X_test_new)
y_pred_Classes = y_pred.argmax(axis=1)
trueY = y_test_new.argmax(axis=1)  # one-hot ground truth -> class indices

# Build the confusion matrix and draw it with the species names as tick labels.
confusionMTX = confusion_matrix(trueY, y_pred_Classes)
plot_confusion_matrix(confusionMTX, classes=labels.classes_)

In [None]:
# Final loss ("score") and accuracy of the model on the test and training splits.
test_metrics = model.evaluate(X_test_new, y_test_new)
train_metrics = model.evaluate(X_train, y_train)

score, acc = test_metrics
score1, acc1 = train_metrics

print('Test score:', score, '   Test accuracy:', acc)
print('Train score:', score1, '   Train accuracy:', acc1)

In [None]:
import os

test_images_path = "./temp_train/test/*.png"

test_images = glob(test_images_path)
test_images_arr = []
test_files = []

# Green parameters and the morphology kernel are identical for every image,
# so they are built once instead of on every iteration.
sensitivity = 35
lower = np.array([60 - sensitivity, 100, 50])
upper = np.array([60 + sensitivity, 255, 255])
struc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

for img in test_images:
    raw = cv2.imread(img)
    if raw is None:
        # cv2.imread reports an unreadable file by returning None.
        raise IOError("Could not read image: " + img)
    i = cv2.resize(raw, (128, 128))
    test_files.append(os.path.basename(img))  # file name goes into the submission

    # Blur -> HSV -> green mask -> morphological closing (same steps as the training images)
    blurr = cv2.GaussianBlur(i, (5, 5), 0)
    hsv = cv2.cvtColor(blurr, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, struc)

    # Keep the original pixels only where the mask is set
    boolean = mask > 0
    new = np.zeros_like(i, np.uint8)
    new[boolean] = i[boolean]
    test_images_arr.append(new)

test_X = np.asarray(test_images_arr)

# Normalization of the image data
test_X = test_X.astype('float32') / 255

In [None]:
# Most probable class per test image, mapped back to the species name.
predictions = model.predict(test_X)
preds = predictions.argmax(axis=1)
pred_str = labels.classes_[preds]

In [None]:
# Write the submission file: one row per test image (file name, predicted species).
final_predictions = pd.DataFrame({'file': test_files, 'species': pred_str})
final_predictions.to_csv("./temp_train/submission.csv", index=False)

Model 3 reaches 88.7% training accuracy and 85.5% testing accuracy.

# CNN Model 4 - InceptionV3

*   Flatten
*   1 dense layer (1024, activation='relu') plus a softmax output layer
*   Dropout(0.5)
*   loss='categorical_crossentropy', optimizer='adam'
*   Model trained on ImageDataGenerator-augmented batches to reduce overfitting.
*   shuffle = True 


In [None]:
# Import the base model from tensorflow.keras, the same package that provides
# Sequential and the layers used here, rather than from standalone keras.
from tensorflow.keras.applications.inception_v3 import InceptionV3

# Initialize InceptionV3 with ImageNet weights and without its top classifier
# (include_top=False); a head for the plant seedling classes is added in a later cell.
# Input images: 128px by 128px, 3 channels.
# NOTE(review): the images fed to this model are only scaled by 1/255 and are in
# BGR order; confirm this matches what the pre-trained weights expect.
prior_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

# Create our model with the whole InceptionV3 base as its first layer.
model = Sequential()
model.add(prior_model)


In [None]:
model.summary()

In [None]:
model.layers[0].summary()

In [None]:
# Classification head on top of the InceptionV3 base.
head_layers = (
    Flatten(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(classes, activation='softmax'),  # one output unit per species
)
for head_layer in head_layers:
    model.add(head_layer)

model.summary()

In [None]:
# Freeze the pre-trained base (layer 0): first each of its layers,
# then the wrapping layer itself for good measure.
base = model.layers[0]
for inner_layer in base.layers:
    inner_layer.trainable = False

base.trainable = False

In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train on augmented batches of 64 and validate on the untouched validation split.
# `generator` is the augmentation generator created in an earlier cell.
train_batches = generator.flow(X_train, y_train, batch_size=64)
history = model.fit(
    train_batches,
    epochs=200,
    verbose=2,
    shuffle=True,
    validation_data=(X_val, y_val),
)
# Per-epoch metrics as a table (displayed as the cell output).
pd.DataFrame(history.history)

In [None]:
# Training and validation accuracy per epoch, scaled by 100.
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(np.array(history.history[curve]) * 100)

plt.title('Accuracy over epochs')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend(['train', 'validation'])
plt.show()

In [None]:
# Loss and accuracy on the held-out test split.
scores = model.evaluate(X_test_new, y_test_new)
test_loss, test_accuracy = scores[0], scores[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:
# Class probabilities for the held-out test split, reduced to class indices.
y_pred = model.predict(X_test_new)
y_pred_Classes = y_pred.argmax(axis=1)
trueY = y_test_new.argmax(axis=1)  # one-hot ground truth -> class indices

# Build the confusion matrix and draw it with the species names as tick labels.
confusionMTX = confusion_matrix(trueY, y_pred_Classes)
plot_confusion_matrix(confusionMTX, classes=labels.classes_)

In [None]:
# Final loss ("score") and accuracy of the model on the test and training splits.
test_metrics = model.evaluate(X_test_new, y_test_new)
train_metrics = model.evaluate(X_train, y_train)

score, acc = test_metrics
score1, acc1 = train_metrics

print('Test score:', score, '   Test accuracy:', acc)
print('Train score:', score1, '   Train accuracy:', acc1)


In [None]:
import os

test_images_path = "./temp_train/test/*.png"

test_images = glob(test_images_path)
test_images_arr = []
test_files = []

# Green parameters and the morphology kernel are identical for every image,
# so they are built once instead of on every iteration.
sensitivity = 35
lower = np.array([60 - sensitivity, 100, 50])
upper = np.array([60 + sensitivity, 255, 255])
struc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

for img in test_images:
    raw = cv2.imread(img)
    if raw is None:
        # cv2.imread reports an unreadable file by returning None.
        raise IOError("Could not read image: " + img)
    i = cv2.resize(raw, (128, 128))
    test_files.append(os.path.basename(img))  # file name goes into the submission

    # Blur -> HSV -> green mask -> morphological closing (same steps as the training images)
    blurr = cv2.GaussianBlur(i, (5, 5), 0)
    hsv = cv2.cvtColor(blurr, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, struc)

    # Keep the original pixels only where the mask is set
    boolean = mask > 0
    new = np.zeros_like(i, np.uint8)
    new[boolean] = i[boolean]
    test_images_arr.append(new)

test_X = np.asarray(test_images_arr)

# Normalization of the image data
test_X = test_X.astype('float32') / 255


In [None]:
# Predict a species for every competition test image.
predictions = model.predict(test_X)
preds = predictions.argmax(axis=1)
pred_str = labels.classes_[preds]

# Write the submission file: one row per test image (file name, predicted species).
final_predictions = pd.DataFrame({'file': test_files, 'species': pred_str})
final_predictions.to_csv("./temp_train/submission.csv", index=False)


Model 4 reaches 90% training accuracy and 84% testing accuracy.
 

# CNN Model 5

*   4 convolution layers (filters=64/64/128/256, kernel_size=(3, 3) activation='relu')
*   MaxPool2D((2, 2))
*   Dropout(0.25)
*   GlobalMaxPool2D
*   Flatten
*   2 dense layers (256 / 256, activation='relu')
*   Dropout(0.25)
*   loss='categorical_crossentropy', optimizer='adam'
*   Model trained on ImageDataGenerator-augmented batches to reduce overfitting.
*   shuffle = True 


In [None]:
# CNN Model 5: four conv layers in three pooled stages, global max pooling,
# two dense layers and a softmax output.
model = Sequential([
    Conv2D(filters=64, kernel_size=(3, 3), input_shape=(128, 128, 3), activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Dropout(0.25),

    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Dropout(0.25),

    Conv2D(filters=256, kernel_size=(3, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Dropout(0.25),

    GlobalMaxPool2D(),

    Flatten(),

    Dense(256, activation='relu'),
    Dropout(0.25),

    Dense(256, activation='relu'),
    Dropout(0.25),

    # One output unit per species.
    Dense(classes, activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

In [None]:
# Train on augmented batches of 64 and validate on the untouched validation split.
# `generator` is the augmentation generator created in an earlier cell.
train_batches = generator.flow(X_train, y_train, batch_size=64)
history = model.fit(
    train_batches,
    epochs=200,
    verbose=2,
    shuffle=True,
    validation_data=(X_val, y_val),
)
# Per-epoch metrics as a table (displayed as the cell output).
pd.DataFrame(history.history)

In [None]:
# Training and validation accuracy per epoch, scaled by 100.
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(np.array(history.history[curve]) * 100)

plt.title('Accuracy over epochs')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend(['train', 'validation'])
plt.show()

In [None]:
# Loss and accuracy on the held-out test split.
scores = model.evaluate(X_test_new, y_test_new)
test_loss, test_accuracy = scores[0], scores[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:
# Class probabilities for the held-out test split, reduced to class indices.
y_pred = model.predict(X_test_new)
y_pred_Classes = y_pred.argmax(axis=1)
trueY = y_test_new.argmax(axis=1)  # one-hot ground truth -> class indices

# Build the confusion matrix and draw it with the species names as tick labels.
confusionMTX = confusion_matrix(trueY, y_pred_Classes)
plot_confusion_matrix(confusionMTX, classes=labels.classes_)


In [None]:
# Final loss ("score") and accuracy of the model on the test and training splits.
test_metrics = model.evaluate(X_test_new, y_test_new)
train_metrics = model.evaluate(X_train, y_train)

score, acc = test_metrics
score1, acc1 = train_metrics

print('Test score:', score, '   Test accuracy:', acc)
print('Train score:', score1, '   Train accuracy:', acc1)



In [None]:
import os

test_images_path = "./temp_train/test/*.png"

test_images = glob(test_images_path)
test_images_arr = []
test_files = []

# Green parameters and the morphology kernel are identical for every image,
# so they are built once instead of on every iteration.
sensitivity = 35
lower = np.array([60 - sensitivity, 100, 50])
upper = np.array([60 + sensitivity, 255, 255])
struc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

for img in test_images:
    raw = cv2.imread(img)
    if raw is None:
        # cv2.imread reports an unreadable file by returning None.
        raise IOError("Could not read image: " + img)
    i = cv2.resize(raw, (128, 128))
    test_files.append(os.path.basename(img))  # file name goes into the submission

    # Blur -> HSV -> green mask -> morphological closing (same steps as the training images)
    blurr = cv2.GaussianBlur(i, (5, 5), 0)
    hsv = cv2.cvtColor(blurr, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, struc)

    # Keep the original pixels only where the mask is set
    boolean = mask > 0
    new = np.zeros_like(i, np.uint8)
    new[boolean] = i[boolean]
    test_images_arr.append(new)

test_X = np.asarray(test_images_arr)

# Normalization of the image data
test_X = test_X.astype('float32') / 255


In [None]:
# Predict a species for every competition test image.
predictions = model.predict(test_X)
preds = predictions.argmax(axis=1)
pred_str = labels.classes_[preds]

# Write the submission file: one row per test image (file name, predicted species).
final_predictions = pd.DataFrame({'file': test_files, 'species': pred_str})
final_predictions.to_csv("./temp_train/submission.csv", index=False)


Model 5 reaches 95% training accuracy and 93% testing accuracy.

 

# CNN Model 6 

*   6 convolution layers (filters=64/64/128/128/256/256, kernel_size=(3, 3) activation='relu')
*   MaxPool2D((2, 2))
*   Dropout(0.25)
*   GlobalMaxPool2D
*   Flatten
*   3 dense layers (256/256/256, activation='relu')
*   Dropout(0.25)
*   loss='categorical_crossentropy', optimizer='adam'
*   Model trained on ImageDataGenerator-augmented batches to reduce overfitting.
*   shuffle = True 



In [None]:
# CNN Model 6: three stages of two conv layers each, global max pooling,
# three dense layers and a softmax output.
model = Sequential([
    Conv2D(filters=64, kernel_size=(3, 3), input_shape=(128, 128, 3), activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Dropout(0.25),

    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Dropout(0.25),

    Conv2D(filters=256, kernel_size=(3, 3), activation='relu'),
    Conv2D(filters=256, kernel_size=(3, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Dropout(0.25),

    GlobalMaxPool2D(),

    Flatten(),

    Dense(256, activation='relu'),
    Dropout(0.25),

    Dense(256, activation='relu'),
    Dropout(0.25),

    Dense(256, activation='relu'),
    Dropout(0.25),

    # One output unit per species.
    Dense(classes, activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

In [None]:
# Random augmentations applied to the training batches.
augmentation = dict(
    rotation_range=180,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
generator = ImageDataGenerator(**augmentation)
generator.fit(X_train)

In [None]:
# Train on augmented batches of 64 and validate on the untouched validation split.
train_batches = generator.flow(X_train, y_train, batch_size=64)
history = model.fit(
    train_batches,
    epochs=200,
    verbose=2,
    shuffle=True,
    validation_data=(X_val, y_val),
)
# Per-epoch metrics as a table (displayed as the cell output).
pd.DataFrame(history.history)

In [None]:
# Training and validation accuracy per epoch, scaled by 100.
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(np.array(history.history[curve]) * 100)

plt.title('Accuracy over epochs')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend(['train', 'validation'])
plt.show()

In [None]:
# Loss and accuracy on the held-out test split.
scores = model.evaluate(X_test_new, y_test_new)
test_loss, test_accuracy = scores[0], scores[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:
# confusion matrix function

def plot_confusion_matrix(cm, classes, normalize=False,title='Confusion matrix',cmap=plt.cm.Greens):
    """Draw a confusion matrix as an annotated heat map on a new 10x10 figure.

    Args:
        cm: 2-D array; rows are the actual labels, columns the predicted labels.
        classes: tick labels, one per row/column.
        normalize: when True, divide every row by its sum before drawing.
        title: figure title.
        cmap: matplotlib colour map for the cells.
    """
    if normalize:
        # Normalize BEFORE drawing so the cell colours and the printed
        # numbers describe the same values.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=(10, 10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text so it stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Ratios are shortened to two decimals; raw values are printed unchanged.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')

In [None]:
# Class probabilities for the held-out test split, reduced to class indices.
y_pred = model.predict(X_test_new)
y_pred_Classes = y_pred.argmax(axis=1)
trueY = y_test_new.argmax(axis=1)  # one-hot ground truth -> class indices

# Build the confusion matrix and draw it with the species names as tick labels.
confusionMTX = confusion_matrix(trueY, y_pred_Classes)
plot_confusion_matrix(confusionMTX, classes=labels.classes_)

In [None]:
# Final loss ("score") and accuracy of the model on the test and training splits.
test_metrics = model.evaluate(X_test_new, y_test_new)
train_metrics = model.evaluate(X_train, y_train)

score, acc = test_metrics
score1, acc1 = train_metrics

print('Test score:', score, '   Test accuracy:', acc)
print('Train score:', score1, '   Train accuracy:', acc1)

In [None]:
import os

test_images_path = "./temp_train/test/*.png"

test_images = glob(test_images_path)
test_images_arr = []
test_files = []

# Green parameters and the morphology kernel are identical for every image,
# so they are built once instead of on every iteration.
sensitivity = 35
lower = np.array([60 - sensitivity, 100, 50])
upper = np.array([60 + sensitivity, 255, 255])
struc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

for img in test_images:
    raw = cv2.imread(img)
    if raw is None:
        # cv2.imread reports an unreadable file by returning None.
        raise IOError("Could not read image: " + img)
    i = cv2.resize(raw, (128, 128))
    test_files.append(os.path.basename(img))  # file name goes into the submission

    # Blur -> HSV -> green mask -> morphological closing (same steps as the training images)
    blurr = cv2.GaussianBlur(i, (5, 5), 0)
    hsv = cv2.cvtColor(blurr, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, struc)

    # Keep the original pixels only where the mask is set
    boolean = mask > 0
    new = np.zeros_like(i, np.uint8)
    new[boolean] = i[boolean]
    test_images_arr.append(new)

test_X = np.asarray(test_images_arr)

# Normalization of the image data
test_X = test_X.astype('float32') / 255

In [None]:
# Predict a species for every competition test image.
predictions = model.predict(test_X)
preds = predictions.argmax(axis=1)
pred_str = labels.classes_[preds]

# Write the submission file: one row per test image (file name, predicted species).
final_predictions = pd.DataFrame({'file': test_files, 'species': pred_str})
final_predictions.to_csv("./temp_train/submission.csv", index=False)