## Import the required libraries

In [None]:
import tensorflow as tf
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
import cv2
from glob import glob
import matplotlib.pyplot as plt                        
%matplotlib inline 

## Read the images from train folder and generate training dataset

In [None]:
# Reading the images and labels.
# The class label of each image is the name of the folder that contains it.
images_path = '/kaggle/input/plant-seedlings-classification/train/*/*.png'
# glob() returns paths in arbitrary order; sorting keeps the dataset order
# (and therefore the seeded train/validation split) reproducible.
images = sorted(glob(images_path))
train_images = []
train_labels = []

for img in images:
    pixels = cv2.imread(img)
    if pixels is None:
        # cv2.imread reports an unreadable/corrupt file by returning None
        # instead of raising; skip it rather than crash inside cv2.resize.
        print('Skipping unreadable image:', img)
        continue
    train_images.append(cv2.resize(pixels, (256, 256)))
    # Parent directory name is the label (independent of the path separator)
    train_labels.append(os.path.basename(os.path.dirname(img)))
train_X = np.asarray(train_images)
train_Y = pd.DataFrame(train_labels)

## Plot an image from training data set

In [None]:
# Displaying an image.
# cv2.imread loads pixels in BGR channel order while matplotlib expects RGB,
# so convert before plotting to show the true colours.
plt.imshow(cv2.cvtColor(train_X[1], cv2.COLOR_BGR2RGB))

In [None]:
# Shape of the array stacked from all resized training images
train_X.shape

## Print all unique output classes

In [None]:
# Distinct label strings found in column 0 of train_Y (the folder names)
train_Y[0].unique()

## Label encode all the output classes

In [None]:
# Converting labels to numbers, then to one-hot vectors
encoder = LabelEncoder()
encoded_labels = encoder.fit_transform(train_Y[0])
# Use the to_categorical that ships with tf.keras so the notebook relies on a
# single Keras implementation (keras.utils.np_utils is not available in newer
# Keras releases). num_classes is passed explicitly so the one-hot width
# always matches the number of classes known to the encoder.
categorical_labels = tf.keras.utils.to_categorical(
    encoded_labels, num_classes=len(encoder.classes_))

In [None]:
# Class names learned by the encoder; position i is the name for encoded label i
encoder.classes_

## Split the training data into train and validation data set

In [None]:
# Hold out 20% of the images for validation. Stratifying on the integer labels
# keeps the class proportions the same in the training and validation subsets.
x_train, x_test, y_train, y_test = train_test_split(
    train_X,
    categorical_labels,
    test_size=0.2,
    random_state=7,
    stratify=encoded_labels,
)

## Normalize the train and validation images

In [None]:
# Rescale pixel values by dividing by 255, working in float32
pixel_max = 255
x_train = x_train.astype(np.float32) / pixel_max
x_test = x_test.astype(np.float32) / pixel_max

In [None]:
# The normalised images are still in BGR channel order (as loaded by
# cv2.imread); convert to RGB so matplotlib shows the true colours.
plt.imshow(cv2.cvtColor(x_train[0], cv2.COLOR_BGR2RGB))

## Initialize the object of ImageDataGenerator

In [None]:
# Random augmentation applied on the fly to the training batches
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=[0.4, 1.5],
    horizontal_flip=True,
    vertical_flip=True,
)

# NOTE: datagen.fit(x_train) is intentionally not called. fit() is only
# required when featurewise_center, featurewise_std_normalization or
# zca_whitening is enabled; none of them is set here, so the call would only
# spend time and memory copying the training array.

## Initialize and build the model

In [None]:
# Clear any previous model from memory
tf.keras.backend.clear_session()

keras_layers = tf.keras.layers

# Build the classifier as one ordered stack of layers
model = tf.keras.models.Sequential([
    # Block 1: two 3x3 convolutions on the 256x256 3-channel input
    keras_layers.Conv2D(64, kernel_size=(3, 3), input_shape=(256, 256, 3),
                        activation='relu'),
    keras_layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    keras_layers.BatchNormalization(),
    keras_layers.MaxPool2D(pool_size=(2, 2)),

    # Block 2: 128 filters
    keras_layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
    keras_layers.BatchNormalization(),
    keras_layers.MaxPool2D(pool_size=(2, 2)),

    # Block 3: 256 filters
    keras_layers.Conv2D(256, kernel_size=(3, 3), activation='relu'),
    keras_layers.BatchNormalization(),
    keras_layers.MaxPool2D(pool_size=(2, 2)),

    # Collapse the spatial dimensions, keeping the maximum of each feature map
    keras_layers.GlobalMaxPool2D(),
    keras_layers.Flatten(),

    # Classifier head: two dense layers, each batch-normalised and followed
    # by dropout
    keras_layers.Dense(128, activation='relu'),
    keras_layers.BatchNormalization(),
    keras_layers.Dropout(0.3),
    keras_layers.Dense(128, activation='relu'),
    keras_layers.BatchNormalization(),
    keras_layers.Dropout(0.3),

    # Output layer: one softmax unit per class (12 classes)
    keras_layers.Dense(12, activation='softmax'),
])

## Compile the model

In [None]:
# Specify the loss, the optimizer and the metric to report
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

## Model Summary

In [None]:
# Print the layer-by-layer summary of the model
model.summary()

In [None]:
# Save the model to disk only when validation accuracy reaches a new maximum,
# so the file always holds the best model seen so far
checkpoint_path = '/kaggle/working/plantspecies_CNN_model.h5'
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

## Fit the model

In [None]:
# Train on augmented batches and validate on the held-out split.
# Model.fit accepts generators directly; fit_generator is deprecated and is
# no longer available in recent TensorFlow releases.
history = model.fit(
    datagen.flow(x_train, y_train, batch_size=16),
    epochs=200,
    validation_data=(x_test, y_test),
    callbacks=[model_checkpoint],
)

## Evaluate the accuracy on both train and validation data

In [None]:
# Loss and accuracy of the model as it stands after training, measured on the
# validation split (reported as "Test") and on the training split
validation_metrics = model.evaluate(x_test, y_test)
score, acc = validation_metrics
training_metrics = model.evaluate(x_train, y_train)
score1, acc1 = training_metrics
print('Test score:', score, '   Test accuracy:', acc)
print('Train score:', score1, '   Train accuracy:', acc1)

## Plot the accuracy and loss graph for train and validation set

In [None]:
# Accuracy per epoch for the training and validation data
for history_key, curve_label in (('accuracy', 'train'), ('val_accuracy', 'val')):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Loss per epoch for the training and validation data
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'val')):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(loc='upper left')
plt.show()

## Read the images from test folder and generate test data set

In [None]:
# Reading the unlabeled test images; file names are kept for the submission
test_images_path = '/kaggle/input/plant-seedlings-classification/test/*.png'
# glob() order is arbitrary; sort so the rows come out in a stable order
test_images = sorted(glob(test_images_path))
test_images_arr = []
test_files = []

for img in test_images:
    pixels = cv2.imread(img)
    if pixels is None:
        # cv2.imread returns None for an unreadable file. Every test file
        # needs a prediction, so fail with a clear message instead of
        # skipping it or crashing inside cv2.resize.
        raise ValueError('Could not read test image: ' + img)
    test_images_arr.append(cv2.resize(pixels, (256, 256)))
    # File name without its directory (independent of the path separator)
    test_files.append(os.path.basename(img))

test_X = np.asarray(test_images_arr)

In [None]:
# Rescale the test pixels the same way as the training images:
# float32 values divided by 255
pixel_max = 255
test_X = test_X.astype(np.float32) / pixel_max

## Predict the classes for test data

In [None]:
# Predict with the best checkpoint (highest validation accuracy) written by
# the ModelCheckpoint callback during training, rather than with whatever
# weights the final epoch left in memory. Fall back to the in-memory model
# when no checkpoint file exists.
best_model_path = '/kaggle/working/plantspecies_CNN_model.h5'
if os.path.exists(best_model_path):
    prediction_model = tf.keras.models.load_model(best_model_path)
else:
    prediction_model = model
predictions = prediction_model.predict(test_X)

In [None]:
# Index of the most probable class for each image, mapped back to its name
preds = predictions.argmax(axis=1)
pred_str = np.take(encoder.classes_, preds)

In [None]:
# Write the submission file: one row per test image with its predicted species
final_predictions = pd.DataFrame({'file': test_files, 'species': pred_str})
final_predictions.to_csv('/kaggle/working/submission.csv', index=False)