#Import and understand the data

In [1]:
# import packages

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pathlib
import os
import cv2

import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split

In [2]:
print(tf.__version__)

2.8.0


In [3]:
# extract zip

#!unzip "/content/drive/MyDrive/greatlearning/CNN Project/Dataset - Plant-seedlings-classification.zip" -d "/content/drive/MyDrive/greatlearning/CNN Project/Plant Seedling Classification"

In [4]:
# map the images from train folder with train labels to form a dataframe

data_dir = "/content/drive/MyDrive/greatlearning/CNN Project/Plant Seedling Classification"

In [5]:
size = 64

train_data = data_dir + "/train"
classes = os.listdir(train_data)

file_name = []
species = []
image_data = []

In [6]:
def extractImage(path):
  images = os.listdir(path)
  for image_name in images:
    image = cv2.imread(path + "/" + image_name)
    image_data.append(preprocessImage(image))
    species.append(path.split("/")[-1])
    file_name.append(image_name)

In [7]:
def preprocessImage(image):
  image = np.array(image, dtype = "float32")
  image = cv2.resize(image, (size, size))
  image = np.reshape(image, (size, size, 3)) 
  image /= 255.0
  return np.array(image, dtype = "float32")

In [8]:
for class_name in classes:
  extractImage(train_data + "/" + class_name)

In [9]:
seed_df = pd.DataFrame({"file name": file_name, "species": species, "image data": image_data})

In [10]:
seed_df.head(5)

Unnamed: 0,file name,species,image data
0,0183fdf68.png,Black-grass,"[[[0.18335631, 0.20590916, 0.23322994], [0.196..."
1,0260cffa8.png,Black-grass,"[[[0.08539273, 0.12687558, 0.1856991], [0.1201..."
2,0050f38b3.png,Black-grass,"[[[0.09251302, 0.18246017, 0.30734146], [0.143..."
3,05eedce4d.png,Black-grass,"[[[0.19176696, 0.32280248, 0.41664106], [0.198..."
4,075d004bc.png,Black-grass,"[[[0.6736582, 0.6736582, 0.6691586], [0.656164..."


In [11]:
seed_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4750 entries, 0 to 4749
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   file name   4750 non-null   object
 1   species     4750 non-null   object
 2   image data  4750 non-null   object
dtypes: object(3)
memory usage: 111.5+ KB


In [89]:
# function that will select n random images

def randomImages(input_num):
  return seed_df[['image data', 'species']].sample(input_num)

In [90]:
randomImages(2)

Unnamed: 0,image data,species
1401,"[[[0.08776808, 0.1573223, 0.25532705], [0.0931...",Common Chickweed
1905,"[[[0.29411766, 0.31764707, 0.32941177], [0.294...",Fat Hen


#Data preprocessing

In [13]:
# create x and y

X = np.array(image_data, dtype = 'float32')
y = seed_df['species']

In [14]:
file_name = []
species = []
image_data = []

In [15]:
# encode the labels

y = keras.utils.to_categorical(y.factorize()[0], len(classes))

In [16]:
y

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [17]:
y.shape

(4750, 12)

In [18]:
# image is already unified and normalized in the preprocessing method

# unify

# normalize

#Model training

In [19]:
X.shape

(4750, 64, 64, 3)

In [20]:
# split the data 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [29]:
# create a CNN model

from keras.models import Sequential
from keras.layers.normalization.batch_normalization import BatchNormalization
from keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D
from keras.callbacks import EarlyStopping

model = Sequential([
  Conv2D(64, 3, input_shape=(64, 64, 3)),
  Activation('leaky_relu'),
  BatchNormalization(),
  Conv2D(128, 3),
  Activation('leaky_relu'),
  BatchNormalization(),
  Conv2D(64, 3),
  Activation('leaky_relu'),
  BatchNormalization(),
  Conv2D(32, 3),
  Activation('leaky_relu'),
  BatchNormalization(),
  Flatten(),
  Dense(128),
  BatchNormalization(),
  Dense(len(classes)),
  Activation('softmax')
])

In [30]:
model.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

In [31]:
model.build()

In [32]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 62, 62, 64)        1792      
                                                                 
 activation_5 (Activation)   (None, 62, 62, 64)        0         
                                                                 
 batch_normalization_5 (Batc  (None, 62, 62, 64)       256       
 hNormalization)                                                 
                                                                 
 conv2d_5 (Conv2D)           (None, 60, 60, 128)       73856     
                                                                 
 activation_6 (Activation)   (None, 60, 60, 128)       0         
                                                                 
 batch_normalization_6 (Batc  (None, 60, 60, 128)      512       
 hNormalization)                                      

In [34]:
early_stopping = EarlyStopping(
  monitor='val_accuracy',
  patience=10,
  verbose=1,
  mode='auto'
)

In [35]:
callbacks = [early_stopping]  

In [36]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(3182, 64, 64, 3)
(1568, 64, 64, 3)
(3182, 12)
(1568, 12)


In [37]:
batch_size = 32
epochs = 100

model.fit(
  X_train, y_train, 
  batch_size=batch_size, 
  epochs=epochs,
  validation_data=(X_test, y_test),
  callbacks=callbacks
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 39: early stopping


<keras.callbacks.History at 0x7f0a90346c90>

The model is highly overfit for the data that is available. To generalise the model more we need more data. This can be easily achieved through augmentation. But augmentation is not a part of this project. Hence it is not used.

In [71]:
pred = model.predict(X_test[9:10]) 

In [72]:
pred[0]

array([1.6721874e-03, 3.2256356e-08, 1.6461132e-06, 1.6649852e-03,
       7.7531557e-05, 9.0594327e-01, 3.2114111e-02, 2.9227346e-07,
       1.9135719e-04, 5.7616185e-02, 7.1680365e-04, 1.6115795e-06],
      dtype=float32)

In [73]:
pred[0].argmax()

5

In [80]:
classes[5]

'Fat Hen'

In [81]:
y_test[9:10].argmax()

5