## Importing libraries

In [None]:
import numpy as np
import pandas as pd

import cv2
import os
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau,ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import itertools

## Plotting libraries

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

## Transfer learning Libraries

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.inception_v3 import InceptionV3

## Sklearn / imblearn libraries for label encoding, train/validation split, upsampling and metrics

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split as split
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score,precision_score,recall_score,f1_score

## Loading Datasets

In [None]:
# Dataset root and its two sub-folders; every relative file path used later in
# the notebook is resolved against `main_dir`.
main_dir = '../input/plant-seedlings-classification'
train_dir, test_dir = (os.path.join(main_dir, subset) for subset in ("train", "test"))

In [None]:
# Class names are the sub-folder names of train_dir. Keep directories only so
# that stray files (hidden files, archives, ...) never become a bogus class.
labels = sorted(
    entry
    for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)
print(labels)


In [None]:
# Number of target classes = number of entries listed under train_dir.
num_classes = len(labels)
num_classes

## Data preprocessing

In [None]:
# Peek at the first class name (labels is sorted, so this is the smallest one).
labels[0]

In [None]:
# Number of files in each class folder, in the same order as `labels`.
count_plot = [
    len(os.listdir(os.path.join(train_dir, species)))
    for species in labels
]

In [None]:
# Display the per-class file counts computed above.
count_plot

In [None]:
# Bar chart of the number of training files per class.
count_plot = np.asarray(count_plot)
plt.bar(x=labels, height=count_plot)
plt.xticks(rotation=90)
plt.show()

The dataset is imbalanced, so it needs to be balanced in order to yield good results.

In [None]:
# Map each class name to its file count (labels and count_plot share ordering).
count_plot = list(count_plot)
label_values = dict(zip(labels, count_plot))

label_values

In [None]:
# Build one row per training image: path relative to main_dir, integer class
# id (index of the class in `labels`) and the class name.
# os.listdir returns entries in arbitrary order, so the file names are sorted
# to make the row order (and everything derived from it) reproducible.
train_data = [
    ['train/{}/{}'.format(label, file), ids, label]
    for ids, label in enumerate(labels)
    for file in sorted(os.listdir(os.path.join(train_dir, label)))
]

train = pd.DataFrame(train_data, columns=['file', 'id','labels'])
train.head()

In [None]:
# (number of training images, 3 columns)
train.shape

In [None]:
# One row per file in test_dir: path relative to main_dir plus the bare file name.
test_data = [['test/{}'.format(name), name] for name in os.listdir(test_dir)]
test = pd.DataFrame(test_data, columns=['Filepath', 'File'])
test.head()

In [None]:
# Histogram of class ids. The default of 10 equal-width bins cannot separate
# num_classes integer ids (neighbouring classes get merged into one bar), so
# use one bin per id, centred on the integer, and label ticks with class names.
class_bins = np.arange(num_classes + 1) - 0.5
plt.hist(train['id'], bins=class_bins)
plt.xticks(range(num_classes), labels, rotation = 90)
plt.show()

## plotting images

In [None]:
def plot_img(rows,cols, species):
    """Show a rows x cols grid of training images of one species.

    Args:
        rows: number of grid rows.
        cols: number of grid columns.
        species: class name, matched against the `labels` column of `train`.

    Images are taken in the order they appear in `train`. If the species has
    fewer than rows*cols images, or an image cannot be read, the cell is left
    blank.
    """
    # squeeze=False keeps `ax` 2-D even when rows or cols is 1.
    fig, ax = plt.subplots(rows, cols, figsize=(12,12), squeeze=False)
    labels_files = train['file'][train['labels'] == species].values
    # ax.flat walks the grid row by row, matching the original nested loops.
    for n, cell in enumerate(ax.flat):
        cell.set_xticks([])
        cell.set_yticks([])
        if n >= len(labels_files):
            continue
        img = cv2.imread(os.path.join(main_dir, labels_files[n]))
        if img is None:
            continue
        # OpenCV loads BGR; matplotlib expects RGB, so convert before display.
        cell.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
# Preview a 5x5 grid of images from the first class.
plot_img(5,5,labels[0])

## features preprocessing

In [None]:
# Side length in pixels of the square images produced by the preprocessing below.
IMAGE_SIZE = 120

def read_image(filepath):
    """Load an image given its path relative to `main_dir`.

    Args:
        filepath: path relative to `main_dir`, e.g. a value of train['file'].

    Returns:
        The image array as returned by cv2.imread (OpenCV's BGR channel order).

    Raises:
        FileNotFoundError: if the file is missing or cannot be decoded.
            cv2.imread signals both cases by returning None, which would
            otherwise surface later as an obscure OpenCV error.
    """
    full_path = os.path.join(main_dir, filepath)
    img = cv2.imread(full_path)
    if img is None:
        raise FileNotFoundError('Could not read image: {}'.format(full_path))
    return img

def resize_image(img,img_size):
    """Return a resized copy of `img`.

    Args:
        img: image array.
        img_size: target size passed to cv2.resize as its `dsize` argument.

    Uses area interpolation (cv2.INTER_AREA); the input array is not modified.
    """
    source = img.copy()
    resized = cv2.resize(source, img_size, interpolation=cv2.INTER_AREA)
    return resized


Converting the images from BGR (the channel order OpenCV loads) to HSV so that the green plant can be segmented from the background, which helps to obtain good results.

### Image segmentation

In [None]:
def create_mask(img, lower_green=(30, 100, 50), upper_green=(85, 255, 255),
                kernel_size=(15, 15)):
    """Build a binary mask of the pixels whose HSV colour is in a given range.

    Args:
        img: BGR image array (it is converted with cv2.COLOR_BGR2HSV).
        lower_green: inclusive lower (H, S, V) bound passed to cv2.inRange.
        upper_green: inclusive upper (H, S, V) bound passed to cv2.inRange.
        kernel_size: size of the elliptical structuring element used for the
            morphological closing.

    Returns:
        Single-channel mask: 255 where the pixel is inside the range (after
        closing), 0 elsewhere.

    The defaults reproduce the values that were previously hard-coded.
    """
    img_hsv = cv2.cvtColor(img,cv2.COLOR_BGR2HSV)

    mask = cv2.inRange(img_hsv, np.array(lower_green), np.array(upper_green))

    # Closing (dilate then erode) fills small holes inside the selected region.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, tuple(kernel_size))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE,kernel)
    return mask

def segment_image(img):
    """Return `img` with every pixel outside create_mask(img) set to zero."""
    return cv2.bitwise_and(img, img, mask=create_mask(img))

## Plotting segmented images

In [None]:
def show_segmented_images(species, n):
    """Plot the preprocessing stages for the first `n` images of a species.

    Each row shows, left to right: the original image, its mask, the
    segmented image and the segmented image resized to IMAGE_SIZE.

    Args:
        species: class name, matched against the `labels` column of `train`.
        n: number of rows (images) to show. If the species has fewer images,
            the remaining rows stay empty.
    """
    # squeeze=False keeps `ax` 2-D so ax[i, k] also works when n == 1.
    fig, ax = plt.subplots(n, 4, figsize=(20, 20), squeeze=False)
    for panel in ax.flat:
        panel.set_axis_off()
    species_files = train['file'][train['labels'] == species].values
    for i in range(min(n, len(species_files))):
        image = read_image(species_files[i])
        image_masked = create_mask(image)
        image_segmented = segment_image(image)
        image_resized = resize_image(image_segmented, (IMAGE_SIZE, IMAGE_SIZE))
        # Images are BGR (OpenCV); matplotlib expects RGB, so convert for
        # display only. The single-channel mask needs no conversion.
        ax[i, 0].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        ax[i, 1].imshow(image_masked)
        ax[i, 2].imshow(cv2.cvtColor(image_segmented, cv2.COLOR_BGR2RGB))
        ax[i, 3].imshow(cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB))

# Show the segmentation pipeline on three images of the first class.
show_segmented_images(labels[0], 3)

## Extracting features

In [None]:
# Segment and resize every training image into one array.
# float32 is what the networks consume; the default float64 buffer would need
# twice the memory for no benefit.
X_train = np.zeros((train.shape[0], IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)
for i, file in enumerate(train['file'].values):
    image = read_image(file)
    if image is None:
        # cv2.imread returns None for missing/corrupt files; fail with the
        # offending path instead of an obscure OpenCV error further down.
        raise FileNotFoundError('Could not read image: {}'.format(file))
    image_segmented = segment_image(image)
    X_train[i] = resize_image(image_segmented, (IMAGE_SIZE, IMAGE_SIZE))
# Normalize the data to the [0, 1] range
x_train = X_train / 255.
print('Train Shape: {}'.format(X_train.shape))


In [None]:
# (number of images, IMAGE_SIZE, IMAGE_SIZE, 3)
x_train.shape

In [None]:
# Class distribution of the training labels.
y_train = train['labels']
# Pass the series as `x=`: a positional argument is deprecated in seaborn 0.11
# and is interpreted as `data` by later versions.
sns.countplot(x=y_train)
plt.xticks(rotation=90);

## SMOTE(Synthetic Minority Oversampling Technique)

In [None]:
# SMOTE oversampling is currently disabled, so the data stays imbalanced.
# NOTE(review): imblearn's fit_resample expects 2-D (n_samples, n_features)
# input and returns the resampled arrays; x_train is 4-D and the result is not
# assigned below, so re-enabling this needs a reshape and an assignment.
# smote = SMOTE()
# smote.fit_resample(x_train,y_train)

In [None]:
# Class distribution of the labels that go into training.
y_train = train['labels']
# Pass the series as `x=`: a positional argument is deprecated in seaborn 0.11
# and is interpreted as `data` by later versions.
sns.countplot(x=y_train)
plt.xticks(rotation=90);

## Label Encoding for targets

In [None]:
# Encode class names as integer ids; `le` is kept so ids can be mapped back
# to names later with le.inverse_transform.
le = LabelEncoder()
le.fit(y_train)
y_train = le.transform(y_train)
y_train.shape

## train validation split

In [None]:
# Hold out 10% of the data for validation, preserving class proportions.
# A fixed random_state makes the split (and therefore every validation
# metric below) reproducible across kernel restarts.
x_train, x_val, y_train, y_val = split(
    x_train,
    y_train,
    stratify=y_train,
    test_size=0.1,
    random_state=42,
)
x_train.shape,x_val.shape,y_train.shape,y_val.shape

In [None]:
# One-hot encode the integer class ids. num_classes is passed explicitly so
# both arrays always have the same width, even if the highest class id were
# missing from one of the splits.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
y_val = tf.keras.utils.to_categorical(y_val, num_classes=num_classes)

y_train.shape,y_val.shape

## Algorithms

### Image height, width, batch size and optimizer hyper-parameters

In [None]:
# Training hyper-parameters shared by all three models.
batch_size = 64
# The network input size must match the size the images were resized to.
img_height = IMAGE_SIZE
img_width = IMAGE_SIZE
lr = 0.002
beta_1=0.9
beta_2=0.999
epsilon=0.1
# 0.0 means no learning-rate decay. The value is no longer passed to Adam:
# recent Keras optimizers reject/ignore the `decay` keyword, and omitting it
# is equivalent to 0.0 on versions that still accept it.
decay=0.0

opt = tf.keras.optimizers.Adam(learning_rate = lr,beta_1=beta_1,beta_2=beta_2,epsilon=epsilon)

## Callbacks

In [None]:
# Learning-rate schedule and early stopping, shared by all three models.
lr_reduce = ReduceLROnPlateau(
    monitor='val_accuracy',
    mode='max',
    factor=0.6,
    patience=5,
    min_lr=1e-5,
    verbose=1,
)
early_stop = EarlyStopping(monitor='val_loss', mode='min', patience=5, verbose=1)


def _best_checkpoint(path):
    """Checkpoint that saves to `path` only when val_accuracy improves."""
    return ModelCheckpoint(path, monitor='val_accuracy', mode='max', save_best_only=True, verbose=1)


# One checkpoint file per architecture.
checkpoint1 = _best_checkpoint('vgg.h5')
checkpoint2 = _best_checkpoint('res.h5')
checkpoint3 = _best_checkpoint('inc.h5')

callbacklist1 = [lr_reduce, checkpoint1, early_stop]
callbacklist2 = [lr_reduce, checkpoint2, early_stop]
callbacklist3 = [lr_reduce, checkpoint3, early_stop]

## Data augmentation

In [None]:
# Random geometric augmentation applied on the fly to the training images.
datagen = ImageDataGenerator(rotation_range=360, # Degree range for random rotations
                            width_shift_range=0.2, # Range for random horizontal shifts
                            height_shift_range=0.2, # Range for random vertical shifts
                            zoom_range=0.2, # Range for random zoom
                            horizontal_flip=True, # Randomly flip inputs horizontally
                            vertical_flip=True) # Randomly flip inputs vertically

# datagen.fit(...) is intentionally not called: it only computes statistics
# for featurewise_center / featurewise_std_normalization / zca_whitening,
# none of which is enabled, so it was a full pass over the data with no effect.
train_generator = datagen.flow(x_train, y_train, batch_size=batch_size)

### vgg16


In [None]:
# ImageNet-pretrained VGG16 without its classification head.
vgg_base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(img_height, img_width, 3),
)


In [None]:
# Print the layer-by-layer summary of the VGG16 base.
vgg_base_model.summary()

In [None]:
# Freeze the first 15 layers of the base; the remaining layers stay trainable
# and are fine-tuned together with the new head.
for frozen_layer in vgg_base_model.layers[:15]:
    frozen_layer.trainable = False


In [None]:
# Classification head on top of the VGG16 feature maps:
# Flatten -> Dense(1024) -> Dense(256) -> softmax over the classes.
x = vgg_base_model.output

x = tf.keras.layers.Flatten()(x)

x = tf.keras.layers.Dense(1024, activation='relu')(x)
x = tf.keras.layers.Dropout(0.3)(x)

x = tf.keras.layers.Dense(256, activation='relu')(x)
x = tf.keras.layers.Dropout(0.2)(x)

# Output width follows the number of classes found in the data instead of a
# hard-coded 12, so it always matches the one-hot targets.
x = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

vgg_model = tf.keras.models.Model(inputs=vgg_base_model.input, outputs=x)

In [None]:
# Print the summary of the full VGG-based model (base + new head).
vgg_model.summary()

In [None]:
# Multi-class classification with one-hot targets.
vgg_model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Stage 1: train for up to `init_epoch` epochs on the augmented stream.
init_epoch = 10
vgg_history = vgg_model.fit(
    train_generator,
    validation_data=(x_val, y_val),
    epochs=init_epoch,
    callbacks=callbacklist1,
    verbose=2,
)

In [None]:
# Stage 2: continue training up to epoch `total_epoch`.
total_epoch = init_epoch+20
vgg_history1 = vgg_model.fit(train_generator,
                     epochs = total_epoch,
                     verbose = 2,
                     # History.epoch holds the 0-based indices of the epochs
                     # already run, so training resumes at last index + 1.
                     # Without the +1 the last epoch number is repeated.
                     initial_epoch = vgg_history.epoch[-1] + 1,
                     callbacks = callbacklist1,
                     validation_data = (x_val,y_val))

In [None]:
# Stage 3: continue for another 20 epochs.
# NOTE: `+=` makes this cell non-idempotent; re-running it raises the target
# epoch again.
total_epoch +=20
vgg_history2 = vgg_model.fit(train_generator,
                     epochs = total_epoch,
                     verbose = 2,
                     # History.epoch holds the 0-based indices of the epochs
                     # already run, so training resumes at last index + 1.
                     # Without the +1 the last epoch number is repeated.
                     initial_epoch = vgg_history1.epoch[-1] + 1,
                     callbacks = callbacklist1,
                     validation_data = (x_val,y_val))

## resnet

In [None]:
# ImageNet-pretrained ResNet50 without its classification head.
resnet_base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(img_height, img_width, 3),
)
resnet_base_model.summary()

In [None]:
# Freeze the first 15 layers of the ResNet50 base; all later layers stay trainable.
# NOTE(review): the same slice [:15] is used for VGG16 above. ResNet50 has many
# more layers, so this presumably freezes only its earliest layers - confirm
# that this is intended.
for layer in resnet_base_model.layers[:15]:
    layer.trainable = False


In [None]:
# Classification head on top of the ResNet50 feature maps:
# Flatten -> Dense(8192) -> Dense(1024) -> Dense(256) -> softmax.
x = resnet_base_model.output

x = tf.keras.layers.Flatten()(x)

x = tf.keras.layers.Dense(8192, activation='relu')(x)
x = tf.keras.layers.Dropout(0.4)(x)

x = tf.keras.layers.Dense(1024, activation='relu')(x)
x = tf.keras.layers.Dropout(0.3)(x)

x = tf.keras.layers.Dense(256, activation='relu')(x)
x = tf.keras.layers.Dropout(0.2)(x)

# Output width follows the number of classes found in the data instead of a
# hard-coded 12, so it always matches the one-hot targets.
x = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

resnet_model = tf.keras.models.Model(inputs=resnet_base_model.input, outputs=x)

In [None]:
# Fresh optimizer for this model, so no state or reduced learning rate is
# inherited from the previous training run.
# `decay` is not passed: it is 0.0 (a no-op) and recent Keras optimizers
# reject/ignore that keyword.
opt = tf.keras.optimizers.Adam(learning_rate = lr,beta_1=beta_1,beta_2=beta_2,epsilon=epsilon)
resnet_model.compile(loss = 'categorical_crossentropy',
                 optimizer = opt,
                 metrics=['accuracy'])

In [None]:
# Stage 1: train for up to `init_epoch` epochs on the augmented stream.
init_epoch = 10
resnet_history = resnet_model.fit(
    train_generator,
    validation_data=(x_val, y_val),
    epochs=init_epoch,
    callbacks=callbacklist2,
    verbose=2,
)


In [None]:
# Stage 2: continue training up to epoch `total_epoch`.
total_epoch = init_epoch+20
resnet_history1 = resnet_model.fit(train_generator,
                     epochs = total_epoch,
                     verbose = 2,
                     # History.epoch holds the 0-based indices of the epochs
                     # already run, so training resumes at last index + 1.
                     # Without the +1 the last epoch number is repeated.
                     initial_epoch = resnet_history.epoch[-1] + 1,
                     callbacks = callbacklist2,
                     validation_data = (x_val,y_val))

In [None]:
# Stage 3: continue for another 20 epochs.
# NOTE: `+=` makes this cell non-idempotent; re-running it raises the target
# epoch again.
total_epoch +=20
resnet_history2 = resnet_model.fit(train_generator,
                     epochs = total_epoch,
                     verbose = 2,
                     # History.epoch holds the 0-based indices of the epochs
                     # already run, so training resumes at last index + 1.
                     # Without the +1 the last epoch number is repeated.
                     initial_epoch = resnet_history1.epoch[-1] + 1,
                     callbacks = callbacklist2,
                     validation_data = (x_val,y_val))

## Inception

In [None]:
# ImageNet-pretrained InceptionV3 without its classification head.
# (The layer summary is very long; uncomment the last line to print it.)
inc_base_model = InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(img_height, img_width, 3),
)
#inc_base_model.summary()

In [None]:
# Classification head on top of the InceptionV3 feature maps:
# Flatten -> Dense(2048) -> Dense(512) -> Dense(128) -> softmax.
x = inc_base_model.output

x = tf.keras.layers.Flatten()(x)

x = tf.keras.layers.Dense(2048, activation='relu')(x)
x = tf.keras.layers.Dropout(0.3)(x)

x = tf.keras.layers.Dense(512, activation='relu')(x)
x = tf.keras.layers.Dropout(0.25)(x)

x = tf.keras.layers.Dense(128, activation='relu')(x)
x = tf.keras.layers.Dropout(0.2)(x)

# Output width follows the number of classes found in the data instead of a
# hard-coded 12, so it always matches the one-hot targets.
x = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

inc_model = tf.keras.models.Model(inputs=inc_base_model.input, outputs=x)


In [None]:
# Build a fresh optimizer for this model. Re-using the instance that already
# trained another model would carry over its internal state and any learning
# rate lowered by ReduceLROnPlateau, and recent Keras versions refuse to apply
# an optimizer to variables it was not built for.
opt = tf.keras.optimizers.Adam(learning_rate = lr,beta_1=beta_1,beta_2=beta_2,epsilon=epsilon)
inc_model.compile(loss = 'categorical_crossentropy',
                 optimizer = opt,
                 metrics=['accuracy'])

In [None]:
# Stage 1: train for up to `init_epoch` epochs on the augmented stream.
init_epoch = 10
inc_history = inc_model.fit(
    train_generator,
    validation_data=(x_val, y_val),
    epochs=init_epoch,
    callbacks=callbacklist3,
    verbose=2,
)


In [None]:
# Stage 2: continue training up to epoch `total_epoch`.
total_epoch = init_epoch+20
inc_history1 = inc_model.fit(train_generator,
                     epochs = total_epoch,
                     verbose = 2,
                     # History.epoch holds the 0-based indices of the epochs
                     # already run, so training resumes at last index + 1.
                     # Without the +1 the last epoch number is repeated.
                     initial_epoch = inc_history.epoch[-1] + 1,
                     callbacks = callbacklist3,
                     validation_data = (x_val,y_val))


In [None]:
# Stage 3: continue for another 20 epochs.
# NOTE: `+=` makes this cell non-idempotent; re-running it raises the target
# epoch again.
total_epoch +=20
inc_history2 = inc_model.fit(train_generator,
                     epochs = total_epoch,
                     verbose = 2,
                     # History.epoch holds the 0-based indices of the epochs
                     # already run, so training resumes at last index + 1.
                     # Without the +1 the last epoch number is repeated.
                     initial_epoch = inc_history1.epoch[-1] + 1,
                     callbacks = callbacklist3,
                     validation_data = (x_val,y_val))