In [None]:
# List the contents of the two Kaggle input directories this notebook reads from.
! ls '/kaggle/input/plant-seedlings-classification/'
! ls '/kaggle/input/oversampling-plant-seedling/'

In [None]:
import os
import gc
import numpy as np 
import matplotlib.pyplot as plt
import PIL
import PIL.Image
import tensorflow as tf
print(tf.__version__)
# tf.test.is_gpu_available() is deprecated; ask TensorFlow for the visible GPU devices
# instead and report whether at least one exists.
print("Is there a GPU available: ", end = ' ')
print(len(tf.config.list_physical_devices('GPU')) > 0)
import cv2
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Run a full garbage-collection pass. gc.collect() returns the number of unreachable
# objects it found (an object count, not a number of bytes), which is what gets printed.
memory_freed = gc.collect()
print('Memory made free: ',memory_freed)

In [None]:
# Setting all the variables and constants as configuration

# Single source of truth for the image size: it is both the resize target of the input
# pipeline ('img_height' / 'img_width') and the input shape of the pretrained backbones,
# so the two can no longer drift apart.
IMG_HEIGHT = 250
IMG_WIDTH = 250
INPUT_SHAPE = (IMG_HEIGHT, IMG_WIDTH, 3)

configuration = {
    # --- Data locations ---
    'train_path_org': '/kaggle/input/plant-seedlings-classification/train',
    'train_path': '/kaggle/input/oversampling-plant-seedling/balanced_data',
    'test_path': '/kaggle/input/plant-seedlings-classification/test/',
    'csv_path': '/kaggle/input/plant-seedlings-classification/sample_submission.csv',

    # --- Input pipeline ---
    # NOTE(review): the cell that splits train/validation computes its size with a
    # literal 0.2 rather than this value; confirm which fraction is intended.
    'validation_split': 0.25,
    'buffer_size': 1000,      # shuffle buffer used by configure_for_performance
    'batch_size': 32,
    'img_height': IMG_HEIGHT,
    'img_width': IMG_WIDTH,
    'num_classes' : 12,       # width of the final Dense layer in build_model

    # --- Training schedule ---
    # NOTE(review): 'drop_out_rate' is not referenced by the cells visible in this notebook.
    'drop_out_rate': 0.2,
    'epochs': 20,
    'fine_tune_flag': True,
    'fine_tune_epochs': 20,
    # Index of the first backbone layer that stays trainable during fine-tuning;
    # every layer before it is frozen again.
    'vgg_finetune': 17,
    'resnet_finetune': 171,
    'inception_finetune': 251,

    # --- Callbacks (shared by every fit() call) ---
    'early_stopping': tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='auto',
        verbose=1,
        patience=5),
    'reduce_lr_on_platue': tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        patience=3,
        verbose=1,
        factor=.5,
        min_lr=0.0000001),

    # One checkpoint file per architecture; only the best val_loss model is kept.
    'vgg_checkpoint': tf.keras.callbacks.ModelCheckpoint(
        'vgg_model.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min'),
    'resnet_checkpoint': tf.keras.callbacks.ModelCheckpoint(
        'resnet_model.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min'),
    'inception_checkpoint': tf.keras.callbacks.ModelCheckpoint(
        'inception_model.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min'),

    # --- Compile arguments ---
    # NOTE(review): this single Adam instance is handed to compile() for every model in
    # the notebook, so its state (presumably including any learning-rate reduction made
    # by ReduceLROnPlateau) carries over from one model to the next — consider creating
    # one optimizer per model.
    'optimizer': tf.keras.optimizers.Adam(),
    # The classifier head ends in a Dense layer without activation, hence from_logits=True.
    'loss': tf.losses.SparseCategoricalCrossentropy(from_logits=True),
    'accuracy': ['accuracy'],

    # --- Pretrained backbones (ImageNet weights, classification top removed) ---
    'vgg_16_base': tf.keras.applications.VGG16(
        input_shape=INPUT_SHAPE,
        include_top=False,
        weights='imagenet'),
    'resnet_50_base': tf.keras.applications.ResNet50(
        input_shape=INPUT_SHAPE,
        include_top=False,
        weights='imagenet'),
    'inception_v3_base': tf.keras.applications.InceptionV3(
        input_shape=INPUT_SHAPE,
        include_top=False,
        weights='imagenet'),
}

# Class Balance of original dataset

In [None]:
# Count the entries inside every class folder of the original training set.
path = configuration['train_path_org']
folder_name = os.listdir(path)
img_count = []
for i in folder_name:
    count = len(os.listdir(os.path.join(path, i)))
    img_count.append(count)
    print("Folder Name : ",i)
    print("No of Images : ",count)

# Bar chart of the per-folder counts, drawn through an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(20, 5))
ax.bar(folder_name, img_count, color='maroon', width=0.4)
ax.set_xlabel("Folder Name")
ax.set_ylabel("No of images in each folder")
ax.set_title("Image Frequency")

# Rotate the tick labels so the folder names do not overlap.
fig.autofmt_xdate()
plt.show()

# Class Balance of oversampled dataset

In [None]:
# Count the entries inside every class folder of the oversampled training set.
path = configuration['train_path']
folder_name = os.listdir(path)
img_count = []
for i in folder_name:
    count = len(os.listdir(os.path.join(path, i)))
    img_count.append(count)
    print("Folder Name : ",i)
    print("No of Images : ",count)

# Bar chart of the per-folder counts, drawn through an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(20, 5))
ax.bar(folder_name, img_count, color='maroon', width=0.4)
ax.set_xlabel("Folder Name")
ax.set_ylabel("No of images in each folder")
ax.set_title("Image Frequency")

# Rotate the tick labels so the folder names do not overlap.
fig.autofmt_xdate()
plt.show()

# Working with tf.data

In [None]:
import pathlib
# Root directory of the training images used by the tf.data pipeline
# (the 'train_path' configuration entry).
data_dir = pathlib.Path(configuration['train_path'])
print(data_dir)

In [None]:
# Number of files matching <sub-folder>/<name>.png under data_dir.
# NOTE(review): the file listings are built with the pattern '*/*' (any extension), while
# this count only includes '*.png'; confirm both select the same files, because this
# count is reused as the shuffle buffer size and as the basis of the validation size.
image_count = len(list(data_dir.glob('*/*.png')))
print(image_count)

In [None]:
# list_files() returns file names in a non-deterministic shuffled order by default, so
# separate passes over the listing (for example by the train/validation datasets that are
# later carved out with skip()/take()) are not guaranteed to see the same order and the
# two subsets could overlap. List deterministically (shuffle=False) and shuffle exactly
# once with reshuffle_each_iteration=False so the order is random but fixed.
list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*'), shuffle=False)
list_ds = list_ds.shuffle(image_count, reshuffle_each_iteration=False)

# For Inception
# as it requires different type of preprocessing compared to vgg and resnet
inception_ds = tf.data.Dataset.list_files(str(data_dir/'*/*'), shuffle=False)
inception_ds = inception_ds.shuffle(image_count, reshuffle_each_iteration=False)

In [None]:
# Peek at the first three entries of the file listing (printed as raw byte strings).
for f in list_ds.take(3):
    s = f.numpy()
    print(s)

In [None]:
# Class names are the sorted names of the entries directly under data_dir.
class_names = np.array(sorted(entry.name for entry in data_dir.glob('*')))
print(class_names)
# Lookup table: integer label -> class name.
class_map = dict(enumerate(class_names))
print('\n\n',class_map)

In [None]:
# Hold out the first 20% of each file listing for validation; the remainder is training.
# NOTE(review): configuration['validation_split'] is 0.25 but the literal here is 0.2 —
# confirm which fraction is intended.
val_size = int(0.2 * image_count)

val_ds, train_ds = list_ds.take(val_size), list_ds.skip(val_size)

inception_val, inception_train = inception_ds.take(val_size), inception_ds.skip(val_size)

In [None]:
# Number of file paths in the training subset, then in the validation subset.
print(tf.data.experimental.cardinality(train_ds).numpy())
print(tf.data.experimental.cardinality(val_ds).numpy())

In [None]:
def get_label(file_path):
    """Return the integer class index encoded in an image file path.

    The class is the name of the file's parent directory, looked up in the
    module-level `class_names` array.

    Args:
        file_path: scalar string tensor with the path of one image file.

    Returns:
        Scalar tf.int64 tensor: position of the parent directory name in `class_names`.
    """
    # convert the path to a list of path components
    parts = tf.strings.split(file_path, os.path.sep)
    # The second to last is the class-directory
    one_hot = parts[-2] == class_names
    # Candidate indices follow the real number of classes instead of a hard-coded 12,
    # so the mask and the index tensor always have matching lengths.
    label_classes = tf.range(len(class_names), dtype=tf.int64)
    ans = tf.boolean_mask(label_classes, one_hot)
    return ans[0]

def augment(image):
    """Apply random brightness and contrast jitter to a single image tensor.

    Args:
        image: image tensor to perturb.

    Returns:
        The image after a random brightness shift (max_delta=0.2) followed by a
        random contrast adjustment with a factor drawn between 0.8 and 1.
    """
    brightened = tf.image.random_brightness(image, max_delta=0.2)
    return tf.image.random_contrast(brightened, 0.8, 1)

def decode_img(img):
    """Decode one encoded image and prepare it for the VGG16 / ResNet50 pipeline.

    Args:
        img: scalar string tensor holding the raw bytes of an image file.

    Returns:
        float32 tensor of shape (img_height, img_width, 3) after VGG16
        preprocessing, augmentation and resizing.
    """
    # convert the compressed string to a 3D uint8 tensor
    img = tf.image.decode_jpeg(img, channels=3)

    # Cast BEFORE preprocessing: preprocess_input expects floating-point pixels in
    # [0, 255]. Feeding it the uint8 tensor makes the mean subtraction happen in
    # uint8, which cannot represent the negative results.
    img = tf.cast(img, tf.float32)

    # Preprocess
    img = tf.keras.applications.vgg16.preprocess_input(img)

    # Augment
    # NOTE(review): augment() uses a brightness max_delta of 0.2, which is small relative
    # to the 0-255-scaled values at this point — confirm this is the intended strength.
    img = augment(img)

    # resize the image to the desired size
    img = tf.image.resize(img, [configuration['img_height'], configuration['img_width']])
    return img

def process_path(file_path):
    """Turn an image file path into an (image, label) pair.

    Args:
        file_path: scalar string tensor with the path of one image file.

    Returns:
        Tuple of the decoded image from `decode_img` and the label from `get_label`.
    """
    label = get_label(file_path)
    # Read the file as raw bytes and decode it in one step.
    image = decode_img(tf.io.read_file(file_path))
    return image, label

# Map the whole (unsplit) file listing to (image, label) pairs.
# NOTE(review): labeled_ds is not referenced by any other cell visible in this notebook.
labeled_ds = list_ds.map(process_path)

In [None]:
# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Replace the file-path datasets with (image, label) datasets built by process_path.
train_ds = train_ds.map(process_path, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.map(process_path, num_parallel_calls=AUTOTUNE)

def configure_for_performance(ds):
    """Cache, shuffle, batch and prefetch a dataset.

    Args:
        ds: dataset of individual examples.

    Returns:
        The dataset cached, shuffled with the configured buffer size, grouped into
        batches of the configured batch size, and prefetched with AUTOTUNE.
    """
    shuffle_buffer = configuration['buffer_size']
    batch_size = configuration['batch_size']
    # NOTE(review): cache() comes first, so elements are stored as they arrive; any
    # random transform applied upstream is presumably frozen after the first pass.
    return (
        ds.cache()
        .shuffle(buffer_size=shuffle_buffer)
        .batch(batch_size)
        .prefetch(buffer_size=AUTOTUNE)
    )

# Apply the cache/shuffle/batch/prefetch pipeline to both subsets.
train_ds = configure_for_performance(train_ds)
val_ds = configure_for_performance(val_ds)

In [None]:
def inception_img(img):
    """Decode one encoded image and prepare it for the Inception pipeline.

    Args:
        img: scalar string tensor holding the raw bytes of an image file.

    Returns:
        float32 tensor of shape (img_height, img_width, 3): pixels divided by 255,
        then augmented and resized.
    """
    # Decode to a 3-channel uint8 tensor, then scale to floats by dividing by 255.
    decoded = tf.image.decode_jpeg(img, channels=3)
    scaled = tf.cast(decoded, tf.float32) / 255.

    # Random brightness / contrast jitter.
    jittered = augment(scaled)

    # Resize to the configured target size.
    target_size = [configuration['img_height'], configuration['img_width']]
    return tf.image.resize(jittered, target_size)


def process_inception(file_path):
    """Turn an image file path into an (image, label) pair for the Inception pipeline.

    Args:
        file_path: scalar string tensor with the path of one image file.

    Returns:
        Tuple of the image from `inception_img` and the label from `get_label`.
    """
    label = get_label(file_path)
    # Read the file as raw bytes and decode it in one step.
    image = inception_img(tf.io.read_file(file_path))
    return image, label

# Rebind inception_ds to decoded (image, label) pairs built from list_ds, for a visual check.
# inception_train / inception_val were derived from the earlier file listing before this
# line, so they are not affected by the reassignment.
inception_ds = list_ds.map(process_inception)

# Show the first example, titled with its class name.
for image, label in inception_ds.take(1):
    print("Image shape: ", image.numpy().shape)
    print("Label: ", label.numpy())
    plt.imshow(image)
    plt.title(class_map[label.numpy()])

In [None]:
# Collect ten decoded examples from the preview dataset.
image_batch, label_batch =  [],[]

for image, label in inception_ds.take(10):
    image_batch.append(image)
    label_batch.append(label)


# Draw the first six of them on a 3x3 subplot grid, each titled with its class name.
plt.figure(figsize=(10, 10))
for i in range(6):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(image_batch[i].numpy().astype("float32"))
    label = label_batch[i]
    plt.title(class_names[label])
    plt.axis("off")

In [None]:
# Build the Inception train/validation pipelines: decode with process_inception, then
# apply the shared cache/shuffle/batch/prefetch configuration.
inception_train = inception_train.map(process_inception, num_parallel_calls=AUTOTUNE)
inception_val = inception_val.map(process_inception, num_parallel_calls=AUTOTUNE)

inception_train = configure_for_performance(inception_train)
inception_val = configure_for_performance(inception_val)

# Model template

In [None]:
def build_model(base_model):
    """Stack a small dense classifier on top of a frozen backbone and compile it.

    Args:
        base_model: Keras model used as feature extractor. Its `trainable` flag is
            set to False on the object that is passed in.

    Returns:
        Compiled `tf.keras.Sequential` model: backbone, Flatten, Dense(128, relu),
        Dense(64, relu) and a final Dense layer with `num_classes` units and no
        activation.
    """
    # Freeze the backbone so that only the new head is trained.
    base_model.trainable = False

    model = tf.keras.Sequential()
    model.add(base_model)
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dense(64, activation='relu'))
    model.add(tf.keras.layers.Dense(configuration['num_classes']))

    # NOTE(review): the optimizer object comes from the shared configuration, so every
    # model built here is compiled with the same optimizer instance.
    model.compile(
        optimizer=configuration['optimizer'],
        loss=configuration['loss'],
        metrics=configuration['accuracy'],
    )

    return model

## Accuracy and Loss plotting template

In [None]:
def plot_graphs(history):
    """Plot training vs. validation accuracy and loss curves.

    Args:
        history: object whose `.history` mapping holds the 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' series (as returned by `fit`).
    """
    metrics = history.history
    acc, val_acc = metrics['accuracy'], metrics['val_accuracy']
    loss, val_loss = metrics['loss'], metrics['val_loss']

    fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

    # Top panel: accuracy, with the upper axis limit pinned to 1.
    acc_ax.plot(acc, label='Training Accuracy')
    acc_ax.plot(val_acc, label='Validation Accuracy')
    acc_ax.legend(loc='lower right')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
    acc_ax.set_title('Training and Validation Accuracy')

    # Bottom panel: loss, with the axis fixed to [0, 1].
    loss_ax.plot(loss, label='Training Loss')
    loss_ax.plot(val_loss, label='Validation Loss')
    loss_ax.legend(loc='upper right')
    loss_ax.set_ylabel('Cross Entropy')
    loss_ax.set_ylim([0, 1.0])
    loss_ax.set_title('Training and Validation Loss')
    loss_ax.set_xlabel('epoch')
    plt.show()

# VGG 16

In [None]:
# Garbage-collection pass before VGG16 training; prints the value gc.collect() returned.
memory_freed = gc.collect()
print('Memory made free: ',memory_freed)

In [None]:
# Build the VGG16-based classifier (backbone frozen by build_model) and train its head.
vgg_base = configuration['vgg_16_base']
vgg_model = build_model(vgg_base)

# summary() prints the table itself; the surrounding print() also prints its return value.
print(vgg_model.summary())
print('\n\n')

# The checkpoint callback writes 'vgg_model.h5' whenever val_loss improves.
history = vgg_model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = configuration['epochs'],
    callbacks = [
                configuration['early_stopping'], 
                configuration['reduce_lr_on_platue'], 
                configuration['vgg_checkpoint']
                ]
    )

# Plotting the graphs
plot_graphs(history)

# Fine-Tune VGG 16

In [None]:
# Garbage-collection pass before VGG16 fine-tuning; prints the value gc.collect() returned.
memory_freed = gc.collect()
print('Memory made free: ',memory_freed)

In [None]:
# Unfreeze the VGG16 backbone, then freeze again every layer below the fine-tune boundary.
vgg_base.trainable = True
print('Total No of layers: ',len(vgg_base.layers))

# Fine-tune from this layer onwards
fine_tune_at = configuration['vgg_finetune'] # 17

# Freeze all the layers before the `fine_tune_at` layer
for layer in vgg_base.layers[:fine_tune_at]:
    layer.trainable =  False
    
# Check the trainable status of the individual layers
for layer in vgg_base.layers:
    print(layer, layer.trainable)

print('\n\n')
    
# Compile again after changing the trainable flags, with the same shared
# optimizer / loss / metrics objects from the configuration.
vgg_model.compile(
    optimizer  = configuration['optimizer'],
    loss = configuration['loss'],
    metrics = configuration['accuracy']
)

print('No of trainable variables: ',len(vgg_model.trainable_variables))
print(vgg_model.summary())

In [None]:
# Optional fine-tuning phase, enabled by configuration['fine_tune_flag'].
# It reuses the same callback objects (including the VGG checkpoint) as the first phase.
if(configuration['fine_tune_flag']):
    history_fine = vgg_model.fit(train_ds,
                             validation_data=val_ds,
                             epochs=configuration['fine_tune_epochs'],
                             callbacks=[configuration['early_stopping'], 
                                        configuration['reduce_lr_on_platue'], 
                                        configuration['vgg_checkpoint']]
                            )
    
    # Plotting the graphs
    plot_graphs(history_fine)

### VGG16 Classification Report

In [None]:
# Evaluate the best checkpointed VGG16 model on the validation set.
loaded_model = tf.keras.models.load_model('vgg_model.h5')
y_true = []
y_pred = []

for image,label in val_ds.take(-1):
    ans = loaded_model.predict(image)
    # Take the predicted class of every sample actually present in the batch. The final
    # batch can hold fewer samples than the batch size, so indexing a fixed range(32)
    # would run past its end.
    y_pred.extend(np.argmax(ans, axis=1))
    y_true.extend(label.numpy())

print(class_map)
print('\n\nClassification Report: ')
print(classification_report(y_true, y_pred, target_names=class_map.values()))
print('\n\nConfusion Matrix: ')
print(confusion_matrix(y_true, y_pred))

In [None]:
import pandas as pd
import seaborn as sn
%matplotlib inline

# Label the matrix with the union of true and predicted classes. That union is what the
# confusion matrix spans, so the DataFrame axes always match its shape even when a
# class appears only among the predictions.
labels = np.unique(np.concatenate([y_true, y_pred]))
data = confusion_matrix(y_true, y_pred, labels=labels)
df_cm = pd.DataFrame(data, columns=labels, index=labels)
df_cm.index.name = 'Actual'
df_cm.columns.name = 'Predicted'
plt.figure(figsize = (20,6))
sn.set(font_scale=1.4)#for label size
sn.heatmap(df_cm, cmap="Blues", annot=True,annot_kws={"size": 16}, fmt='d')# font size

# Resnet 50

In [None]:
# Garbage-collection pass before ResNet50 training; prints the value gc.collect() returned.
memory_freed = gc.collect()
print('Memory made free: ',memory_freed)

In [None]:
# Build the ResNet50-based classifier (backbone frozen by build_model) and train its head.
# NOTE(review): train_ds / val_ds are the datasets produced with the VGG16 preprocessing.
resnet_base = configuration['resnet_50_base']
resnet_model = build_model(resnet_base)

# summary() prints the table itself; the surrounding print() also prints its return value.
print(resnet_model.summary())
print('\n\n')

# The checkpoint callback writes 'resnet_model.h5' whenever val_loss improves.
history = resnet_model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = configuration['epochs'],
    callbacks = [
                configuration['early_stopping'], 
                configuration['reduce_lr_on_platue'], 
                configuration['resnet_checkpoint']
                ]
    )

# Plotting the graphs
plot_graphs(history)

# Fine-Tune ResNet 50

In [None]:
# Garbage-collection pass before ResNet50 fine-tuning; prints the value gc.collect() returned.
memory_freed = gc.collect()
print('Memory made free: ',memory_freed)

In [None]:
# Unfreeze the ResNet50 backbone, then freeze again every layer below the fine-tune boundary.
resnet_base.trainable = True
print('Total No of layers: ',len(resnet_base.layers))

# Fine-tune from this layer onwards
fine_tune_at = configuration['resnet_finetune'] # 171

# Freeze all the layers before the `fine_tune_at` layer
for layer in resnet_base.layers[:fine_tune_at]:
    layer.trainable =  False

print('\n\n')
    
# Compile again after changing the trainable flags, with the same shared
# optimizer / loss / metrics objects from the configuration.
resnet_model.compile(
    optimizer  = configuration['optimizer'],
    loss = configuration['loss'],
    metrics = configuration['accuracy']
)

print('No of trainable variables: ',len(resnet_model.trainable_variables))
print(resnet_model.summary())

In [None]:
# Optional fine-tuning phase, enabled by configuration['fine_tune_flag'].
# It reuses the same callback objects (including the ResNet checkpoint) as the first phase.
if(configuration['fine_tune_flag']):
    history_fine = resnet_model.fit(train_ds,
                             validation_data=val_ds,
                             epochs=configuration['fine_tune_epochs'],
                             callbacks=[configuration['early_stopping'],
                                        configuration['reduce_lr_on_platue'], 
                                        configuration['resnet_checkpoint']]
                            )
    
    # Plotting the graphs
    plot_graphs(history_fine)

### ResNet Classification Report

In [None]:
# Evaluate the best checkpointed ResNet50 model on the validation set.
loaded_model = tf.keras.models.load_model('resnet_model.h5')
y_true = []
y_pred = []

for image,label in val_ds.take(-1):
    ans = loaded_model.predict(image)
    # Take the predicted class of every sample actually present in the batch. The final
    # batch can hold fewer samples than the batch size, so indexing a fixed range(32)
    # would run past its end.
    y_pred.extend(np.argmax(ans, axis=1))
    y_true.extend(label.numpy())

print(class_map)
print('\n\nClassification Report: ')
print(classification_report(y_true, y_pred, target_names=class_map.values()))
print('\n\nConfusion Matrix: ')
print(confusion_matrix(y_true, y_pred))

In [None]:
import pandas as pd
import seaborn as sn
%matplotlib inline

# Label the matrix with the union of true and predicted classes. That union is what the
# confusion matrix spans, so the DataFrame axes always match its shape even when a
# class appears only among the predictions.
labels = np.unique(np.concatenate([y_true, y_pred]))
data = confusion_matrix(y_true, y_pred, labels=labels)
df_cm = pd.DataFrame(data, columns=labels, index=labels)
df_cm.index.name = 'Actual'
df_cm.columns.name = 'Predicted'
plt.figure(figsize = (20,6))
sn.set(font_scale=1.4)#for label size
sn.heatmap(df_cm, cmap="Blues", annot=True,annot_kws={"size": 16}, fmt='d')# font size

# Inception V3

In [None]:
# Garbage-collection pass before InceptionV3 training; prints the value gc.collect() returned.
memory_freed = gc.collect()
print('Memory made free: ',memory_freed)

In [None]:
# Build the InceptionV3-based classifier (backbone frozen by build_model) and train its
# head on the Inception-specific datasets.
inception_base = configuration['inception_v3_base']
inception_model = build_model(inception_base)

# summary() prints the table itself; the surrounding print() also prints its return value.
print(inception_model.summary())
print('\n\n')

# The checkpoint callback writes 'inception_model.h5' whenever val_loss improves.
history = inception_model.fit(
    inception_train,
    validation_data = inception_val,
    epochs = configuration['epochs'],
    callbacks = [
                configuration['early_stopping'], 
                configuration['reduce_lr_on_platue'], 
                configuration['inception_checkpoint']
                ]
    )

# Plotting the graphs
plot_graphs(history)

# Fine-Tune InceptionNet v3

In [None]:
# Garbage-collection pass before InceptionV3 fine-tuning; prints the value gc.collect() returned.
memory_freed = gc.collect()
print('Memory made free: ',memory_freed)

In [None]:
# Unfreeze the InceptionV3 backbone, then freeze again every layer below the fine-tune boundary.
inception_base.trainable = True
print('Total No of layers: ',len(inception_base.layers))

# Fine-tune from this layer onwards. The boundary is read from the shared configuration,
# like the VGG and ResNet fine-tuning cells, instead of a duplicated literal.
fine_tune_at = configuration['inception_finetune'] # 251

# Freeze all the layers before the `fine_tune_at` layer
for layer in inception_base.layers[:fine_tune_at]:
    layer.trainable =  False

# Compile again after changing the trainable flags, with the same shared
# optimizer / loss / metrics objects from the configuration.
inception_model.compile(
    optimizer  = configuration['optimizer'],
    loss = configuration['loss'],
    metrics = configuration['accuracy']
)

print('No of trainable variables: ',len(inception_model.trainable_variables))
print(inception_model.summary())

In [None]:
# Optional fine-tuning phase, enabled by configuration['fine_tune_flag'].
# It reuses the same callback objects (including the Inception checkpoint) as the first phase.
if(configuration['fine_tune_flag']):
    history_fine = inception_model.fit(inception_train,
                             validation_data=inception_val,
                             epochs=configuration['fine_tune_epochs'],
                             callbacks=[configuration['early_stopping'], 
                                        configuration['reduce_lr_on_platue'], 
                                        configuration['inception_checkpoint']]
                            )
    
    # Plotting the graphs
    plot_graphs(history_fine)

### Inception Classification Report

In [None]:
# Evaluate the best checkpointed InceptionV3 model on the Inception validation set.
loaded_model = tf.keras.models.load_model('inception_model.h5')
y_true = []
y_pred = []

for image,label in inception_val.take(-1):
    ans = loaded_model.predict(image)
    # Take the predicted class of every sample actually present in the batch. The final
    # batch can hold fewer samples than the batch size, so indexing a fixed range(32)
    # would run past its end.
    y_pred.extend(np.argmax(ans, axis=1))
    y_true.extend(label.numpy())

print(class_map)
print('\n\nClassification Report: ')
print(classification_report(y_true, y_pred, target_names=class_map.values()))
print('\n\nConfusion Matrix: ')
print(confusion_matrix(y_true, y_pred))

In [None]:
import pandas as pd
import seaborn as sn
%matplotlib inline

# Label the matrix with the union of true and predicted classes. That union is what the
# confusion matrix spans, so the DataFrame axes always match its shape even when a
# class appears only among the predictions.
labels = np.unique(np.concatenate([y_true, y_pred]))
data = confusion_matrix(y_true, y_pred, labels=labels)
df_cm = pd.DataFrame(data, columns=labels, index=labels)
df_cm.index.name = 'Actual'
df_cm.columns.name = 'Predicted'
plt.figure(figsize = (20,6))
sn.set(font_scale=1.4)#for label size
sn.heatmap(df_cm, cmap="Blues", annot=True,annot_kws={"size": 16}, fmt='d')# font size

In [None]:
# Final garbage-collection pass; prints the value gc.collect() returned.
memory_freed = gc.collect()
print('Memory made free: ',memory_freed)