# 1. **Importing Libraries**  📚

In [None]:
import os,shutil
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import zipfile
import tensorflow as tf
from tensorflow import keras
from tqdm import tqdm
import sklearn

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

import glob
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import load_img, img_to_array, array_to_img


from plotly.offline import plot, iplot, init_notebook_mode
import plotly.graph_objs as go
init_notebook_mode(connected=True)
import plotly.express as px
from plotly.subplots import make_subplots
print("Tensorflow Version: {}".format(tf.__version__))

In [None]:
# Notebook styling: build an HTML snippet whose CSS rule targets elements with
# the `.output_png` class (presumably the notebook's rendered PNG outputs --
# TODO confirm) and centers them horizontally and vertically.
# The HTML object is the last expression of the cell, so the notebook renders it.
from IPython.core.display import HTML
HTML("""
<style>
.output_png {
    display: table-cell;
    text-align: center;
    vertical-align: middle;
}
</style>
""")

In [None]:
# Notebook styling: explicitly display an HTML snippet whose CSS rule turns
# elements with the `.output` class into centered flex containers.
# Relies on `HTML` already being imported and on the notebook-provided
# `display` function.
display(HTML("""
<style>
.output {
    display: flex;
    align-items: center;
    text-align: center;
}
</style>
"""))

# 2. DATA IMPORT/ TRAIN, VAL & TEST DIRECTORY

In [None]:

import kaggle

from kaggle.api.kaggle_api_extended import KaggleApi

# Create the Kaggle API client and authenticate it.
api = KaggleApi()
api.authenticate()

# Kaggle API to download the dataset in local drive
#! kaggle competitions download -c plant-pathology-2020-fgvc7

# All dataset paths are resolved against the current working directory.
root = os.getcwd()
zipFile = 'plant-pathology-2020-fgvc7.zip'
filePath = os.path.join(root, zipFile)

print("[INFO] Extracting downloaded zip dataset...... \n")
# The archive is unpacked only when no "images" folder is present yet;
# extractall() without a target unpacks into the current working directory.
imagesAlreadyExtracted = os.path.exists(os.path.join(root, 'images'))
if not imagesAlreadyExtracted:
    with zipfile.ZipFile(filePath, 'r') as archive:
        archive.extractall()
print("[INFO] Zip extract completed...... \n")


print("[INFO] Generating train dataframe...... \n")

# Load the training labels shipped with the dataset.
trainCsvPath = os.path.join(root, "train.csv")
trainDf = pd.read_csv(trainCsvPath)
print("[INFO] Total shape of train dataset: {}".format(trainDf.shape))

  
def get_class(row):
    """Return the label of the first entry in ``row`` whose value equals 1.

    ``row`` is a pandas Series (for example one dataframe row); the returned
    value is taken from its index, i.e. the column name when the function is
    applied row-wise. Indexing the empty match set raises ``IndexError`` when
    no entry equals 1.
    """
    is_one = row.apply(lambda value: value == 1)
    matching_labels = row.index[is_one]
    return matching_labels[0]

# Add a "DiseaseClass" column holding, for every row, the column name that
# get_class returns for that row.
trainDf['DiseaseClass'] = trainDf.apply(get_class, axis=1)

# Show the last eight rows so the new column can be inspected.
trainDf.tail(8)

### 2.a Validation Directory

In [None]:
# Hold out 20% of the rows for validation (80-20 split). The split is
# stratified on "DiseaseClass" and seeded so it is reproducible.
from sklearn.model_selection import train_test_split

y = trainDf["DiseaseClass"]
df_train, df_val = train_test_split(
    trainDf,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Report the size of both partitions.
print("[INFO] Shape of train dataset is: {}\n".format(df_train.shape))
print("[INFO] Shape of validation dataset is: {}\n".format(df_val.shape))

In [None]:
trainDir = os.path.join(root, 'train')  # train directory
valDir = os.path.join(root, "val")      # validation directory
testDir = os.path.join(root, 'test')    # test directory


def _copy_split_into_class_dirs(split_df, split_dir):
    """Create one folder per disease class under ``split_dir`` and fill them.

    Args:
        split_df: dataframe with an "image_id" column (file name without the
            ".jpg" extension) and a "DiseaseClass" column (target folder name).
        split_dir: directory to create; it must not exist yet.

    Each image is copied from ``<root>/images`` into
    ``<split_dir>/<DiseaseClass>/``.
    """
    for class_name in ("healthy", "rust", "scab", "multiple_diseases"):
        os.makedirs(os.path.join(split_dir, class_name))

    # The label travels with each row, so no per-file dataframe search is needed.
    for image_id, label in zip(split_df["image_id"], split_df["DiseaseClass"]):
        file_name = image_id + '.jpg'
        shutil.copy2(os.path.join(root, 'images', file_name),
                     os.path.join(split_dir, label, file_name))


print("[INFO] Creating separate dirs for disease class in train directory ! \n")
if not os.path.exists(trainDir):  # populate only when the directory is absent
    _copy_split_into_class_dirs(df_train, trainDir)

print("[INFO] Creating separate dirs for disease class in validation directory ! \n")
if not os.path.exists(valDir):  # populate only when the directory is absent
    _copy_split_into_class_dirs(df_val, valDir)

print("[INFO] Creating separate test directory ! \n")
if not os.path.exists(testDir):  # populate only when the directory is absent
    os.mkdir(testDir)
    for filename in os.listdir(os.path.join(root, "images")):
        # Test images are the files whose name reads "Test" before the first underscore.
        if filename.split("_")[0] == "Test":
            shutil.copy2(os.path.join(root, 'images', filename), testDir)


print("[INFO] Total test images in test directory: {}".format(len(os.listdir(testDir))))


# 3. TARGET CLASS

In [None]:
import plotly.express as px

# Parallel-categories view of the four label columns of trainDf; the ribbons
# are coloured by the value of the "healthy" column.
fig = px.parallel_categories(
    trainDf,
    dimensions=['healthy', 'rust', 'multiple_diseases', 'scab'],
    color="healthy",
    color_continuous_scale=px.colors.sequential.Inferno,
    labels={'rust':'RUST LEAF', 'healthy':'HEALTHY LEAF', 'multiple_diseases':'MULTIPLE DISEASES', 'scab':'SCAB'},
    title="PARALLEL CATEGORICAL PLOT OF DISEASE CLASS ",
)
fig.show()

In [None]:
className = ["healthy", "multiple_diseases", "rust", "scab"]

# Number of rows flagged with 1 in each class column. Counting the boolean
# mask directly avoids hashing whole rows and does not drop rows that contain
# missing values in other columns.
classCount = [int((trainDf[name] == 1).sum()) for name in className]

countDf = pd.DataFrame({"ClassName": className, "Count": classCount})

# Bar chart of the per-class image counts.
fig = px.bar(countDf, x='ClassName', y='Count', title="Distribution of Image Disease Class in Train Dataset",
             width=800, height=400)

fig.update_traces(marker_color='rgb(150,100,225)', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.5, opacity=0.4)
fig.show()

### 3.a Train Dataset Visualization

In [None]:
# Show the first 12 entries of the images folder in a 4x3 grid.
images = os.listdir(os.path.join(root, "images"))[:12]
plt.figure(figsize=(12, 12))
for i, fileName in enumerate(images):
    imgPath = os.path.join(root, "images", fileName)
    img = cv2.imread(imgPath)
    if img is None:
        # cv2.imread returns None instead of raising for unreadable files;
        # leave that grid slot empty rather than crashing in cvtColor.
        continue
    plt.subplot(4, 3, i + 1)
    plt.axis('off')
    # OpenCV loads BGR; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.show()

### 3.b Visualizing a single leaf

In [None]:
# Pick one file from the "rust" training folder and display it, titled with
# the DiseaseClass recorded for that image in trainDf.
rustDir = os.path.join(root, "train", "rust")
image = os.listdir(rustDir)[23]

# Look the label up once: rows of trainDf whose image_id matches the file stem.
labelMatches = trainDf.loc[trainDf["image_id"] == image.split(".")[0], "DiseaseClass"]
index = labelMatches.index[0]
title = labelMatches[index]

imgPath = os.path.join(rustDir, image)
img = cv2.cvtColor(cv2.imread(imgPath), cv2.COLOR_BGR2RGB)   # for cv2 conversion from BGR to RGB

fig = px.imshow(img/255)
fig.update_layout(title_text=str(title), title_x=0.5)
fig.show()


### 3.c Visualizing a "MULTIPLE DISEASE" leaf class

In [None]:
# Show one "multiple_diseases" training image next to the histogram of its
# red, green and blue channel values.
file = os.listdir(os.path.join(root,"train","multiple_diseases"))[9]
imgPath = os.path.join(root, "train", "multiple_diseases", file)
img = cv2.imread(imgPath)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB
# The pixels are deliberately left unscaled: go.Image with its default rgb
# colormodel expects channel values up to 255, so an image divided by 255
# would render almost black. This also matches the other class cells.
fig = make_subplots(1,2)
fig.add_trace(go.Image(z=img), 1, 1)
for channel,color in enumerate(['red', 'green', 'blue']):
    fig.add_trace(go.Histogram(x=img[...,channel].ravel(), opacity = 0.5,
                              marker_color = color, name = '%s Channel'%color), 1, 2)
fig.update_layout( title = "LEAF WITH MULTIPLE DISEASE", title_x=0.5)
fig.show()

### 3.d Visualizing a "HEALTHY" leaf class

In [None]:
# Show one "healthy" training image next to the histogram of its red, green
# and blue channel values.
healthyDir = os.path.join(root, "train", "healthy")
file = os.listdir(healthyDir)[9]
imgPath = os.path.join(healthyDir, file)

# OpenCV loads BGR; convert to RGB before plotting.
img = cv2.cvtColor(cv2.imread(imgPath), cv2.COLOR_BGR2RGB)

fig = make_subplots(rows=1, cols=2)
fig.add_trace(go.Image(z=img), row=1, col=1)
for channel, color in enumerate(('red', 'green', 'blue')):
    channelHistogram = go.Histogram(
        x=img[..., channel].ravel(),
        opacity=0.5,
        marker_color=color,
        name='%s Channel' % color,
    )
    fig.add_trace(channelHistogram, row=1, col=2)
fig.update_layout(height=400, title="HEALTHY LEAF", title_x=0.5)
fig.show()

### 3.e Visualizing a "RUST" leaf class

In [None]:
# Show one "rust" training image next to the histogram of its red, green and
# blue channel values.
rustDir = os.path.join(root, "train", "rust")
file = os.listdir(rustDir)[9]
imgPath = os.path.join(rustDir, file)

# OpenCV loads BGR; convert to RGB before plotting.
img = cv2.cvtColor(cv2.imread(imgPath), cv2.COLOR_BGR2RGB)

fig = make_subplots(rows=1, cols=2)
fig.add_trace(go.Image(z=img), row=1, col=1)
for channel, color in enumerate(('red', 'green', 'blue')):
    channelHistogram = go.Histogram(
        x=img[..., channel].ravel(),
        opacity=0.5,
        marker_color=color,
        name='%s Channel' % color,
    )
    fig.add_trace(channelHistogram, row=1, col=2)
fig.update_layout(height=400, title="RUST LEAF", title_x=0.5)
fig.show()

### 3.f Visualizing a "SCAB" leaf class

In [None]:
# Show one "scab" training image next to the histogram of its red, green and
# blue channel values.
scabDir = os.path.join(root, "train", "scab")
file = os.listdir(scabDir)[9]
imgPath = os.path.join(scabDir, file)

# OpenCV loads BGR; convert to RGB before plotting.
img = cv2.cvtColor(cv2.imread(imgPath), cv2.COLOR_BGR2RGB)

fig = make_subplots(rows=1, cols=2)
fig.add_trace(go.Image(z=img), row=1, col=1)
for channel, color in enumerate(('red', 'green', 'blue')):
    channelHistogram = go.Histogram(
        x=img[..., channel].ravel(),
        opacity=0.5,
        marker_color=color,
        name='%s Channel' % color,
    )
    fig.add_trace(channelHistogram, row=1, col=2)
fig.update_layout(height=400, title="SCAB LEAF", title_x=0.5)
fig.show()

# 4. PRE-PROCESSING AND IMAGE AUGMENTATION

###  4.a Canny Edge Detection

Canny edge detection is used to detect the strong edges of an image; noise in the image is reduced by using a Gaussian kernel.

In [None]:
# Run Canny edge detection on one training image and show it beside the original.
#
# The train directory holds one sub-folder per class, so the image files are
# collected from inside those folders; listing trainDir itself would only
# return the folder names. Sorting keeps the chosen image stable between runs.
img = sorted(glob.glob(os.path.join(trainDir, "*", "*.jpg")))[:50]

imgPath = img[10]

image = cv2.imread(imgPath)
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("Could not read image: {}".format(imgPath))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# CANNY EDGE ALGORITHM (lower threshold 100, upper threshold 200)
edges = cv2.Canny(image, 100, 200)

# VISUALIZE THE RESULT
plt.figure(figsize=(8,5))
plt.subplot(121)
plt.axis('off')
plt.title('ORIGINAL')
plt.imshow(image[:,:,:])

plt.subplot(122)
plt.axis('off')
plt.title('EDGES')
plt.imshow(edges, cmap='gray')
plt.tight_layout()
plt.show()


### 4.b Image Augmentation

In [None]:
class Augment(tf.keras.layers.Layer):
    """Keras layer that randomly augments image inputs and passes labels through.

    The inputs go through random contrast, random zoom and random translation,
    in that order. Labels are returned unchanged.

    Args:
        seed: seed handed to every random sub-layer.
    """

    def __init__(self, seed=43):
        super().__init__()
        # Chain the three augmentations so each one is applied, instead of
        # each assignment replacing the previous layer.
        self.augment_input = tf.keras.Sequential([
            tf.keras.layers.RandomContrast(0.5, seed=seed),  # random contrast
            tf.keras.layers.RandomZoom(height_factor=(0.2, 0.3), width_factor=None,
                                       fill_mode='reflect', seed=seed),
            # RandomTranslation requires explicit shift factors. 0.1 mirrors the
            # width/height shift range given to the ImageDataGenerator in the
            # next cell -- NOTE(review): confirm the intended amount.
            tf.keras.layers.RandomTranslation(height_factor=0.1, width_factor=0.1, seed=seed),
        ])

    def call(self, inputs, labels):
        """Return ``(augmented_inputs, labels)``."""
        return self.augment_input(inputs), labels

### Create augmented images for the multiple diseases class only

In [None]:
# Create augmented copies of the "multiple_diseases" training images with an
# ImageDataGenerator, preview some of them, then move them next to the originals.

AUG_PREFIX = 'Augmented_image'  # file-name prefix of every generated image
AUG_PER_IMAGE = 6               # augmented copies written per source image

images = os.path.join(root, "train", "multiple_diseases")
aug_dir = os.path.join(root, "train", "aug_dir")  # staging folder for generated files

# creating an Image data generator
datagen = ImageDataGenerator(
                            rotation_range=45,
                            width_shift_range=0.1,
                            height_shift_range=0.1,
                            zoom_range=0.1,
                            brightness_range=(0.5, 1.5),
                            horizontal_flip=True,
                            vertical_flip=True,
                            fill_mode="nearest")

# remove previously created directory (only when it actually exists)
if os.path.exists(aug_dir):
    shutil.rmtree(aug_dir)

source_files = os.listdir(images)

# Run the augmentation once: if generated files are already present among the
# sources, re-running would keep inflating the class.
if not any(fname.startswith(AUG_PREFIX) for fname in source_files):
    os.mkdir(aug_dir)

    for fname in source_files:
        img = load_img(os.path.join(images, fname))
        img_arr = img_to_array(img)  # convert to array
        img_arr = np.expand_dims(img_arr, axis=0)  # adding batch dimension

        flow = datagen.flow(img_arr,
                            batch_size=1,
                            save_to_dir=aug_dir,
                            save_prefix=AUG_PREFIX,
                            save_format='jpg')
        # The generator never ends by itself; every batch drawn writes one
        # augmented image to aug_dir, so stop after AUG_PER_IMAGE batches.
        for generated, _batch in enumerate(flow, start=1):
            if generated >= AUG_PER_IMAGE:
                break

    # os.path.join keeps the pattern valid on every platform.
    aug_paths = glob.glob(os.path.join(aug_dir, '*.jpg'))

    # Visualizing up to 15 augmented images
    aug_imgs = []
    for img_path in aug_paths[:15]:
        img = cv2.imread(img_path)
        aug_imgs.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.figure(figsize=(20,10))
    columns = 5
    rows = (len(aug_imgs) + columns - 1) // columns  # subplot needs an integer row count

    for i, image in enumerate(aug_imgs):
        plt.subplot(rows, columns, i + 1)
        plt.axis("off")
        plt.imshow(image)
    plt.suptitle("AUGMENTED MULTIPLE DISEASES CLASS IMAGE")  # title for the whole figure
    plt.show()

    # Move augmented image to train/multiple_diseases directory
    for img_path in aug_paths:
        shutil.move(img_path, os.path.join(images, os.path.basename(img_path)))

    # Drop the now-empty staging folder; presumably train/ is later read with
    # one sub-folder per class, where it would count as an extra class --
    # NOTE(review): confirm.
    shutil.rmtree(aug_dir)