<h1>Importing Libraries</h1>

In [None]:
import os
import gc
import re

import cv2
import math
import random
import numpy as np
import scipy as sp
import pandas as pd

import tensorflow as tf
from IPython.display import SVG
from keras.utils import plot_model
import tensorflow.keras.layers as L
from keras.utils import model_to_dot
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from kaggle_datasets import KaggleDatasets
from tensorflow.keras.applications import DenseNet201, InceptionV3, ResNet50V2, InceptionResNetV2, VGG19, VGG16

import seaborn as sns
from tqdm import tqdm
import matplotlib.cm as cm
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

tqdm.pandas()
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

np.random.seed(0)
tf.random.set_seed(0)

import warnings
warnings.filterwarnings("ignore")


import cv2 as cv
from skimage import filters
from skimage import morphology

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import matplotlib.pyplot as plt

from kaggle_datasets import KaggleDatasets
import tensorflow as tf
print(f"Tensorflow version: {tf.__version__}")
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score

SEED = 5

<h1>Loading data</h1>

In [None]:
# Number of training epochs and the SAMPLE_LEN constant used by this notebook.
EPOCHS = 40
SAMPLE_LEN = 1821

# Root of the competition input directory; every other path is derived from it.
INPUT_PATH = '/kaggle/input/plant-pathology-2020-fgvc7/'
IMG_PATH = f'{INPUT_PATH}images/'
TRAIN_DATA, TEST_DATA, SAMPLE_SUB = (
    INPUT_PATH + csv_name
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)


In [None]:
train_df = pd.read_csv(TRAIN_DATA)
test_df = pd.read_csv(TEST_DATA)
sampleSubmission_df = pd.read_csv(SAMPLE_SUB)

<h1>EDA</h1>

In [None]:
# Target size for EDA images, given as (width, height) -- the order cv.resize expects.
EDA_IMG_SHAPE = (512,256)

def getImage(image_id,SHAPE=EDA_IMG_SHAPE):
    """Load one JPEG from IMG_PATH and return it resized and in RGB order.

    Parameters
    ----------
    image_id : str
        File name without the '.jpg' extension.
    SHAPE : tuple
        (width, height) passed to cv.resize.

    Returns
    -------
    The resized image converted from OpenCV's BGR order to RGB.

    Raises
    ------
    FileNotFoundError
        If the file is missing or cannot be decoded.
    """
    path = IMG_PATH + image_id + '.jpg'
    img = cv.imread(path)
    # cv.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than inside cv.resize.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path}")

    img = cv.resize(img,SHAPE)
    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)

    return img

In [None]:
%%time

def _loadClassImages(label):
    """Load every training image whose `label` column equals 1."""
    # The first column of train_df holds the image ids.
    image_ids = train_df[train_df[label]==1].iloc[:,0]
    return [getImage(image_id) for image_id in image_ids]


healthy = _loadClassImages('healthy')

multiple_diseases = _loadClassImages('multiple_diseases')

rust = _loadClassImages('rust')

scab = _loadClassImages('scab')

In [None]:
classes = {'healthy':healthy, 'multiple_diseases':multiple_diseases, 'rust':rust, 'scab': scab} 

In [None]:
#Exploratory Data Analysis

In [None]:
def plotlyDataFrame(df,title):
    """Display a DataFrame as a Plotly table figure with the given title."""
    column_names = df.columns

    # One header entry and one cell column per DataFrame column.
    table = go.Table(
        header=dict(values=column_names),
        cells=dict(values=[df[name] for name in column_names]),
    )

    fig = go.Figure(data=[table])
    fig.update_layout(title=title)
    fig.show()

In [None]:
#Train DataFrame

In [None]:
plotlyDataFrame(train_df.iloc[:15,:],'Train Data')

In [None]:
#Test DataFrame

In [None]:
plotlyDataFrame(test_df.iloc[:15,:],'Test Data')

In [None]:
#Sample Submission File
plotlyDataFrame(sampleSubmission_df.iloc[:15,:], 'Sample Submission')

<h2>Class distributions</h2>

In [None]:


# Every column after the first is treated as a label column; each slice
# shows the sum of that column over the training frame.
label_columns = train_df.columns[1:]
label_counts = [np.sum(train_df[name]) for name in label_columns]

pie = go.Pie(labels=label_columns, values=label_counts)
fig = go.Figure(data=[pie])

fig.update_traces(
    hoverinfo='label+percent',
    textinfo='value',
    textfont_size=20,
    marker={'line': {'color': '#000000', 'width': 2}},
)

fig.update_layout(title_text="Target Distribution of Training-Data ")

fig.show()

In [None]:
## Image Augmentation
#Image data augmentation is a technique that can be used to artificially expand the size of a training dataset by creating modified versions of images in the dataset.

#Training deep learning neural network models on more data can result in more skillful models, and the augmentation techniques can create variations of the images that can improve the ability of the fit models to generalize what they have learned to new images.

#Transforms include a range of operations from the field of image manipulation, such as shifts, flips, zooms, and much more.

In [None]:
#Sample Image

def getRandomImage():
    """Return one randomly chosen image from a randomly chosen class.

    The class is drawn uniformly from the keys of the module-level `classes`
    dict, then an image is drawn uniformly from that class's list. The
    returned array is the stored object itself, not a copy.
    """
    # Draw from every class held in `classes`. Slicing train_df.columns[2:]
    # starts after the 'healthy' column, so healthy leaves were never sampled.
    class_name = random.choice(list(classes))
    return random.choice(classes[class_name])

In [None]:
img = getRandomImage()

In [None]:
# Show the randomly drawn sample image as an interactive Plotly figure.
fig = go.Figure(go.Image(z=img))

fig.update_layout(title_text="Sample Image")

fig.show()

In [None]:
#Image Augmentation With ImageDataGenerator
#We can use this to generate augmented images using different transformations like:

#Vertical Flipping
#Horizontal Flipping
#Shifted Images
#Rotated Images
#Zoomed Images
#Images with different Brightness levels etc.
#A plus point of using this is that it generates the images at runtime (while training a model), which means we do not need to store these augmented images.

In [None]:
img = np.expand_dims(img,axis=0)

In [None]:
%%time

# Random flips, brightness changes and zooms are applied each time a batch
# is drawn from the iterator.
generator = tf.keras.preprocessing.image.ImageDataGenerator(vertical_flip=True,
                                                            horizontal_flip=True,
                                                            brightness_range=[0.5,1.5],
                                                            zoom_range=[0.5,1.1])

# `img` carries a leading batch axis of size 1 (added with np.expand_dims).
iterator = generator.flow(img,batch_size=1)

fig, ax = plt.subplots(nrows=5, ncols=3, figsize=(15,10))

# First panel: the unmodified sample image.
ax[0,0].imshow(img[0])
ax[0,0].set_title("Sample Image",fontsize=10)
ax[0,0].set_xticks([])
ax[0,0].set_yticks([])

# Remaining 14 panels: one freshly augmented variant each.
for i in range(1,15):

    # Use the builtin next(): the iterator's .next() method is not available
    # in newer Keras releases, while the iterator protocol always is.
    image = next(iterator)[0].astype('uint8')

    ax[i//3,i%3].imshow(image)
    ax[i//3,i%3].set_xticks([])
    ax[i//3,i%3].set_yticks([])

fig.suptitle("Augmented Images of the sample image",fontsize=20)

plt.show()

In [None]:
#Image Processing

#I'll use Open-CV for image processing. Image processing can help us enhance our Classification-Model.
#In our case we just want the affected leaf hence I'll try to separate out that leaf from the unnecessary background. We can use image-segmentation techniques to do this.

In [None]:
sampleImg = getRandomImage()

In [None]:
def convertToHSV(img):
    """Convert an RGB image to HSV using OpenCV's COLOR_RGB2HSV_FULL code."""
    hsv_image = cv.cvtColor(img, cv.COLOR_RGB2HSV_FULL)
    return hsv_image

In [None]:
#ROI (Region Of Interest) Selection


#To select the ROI, I have used Canny edge detection to detect the edges of the leaves 
#and to find the edges of the rectangle ROI, I have used this: getROI method.


In [None]:

def getROI(img):
    """Estimate a rectangular region of interest from Canny edges.

    The vertical extent is the first and last row that contain an edge pixel
    inside a 10-pixel-wide strip around the horizontal centre; the horizontal
    extent is found the same way from a 10-pixel-high strip around the
    vertical centre. Each side is padded by 10 pixels and clamped so that it
    stays at least 2 pixels inside the image border.

    Parameters
    ----------
    img : RGB image array (height, width, 3).

    Returns
    -------
    edged : the Canny edge map of the grayscale image.
    (x_top, y_top, x_bottom, y_bottom) : plain ints giving the left, top,
        right and bottom coordinates of the rectangle. When a strip contains
        no edge at all, the corresponding pair falls back to the clamped
        image bounds instead of raising UnboundLocalError.
    """
    # convert the image to the gray-scale image
    gray = cv.cvtColor(img,cv.COLOR_RGB2GRAY)

    # Detect the edges in the image using canny edge detection
    edged = cv.Canny(gray,150,200)

    height, width = img.shape[0], img.shape[1]
    xm = width//2     # Middle coordinate of the x-axis (width of the image)
    ym = height//2    # Middle coordinate of the y-axis (height of the image)

    # Rows / columns whose central strip contains at least one edge pixel.
    edge_rows = np.flatnonzero(edged[:,xm-5:xm+5].any(axis=1))
    edge_cols = np.flatnonzero(edged[ym-5:ym+5,:].any(axis=0))

    # Defaults used when a strip has no edge: the whole image minus the margin.
    y_top, y_bottom = 2, height-2
    x_top, x_bottom = 2, width-2

    if edge_rows.size:
        y_top = max(int(edge_rows[0])-10, 2)
        y_bottom = min(int(edge_rows[-1])+10, height-2)

    if edge_cols.size:
        x_top = max(int(edge_cols[0])-10, 2)
        x_bottom = min(int(edge_cols[-1])+10, width-2)

    return edged,(x_top,y_top,x_bottom,y_bottom)

In [None]:
# 3x3 grid: one row per random sample, showing the original image, its
# Canny edge map and the ROI rectangle drawn on top of the image.
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(20, 13))

for i in range(3):
    orignal = getRandomImage()
    edged, coordinates = getROI(orignal)
    
    # Draw on a copy so the image returned by getRandomImage() is not modified.
    roi = orignal.copy()
    
    (x_top,y_top,x_bottom,y_bottom) = coordinates
    
    # Paint a 2-pixel-thick red frame just outside the ROI rectangle.
    roi[y_top-2:y_top,x_top:x_bottom+1] = [255,0,0]        # Top-edge
    roi[y_bottom:y_bottom+2,x_top:x_bottom+1] = [255,0,0]  # Bottom-edge
    roi[y_top:y_bottom+1,x_top-2:x_top] = [255,0,0]        # Left-edge
    roi[y_top:y_bottom+1,x_bottom:x_bottom+2] = [255,0,0]  # Right-edge
    
    ax[i,0].imshow(orignal)
    ax[i,0].set_title('Original Image', fontsize=15)
    ax[i,1].imshow(edged, cmap='gray')
    ax[i,1].set_title('Detected Edges', fontsize=15)
    ax[i,2].imshow(roi)
    ax[i,2].set_title('ROI', fontsize=15)
    
fig.suptitle("ROI selection using Canny Edge Detection",fontsize=20)
    
plt.show()

In [None]:
#This method will only work if the target leaf is in the middle area of the image & 
#the images are of good quality. There could be many cases when it will not give the 
#desired outputs.
#Also, we can see it is not giving accurate results for many of the images & we will 
#not get a good quality of images every time in real-world scenarios. Hence, I'll try 
#some other methods to accurately get the ROI, not as a rectangle but as the shape of 
#the target leaf.

In [None]:
#ROI selection using Watershed Transformation

#Any grayscale image can be viewed as a topographic surface where high intensity denotes 
#peaks and hills while low intensity denotes valleys. You start filling every isolated 
#valleys (local minima) with different colored water (labels). As the water rises, depending 
#on the peaks (gradients) nearby, water from different valleys, obviously with different 
#colors will start to merge. To avoid that, you build barriers in the locations where water 
#merges. You continue the work of filling water and building barriers until all the peaks are 
#under water. Then the barriers you created gives you the segmentation result. This is the 
#“philosophy” behind the watershed.

#You can visit the CMM webpage on watershed to understand it with the help of some animations.

In [None]:
%%time

# watershed lives in skimage.segmentation; the skimage.morphology alias was
# removed from recent scikit-image releases.
from skimage.segmentation import watershed

fig, ax = plt.subplots(nrows=6, ncols=3, figsize=(15,15))

# One column per random sample; rows show each stage of the pipeline.
for i in range(3):
    orignal = getRandomImage()

    gray = cv.cvtColor(orignal,cv.COLOR_RGB2GRAY)
    sobel = filters.sobel(gray)

    # Smooth the gradient image before flooding it.
    blurred = filters.gaussian(sobel, sigma=2.0)

    ym = blurred.shape[0]//2
    xm = blurred.shape[1]//2

    # np.int was removed from NumPy; use an explicit integer dtype instead.
    markers = np.zeros(blurred.shape,dtype=np.int32)
    # using the one-pixel border of the image as background (label 1)
    markers[0,0:2*xm] = 1
    markers[2*ym-1,0:2*xm] = 1
    markers[0:2*ym,0] = 1
    markers[0:2*ym,2*xm-1] = 1

    # using middle part of the image as foreground (label 2)
    markers[ym-50:ym+50,xm-20:xm+20] = 2

    mask = watershed(blurred, markers)

    ax[0,i].imshow(orignal)
    ax[0,i].set_title('Original Image', fontsize=12)

    ax[1,i].imshow(gray, cmap='gray')
    ax[1,i].set_title('Gray Image', fontsize=12)

    ax[2,i].imshow(sobel, cmap='gray')
    ax[2,i].set_title('After Sobel Filter', fontsize=12)

    ax[3,i].imshow(blurred, cmap='gray')
    ax[3,i].set_title('Blurred Image', fontsize=12)

    ax[4,i].imshow(mask, cmap='gray')
    ax[4,i].set_title('Mask', fontsize=12)

    # Black out the background on a copy: getRandomImage() returns the array
    # stored in `classes`, so masking it in place would corrupt that image
    # for every later cell.
    segmented = orignal.copy()
    segmented[mask==1,:] = [0,0,0]

    ax[5,i].imshow(segmented)
    ax[5,i].set_title('Segmented Image', fontsize=12)


for i in range(6):
    for j in range(3):
        ax[i,j].set_xticks([])
        ax[i,j].set_yticks([])

fig.suptitle("Image Segmentation (ROI selection) using Watershed Transformation",fontsize=20)

plt.show()

In [None]:
#As we can see it is able to extract the foreground from the image but it is not accurate 
#as there is so much noise (unnecessary edges) in the image. This method will not work if 
#the target leaf is not in the middle of the image. Maybe we can assign foreground & 
#background markers using some different technique to improve the performance of this method.
#We can use Canny edge detection before applying this method to reduce the area of focus.








In [None]:
### HSV Conversion

#**HSV** is closer to how humans perceive color. It has three components: **Hue, Saturation, and Value**.  This color space describes colors (hue or tint) in terms of their shade (saturation or amount of gray) and their brightness value.  

#The HSV color wheel sometimes appears as a cone or cylinder, but always with these three components:

#1) Hue  
#Hue is the color portion of the model, expressed as a number from 0 to 360 degrees:  
#* Red falls between 0 and 60 degrees.
#* Yellow falls between 61 and 120 degrees.
#* Green falls between 121-180 degrees.
#* Cyan falls between 181-240 degrees.
#* Blue falls between 241-300 degrees.
#* Magenta falls between 301-360 degrees.

#2) Saturation  
#Saturation describes the amount of gray in a particular color, from 0 to 100 percent. Reducing this component toward zero introduces more gray and produces a faded effect. Sometimes, saturation appears as a range from just 0-1, where 0 is gray, and 1 is a primary color.
  
#3) Value (or Brightness)  
#Value works in conjunction with saturation and describes the brightness or intensity of the color, from 0-100 percent, where 0 is completely black, and 100 is the brightest and reveals the most color.

In [None]:
#HSV Conversion

# 3x2 grid: each row shows a random image next to its HSV conversion.
fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(10, 10))

for i in range(3):
    orignal = getRandomImage()
    hsv = convertToHSV(orignal)
    
    ax[i,0].imshow(orignal)
    ax[i,0].set_title('Original Image', fontsize=15)
    # NOTE(review): hsv has three channels, so imshow presumably renders the
    # H, S, V planes as if they were R, G, B and ignores cmap -- confirm.
    ax[i,1].imshow(hsv, cmap='gray')
    ax[i,1].set_title('HSV Image', fontsize=15)
    
fig.suptitle("RGB to HSV Conversion",fontsize=20)
    
plt.show()

In [None]:
##Gray Scale Conversion
# 3x2 grid: each row shows a random image next to its grayscale conversion.
fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(10, 10))

for i in range(3):
    orignal = getRandomImage()
    # Images from getRandomImage() are in RGB order, hence COLOR_RGB2GRAY.
    gray = cv.cvtColor(orignal,cv.COLOR_RGB2GRAY)
    
    ax[i,0].imshow(orignal)
    ax[i,0].set_title('Original Image', fontsize=15)
    ax[i,1].imshow(gray, cmap='gray')
    ax[i,1].set_title('Gray Image', fontsize=15)
    
fig.suptitle("RGB to Gray Scale Conversion",fontsize=20)

plt.show()

<h2>Image Examples</h2>

In [None]:
#Function for showing image
def show_images(image_ids):
    """Plot up to 25 images from IMG_PATH in a grid with 5 columns.

    Parameters
    ----------
    image_ids : iterable of str
        Image file names without the '.jpg' extension. Ids beyond the first
        25 are ignored; nothing is drawn when the iterable is empty.

    Raises
    ------
    FileNotFoundError
        If an image file is missing or cannot be decoded.
    """
    col = 5
    max_rows = 5

    image_ids = list(image_ids)[:col * max_rows]
    if not image_ids:
        return

    # Ceiling division so a partially filled last row still gets axes.
    row = -(-len(image_ids) // col)

    # squeeze=False keeps `ax` two-dimensional even when there is one row.
    fig, ax = plt.subplots(row, col, figsize=(16, 8), squeeze=False)
    ax = ax.flatten()

    # Hide every axis up front so unused cells of the grid stay blank.
    for axis in ax:
        axis.set_axis_off()

    for axis, image_id in zip(ax, image_ids):
        # IMG_PATH already ends with '/'. The name IMAGE_PATH used previously
        # is not defined anywhere in the notebook.
        path = IMG_PATH + '{}.jpg'.format(image_id)
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        axis.imshow(image)
        axis.set_title(image_id)

<h3>Random samples</h3>

<h1>Setup TPU Config</h1>

In [None]:
AUTO = tf.data.experimental.AUTOTUNE

# Use the TPU when one is attached; otherwise fall back to TensorFlow's
# default strategy so the notebook still runs on CPU/GPU.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
except ValueError:
    # Raised by the resolver when no TPU can be located.
    tpu = None

if tpu is not None:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy()

# Global batch size scales with the number of replicas in the strategy.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync
GCS_DS_PATH = KaggleDatasets().get_gcs_path()

<h1>Load labels and paths</h1>

In [None]:
sub = pd.read_csv(SAMPLE_SUB)
test_data = pd.read_csv(TEST_DATA)
train_data = pd.read_csv(TRAIN_DATA)

In [None]:
def format_path(st):
    """Return the path of image id `st` under GCS_DS_PATH's images folder."""
    return ''.join([GCS_DS_PATH, '/images/', st, '.jpg'])

test_paths = test_data['image_id'].apply(format_path).values

# Full training set before it is split into train / validation parts.
all_train_paths = train_data['image_id'].apply(format_path).values
all_train_labels = train_data.loc[:, 'healthy':'scab'].values.astype(np.float32)

# Hold out 15% of the training rows for validation (fixed random_state).
(train_paths, valid_paths,
 train_labels, valid_labels) = train_test_split(
    all_train_paths, all_train_labels, test_size=0.15, random_state=42)

In [None]:
def decode_image(filename, label=None, image_size=(512, 512)):
    """Read a JPEG file and return it as a float32 tensor scaled by 1/255.

    The decoded 3-channel image is cast to float32, divided by 255 and
    resized to `image_size`. When `label` is given, an (image, label) pair
    is returned; otherwise only the image is returned.
    """
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    scaled = tf.cast(pixels, tf.float32) / 255.0
    image = tf.image.resize(scaled, image_size)

    return image if label is None else (image, label)

def data_augment(image, label=None): # Data augmentations
    """Apply random flips and colour jitter to one image.

    The image is randomly flipped left/right and up/down, then its
    saturation, contrast and brightness are randomly perturbed. When `label`
    is given, an (image, label) pair is returned; otherwise only the image.
    """
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    image = tf.image.random_saturation(image, 0.7, 1.3)
    image = tf.image.random_contrast(image, 0.8, 1.2)
    # Assign back to `image`: the result was previously bound to the
    # misspelled name `imgae`, so the brightness change was silently dropped.
    image = tf.image.random_brightness(image, 0.1)

    if label is None:
        return image
    else:
        return image, label

<h1>Creating Dataset objects</h1>

In [None]:
# Training pipeline: decode -> augment -> repeat -> shuffle -> batch -> prefetch.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_paths, train_labels))
    .map(decode_image, num_parallel_calls=AUTO)
    .map(data_augment, num_parallel_calls=AUTO)
    # repeat() has no count, so the dataset never ends; model.fit is given
    # steps_per_epoch to delimit epochs.
    .repeat()
    .shuffle(512)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

# Validation pipeline: no augmentation or shuffling; decoded batches are cached.
valid_dataset = (
    tf.data.Dataset
    .from_tensor_slices((valid_paths, valid_labels))
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .cache()
    .prefetch(AUTO)
)

# Test pipeline: built from paths only, so it yields images without labels.
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices(test_paths)
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
)

<h1>Defining Learning-Rate Scheduler</h1>

In [None]:
def build_lrfn(lr_start=0.00001, lr_max=0.00005, 
               lr_min=0.00001, lr_rampup_epochs=5, 
               lr_sustain_epochs=0, lr_exp_decay=.8,
               num_replicas=None):
    """Build an epoch -> learning-rate function (ramp-up, sustain, decay).

    Parameters
    ----------
    lr_start : learning rate at epoch 0.
    lr_max : per-replica peak learning rate; multiplied by `num_replicas`.
    lr_min : value the exponential decay approaches.
    lr_rampup_epochs : number of epochs of linear ramp from lr_start to the peak.
    lr_sustain_epochs : number of epochs the peak is held after the ramp.
    lr_exp_decay : per-epoch decay factor applied after the sustain phase.
    num_replicas : replica count used to scale lr_max. When None (default)
        it is read from the module-level `strategy`, as before.

    Returns
    -------
    A function `lrfn(epoch)` returning the learning rate for that epoch.
    """
    if num_replicas is None:
        num_replicas = strategy.num_replicas_in_sync
    lr_max = lr_max * num_replicas

    def lrfn(epoch):
        if epoch < lr_rampup_epochs:
            # Linear ramp from lr_start up to the scaled peak.
            lr = (lr_max - lr_start) / lr_rampup_epochs * epoch + lr_start
        elif epoch < lr_rampup_epochs + lr_sustain_epochs:
            lr = lr_max
        else:
            # Exponential decay from the peak towards lr_min.
            lr = (lr_max - lr_min) *\
                 lr_exp_decay**(epoch - lr_rampup_epochs\
                                - lr_sustain_epochs) + lr_min
        return lr
    return lrfn

<h1>Defining hyperparameters of fit</h1>

In [None]:
# Learning-rate schedule with the default ramp-up / decay parameters.
lrfn = build_lrfn()
# Number of full batches in the training split; needed because the training
# dataset repeats indefinitely.
STEPS_PER_EPOCH = train_labels.shape[0] // BATCH_SIZE
# Keras callback that calls lrfn at each epoch (verbose=1 prints the rate).
lr_schedule = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=1)

<h1>Function for visualizing training and validation accuracy</h1>

In [None]:
def display_training_curves(training, validation, yaxis):
    """Plot a training series and a validation series against the epoch number.

    Parameters
    ----------
    training : sequence of per-epoch training values.
    validation : sequence of per-epoch validation values.
    yaxis : "loss" labels the figure as a loss plot; any other value labels
        it as an accuracy plot.
    """
    if yaxis == "loss":
        ylabel = "Loss"
        title = "Loss vs. Epochs"
    else:
        ylabel = "Accuracy"
        title = "Accuracy vs. Epochs"

    # Derive the x-axis from the data itself rather than the global EPOCHS,
    # so a history shorter than EPOCHS is still plotted against the right epochs.
    train_epochs = np.arange(1, len(training) + 1)
    valid_epochs = np.arange(1, len(validation) + 1)

    fig = go.Figure()

    fig.add_trace(
        go.Scatter(x=train_epochs, mode='lines+markers', y=training, marker=dict(color="dodgerblue"),
               name="Train"))

    fig.add_trace(
        go.Scatter(x=valid_epochs, mode='lines+markers', y=validation, marker=dict(color="darkorange"),
               name="Val"))

    fig.update_layout(title_text=title, yaxis_title=ylabel, xaxis_title="Epochs", template="plotly_white")
    fig.show()

<h1>Modelling</h1>

<h2>1. DenseNet</h2>

In [None]:
#Setting the model to train in TPU
# Build and compile inside strategy.scope() so the model's variables are
# created under the distribution strategy.
with strategy.scope():
    # ImageNet-pretrained DenseNet201 without its classification head,
    # followed by global average pooling and a softmax layer with one unit
    # per label column.
    model = tf.keras.Sequential([DenseNet201(input_shape=(512, 512, 3),
                                             weights='imagenet',
                                             include_top=False),
                                 L.GlobalAveragePooling2D(),
                                 L.Dense(train_labels.shape[1],
                                         activation='softmax')])
        
    model.compile(optimizer='adam',
                  loss = 'categorical_crossentropy',
                  metrics=['categorical_accuracy'])
    model.summary()

In [None]:
#Fundamental Block
SVG(tf.keras.utils.model_to_dot(Model(model.layers[0].input, model.layers[0].layers[13].output), dpi=70).create(prog='dot', format='svg'))

In [None]:
#Model Architecture
SVG(tf.keras.utils.model_to_dot(model, dpi=70).create(prog='dot', format='svg'))

In [None]:
#Training
history = model.fit(train_dataset,
                    epochs=EPOCHS,
                    callbacks=[lr_schedule],
                    steps_per_epoch=STEPS_PER_EPOCH,
                    validation_data=valid_dataset)

In [None]:
print("Loss of the model is - " , model.evaluate(valid_dataset)[0])
#print("Accuracy of the model is - " , model.evaluate(train_dataset)[1]*100 , "%")

In [None]:
#Visualizing train and valid accuracy
display_training_curves(
    history.history['categorical_accuracy'], 
    history.history['val_categorical_accuracy'], 
    'accuracy')

In [None]:
#Visualizing train and valid loss
display_training_curves(
    history.history['loss'], 
    history.history['val_loss'], 
    'loss')

In [None]:
#plotting training values
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

acc = history.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)

#accuracy plot
plt.plot(epochs, acc, color='blue', label='Training Accuracy')
plt.plot(epochs, val_acc, color='red', label='Validation Accuracy')
plt.title('Training and Validation Accuracy with DenseNet')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend()

plt.figure()
#loss plot
plt.plot(epochs, loss, color='blue', label='Training Loss')
plt.plot(epochs, val_loss, color='red', label='Validation Loss')
plt.title('Training and Validation Loss with DenseNet')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

In [None]:
#Sample predictions
#Now, I will visualize some sample predictions made by the DenseNet201 model trained above. The red bars represent the model's prediction (maximum probability), the green bars represent the ground truth (label), and the rest of the bars are blue. When the model predicts correctly, the prediction bar is green.

In [None]:
#Prediction
probs_dnn = model.predict(test_dataset, verbose=1)
sub.loc[:, 'healthy':] = probs_dnn
sub.to_csv('submission_densenet.csv', index=False)
sub.head()

#LB:0.96792

In [None]:
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
# Distribution graphs (histogram/bar graph) of column data
def plotPerColumnDistribution(df, nGraphShown, nGraphPerRow):
    """Plot a histogram or bar chart for each low-cardinality column of df.

    Only columns with more than 1 and fewer than 50 unique values are kept.
    Numeric columns are drawn as histograms, all others as bar charts of
    their value counts.

    Parameters
    ----------
    df : DataFrame to summarise.
    nGraphShown : maximum number of columns to plot.
    nGraphPerRow : number of plots per row of the grid.
    """
    nunique = df.nunique()
    df = df[[col for col in df if nunique[col] > 1 and nunique[col] < 50]] # For displaying purposes, pick columns that have between 1 and 50 unique values
    nCol = df.shape[1]
    if nCol == 0:
        # Nothing qualifies (e.g. every column is continuous): avoid building
        # a figure with zero rows.
        print('No distribution plots shown: no column has between 1 and 50 unique values')
        return
    columnNames = list(df)
    # Integer ceiling division: plt.subplot needs an integer row count.
    nGraphRow = (nCol + nGraphPerRow - 1) // nGraphPerRow
    plt.figure(num = None, figsize = (6 * nGraphPerRow, 8 * nGraphRow), dpi = 80, facecolor = 'w', edgecolor = 'k')
    for i in range(min(nCol, nGraphShown)):
        plt.subplot(nGraphRow, nGraphPerRow, i + 1)
        columnDf = df.iloc[:, i]
        # The type of the first value decides between bar chart and histogram.
        if (not np.issubdtype(type(columnDf.iloc[0]), np.number)):
            valueCounts = columnDf.value_counts()
            valueCounts.plot.bar()
        else:
            columnDf.hist()
        plt.ylabel('counts')
        plt.xticks(rotation = 90)
        plt.title(f'{columnNames[i]} (column {i})')
    plt.tight_layout(pad = 1.0, w_pad = 1.0, h_pad = 1.0)
    plt.show()

In [None]:
# Correlation matrix
def plotCorrelationMatrix(df, graphWidth):
    """Plot the correlation matrix of the numeric, non-constant columns of df.

    Parameters
    ----------
    df : DataFrame; an optional `dataframeName` attribute is used in the title.
    graphWidth : width and height of the square figure, in inches.
    """
    # Fall back to a generic label when the custom attribute was not set.
    filename = getattr(df, 'dataframeName', 'DataFrame')
    df = df.dropna(axis='columns') # drop columns with NaN (axis must be a keyword in recent pandas)
    # Correlation is only defined for numeric data; string columns such as an
    # id column make DataFrame.corr() raise in recent pandas.
    df = df.select_dtypes(include=[np.number])
    df = df[[col for col in df if df[col].nunique() > 1]] # keep columns where there are more than 1 unique values
    if df.shape[1] < 2:
        print(f'No correlation plots shown: The number of non-NaN or constant columns ({df.shape[1]}) is less than 2')
        return
    corr = df.corr()
    plt.figure(num=None, figsize=(graphWidth, graphWidth), dpi=80, facecolor='w', edgecolor='k')
    corrMat = plt.matshow(corr, fignum = 1)
    plt.xticks(range(len(corr.columns)), corr.columns, rotation=90)
    plt.yticks(range(len(corr.columns)), corr.columns)
    plt.gca().xaxis.tick_bottom()
    plt.colorbar(corrMat)
    plt.title(f'Correlation Matrix for {filename}', fontsize=15)
    plt.show()

In [None]:
# Scatter and density plots
def plotScatterMatrix(df, plotSize, textSize):
    """Draw a scatter matrix with KDE diagonals for up to 10 numeric columns.

    Each panel above the diagonal is annotated with the correlation
    coefficient of its pair of columns.

    Parameters
    ----------
    df : DataFrame to plot.
    plotSize : width and height of the figure, in inches.
    textSize : font size of the correlation annotations.
    """
    df = df.select_dtypes(include =[np.number]) # keep only numerical columns
    # Remove rows and columns that would lead to df being singular
    df = df.dropna(axis='columns') # axis must be a keyword in recent pandas
    df = df[[col for col in df if df[col].nunique() > 1]] # keep columns where there are more than 1 unique values
    columnNames = list(df)
    if len(columnNames) > 10: # reduce the number of columns for matrix inversion of kernel density plots
        columnNames = columnNames[:10]
    df = df[columnNames]
    ax = pd.plotting.scatter_matrix(df, alpha=0.75, figsize=[plotSize, plotSize], diagonal='kde')
    corrs = df.corr().values
    # Call NumPy directly instead of through the incidental `plt.np` alias.
    for i, j in zip(*np.triu_indices_from(ax, k = 1)):
        ax[i, j].annotate('Corr. coef = %.3f' % corrs[i, j], (0.8, 0.2), xycoords='axes fraction', ha='center', va='center', size=textSize)
    plt.suptitle('Scatter and Density Plot')
    plt.show()

In [None]:
nRowsRead = 1000 # specify 'None' if want to read whole file
# submission.csv may have more rows in reality, but we are only loading/previewing the first 1000 rows
df1 = pd.read_csv('./submission_densenet.csv', delimiter=',', nrows = nRowsRead)
df1.dataframeName = 'submission_densenet.csv'
nRow, nCol = df1.shape
print(f'There are {nRow} rows and {nCol} columns')

In [None]:
df1.head(5)

In [None]:
plotPerColumnDistribution(df1, 10, 5)

In [None]:
plotCorrelationMatrix(df1, 8)

In [None]:
plotScatterMatrix(df1, 12, 10)

<h2>2. InceptionV3</h2>

In [None]:
# Same head as the DenseNet model, with an ImageNet-pretrained InceptionV3
# backbone (classification head removed).
with strategy.scope():
    model = tf.keras.Sequential([InceptionV3(input_shape=(512, 512, 3),
                                             weights='imagenet',
                                             include_top=False),
                                 L.GlobalAveragePooling2D(),
                                 L.Dense(train_labels.shape[1],
                                         activation='softmax')])
        
    model.compile(optimizer='adam',
                  loss = 'categorical_crossentropy',
                  metrics=['categorical_accuracy'])
    model.summary()

In [None]:
SVG(tf.keras.utils.model_to_dot(Model(model.layers[0].input, model.layers[0].layers[11].output), dpi=70).create(prog='dot', format='svg'))

In [None]:
SVG(tf.keras.utils.model_to_dot(model, dpi=70).create(prog='dot', format='svg'))

In [None]:
history = model.fit(train_dataset,
                    epochs=EPOCHS,
                    callbacks=[lr_schedule],
                    steps_per_epoch=STEPS_PER_EPOCH,
                    validation_data=valid_dataset)

In [None]:
display_training_curves(
    history.history['categorical_accuracy'], 
    history.history['val_categorical_accuracy'], 
    'accuracy')

In [None]:
#Visualizing train and valid loss
display_training_curves(
    history.history['loss'], 
    history.history['val_loss'], 
    'loss')

In [None]:
#plotting training values
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

acc = history.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)

#accuracy plot
plt.plot(epochs, acc, color='blue', label='Training Accuracy')
plt.plot(epochs, val_acc, color='red', label='Validation Accuracy')
plt.title('Training and Validation Accuracy with InceptionV3')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend()

plt.figure()
#loss plot
plt.plot(epochs, loss, color='blue', label='Training Loss')
plt.plot(epochs, val_loss, color='red', label='Validation Loss')
plt.title('Training and Validation Loss with InceptionV3')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

In [None]:

#print("[INFO] Calculating model accuracy")
#val_acc = model.evaluate(x_test, y_test)

#val_acc = model.evaluate(valid_dataset)
#print(f"Test Accuracy: {scores[1]*100}")

In [None]:
model.evaluate(valid_dataset, verbose=1)

In [None]:
# model.evaluate returns [loss, categorical_accuracy] for the metrics this
# model was compiled with. Accuracy can only be measured on the labeled
# validation split; `probs_incepv3` holds test predictions (not a score) and
# is not defined until a later cell.
valid_scores = model.evaluate(valid_dataset, verbose=0)
print(f"Validation Accuracy: {valid_scores[1]*100}")

In [None]:
import pickle

In [None]:
# save the model to disk
print("[INFO] Saving model...")
# Use a context manager so the file handle is flushed and closed.
# NOTE(review): pickling a Keras model is not supported by every TensorFlow
# version; model.save(...) is the documented way to persist one -- confirm
# which format downstream code expects before switching.
with open('InceptionV3_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
probs_incepv3 = model.predict(test_dataset, verbose=1)
sub.loc[:, 'healthy':] = probs_incepv3
sub.to_csv('submission_incepv3.csv', index=False)
sub.head()



In [None]:
# test_dataset is built from image paths only, so it has no labels to score
# against, and `model.evaluate[1]` subscripted the method instead of its
# result. Report accuracy on the labeled validation split instead.
valid_loss_value, valid_accuracy_value = model.evaluate(valid_dataset, verbose=1)
print(f"Validation Accuracy: {valid_accuracy_value*100}")

In [None]:

# Class probabilities for every test image (one row per image). These are
# predictions, not an accuracy: the test set has no labels, so only their
# shape is reported here.
test_predictions = model.predict(test_dataset)
print(f"Test predictions shape: {test_predictions.shape}")

In [None]:
# 5x2 grid: each row shows a random test image next to the predicted
# probability of every label column.
fig, ax = plt.subplots(nrows=5, ncols=2, figsize=(15, 18))

for i in range(5):
    # Random row position in test_df; also used to index test_predictions,
    # which assumes predictions are in the same order as test_df.
    img_id = random.choice(np.arange(0,test_df.shape[0]))
    test_image = getImage(test_df.image_id[img_id])
    
    ax[i,0].imshow(test_image)
    ax[i,0].set_title(f'{test_df.image_id[img_id]}', fontsize=12)
    ax[i,1].barh(y=train_df.columns[1:],width=test_predictions[img_id])
    ax[i,1].set_title('Predictions', fontsize=12)
    
fig.suptitle("Test set Predictions",fontsize=20)
    
plt.show()

In [None]:
# `scores` was never assigned anywhere in the notebook. Compute it on the
# labeled validation split; model.evaluate returns [loss, categorical_accuracy].
scores = model.evaluate(valid_dataset, verbose=0)
print(f"Validation Accuracy: {scores[1]*100}")

<h2>3. ResNet</h2>

In [None]:
# Same head as the previous models, with an ImageNet-pretrained ResNet50V2
# backbone (classification head removed).
with strategy.scope():
    model = tf.keras.Sequential([ResNet50V2(input_shape=(512, 512, 3),
                                             weights='imagenet',
                                             include_top=False),
                                 L.GlobalAveragePooling2D(),
                                 L.Dense(train_labels.shape[1],
                                         activation='softmax')])
        
    model.compile(optimizer='adam',
                  loss = 'categorical_crossentropy',
                  metrics=['categorical_accuracy'])
    model.summary()

In [None]:
SVG(tf.keras.utils.model_to_dot(Model(model.layers[0].input, model.layers[0].layers[11].output), dpi=70).create(prog='dot', format='svg'))

In [None]:
SVG(tf.keras.utils.model_to_dot(model, dpi=70).create(prog='dot', format='svg'))

In [None]:
history = model.fit(train_dataset,
                    epochs=EPOCHS,
                    callbacks=[lr_schedule],
                    steps_per_epoch=STEPS_PER_EPOCH,
                    validation_data=valid_dataset)

In [None]:
display_training_curves(
    history.history['categorical_accuracy'], 
    history.history['val_categorical_accuracy'], 
    'accuracy')

In [None]:
#Visualizing train and valid loss
display_training_curves(
    history.history['loss'], 
    history.history['val_loss'], 
    'loss')

In [None]:
#plotting training values
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

acc = history.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)

#accuracy plot
plt.plot(epochs, acc, color='blue', label='Training Accuracy')
plt.plot(epochs, val_acc, color='red', label='Validation Accuracy')
plt.title('Training and Validation Accuracy with ResNet')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend()

plt.figure()
#loss plot
plt.plot(epochs, loss, color='blue', label='Training Loss')
plt.plot(epochs, val_loss, color='red', label='Validation Loss')
plt.title('Training and Validation Loss with ResNet')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

In [None]:
# Predict on the test set with the current `model` and keep the result as `probs_resnet`.
probs_resnet = model.predict(test_dataset, verbose=1)
# Overwrite every column from 'healthy' onwards in the shared `sub` frame (in place).
sub.loc[:, 'healthy':] = probs_resnet
sub.to_csv('submission_resnet.csv', index=False)
sub.head()

# LB (presumably the leaderboard score for this submission): 0.94379

In [None]:
# Preview the ResNet submission file: load at most `nRowsRead` rows
# (set it to None to read the whole file).
nRowsRead = 1000
df1 = pd.read_csv('./submission_resnet.csv', sep=',', nrows=nRowsRead)
# Custom attribute stored on the frame; presumably read by the plot* helpers - verify.
df1.dataframeName = 'submission_resnet.csv'
nRow, nCol = df1.shape
print(f'There are {nRow} rows and {nCol} columns')

In [None]:
df1.head(5)

In [None]:
plotPerColumnDistribution(df1, 10, 5)

In [None]:
plotCorrelationMatrix(df1, 8)

In [None]:
plotScatterMatrix(df1, 12, 10)

<h2>4. InceptionResNet</h2>

In [None]:
# InceptionResNetV2 backbone (ImageNet weights, no classification top) followed by
# global average pooling and a softmax layer with one unit per label column.
with strategy.scope():
    classifier_layers = [
        InceptionResNetV2(input_shape=(512, 512, 3),
                          weights='imagenet',
                          include_top=False),
        L.GlobalAveragePooling2D(),
        L.Dense(train_labels.shape[1], activation='softmax'),
    ]
    model = tf.keras.Sequential(classifier_layers)

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['categorical_accuracy'])
    model.summary()

In [None]:
SVG(tf.keras.utils.model_to_dot(Model(model.layers[0].input, model.layers[0].layers[11].output), dpi=70).create(prog='dot', format='svg'))

In [None]:
SVG(tf.keras.utils.model_to_dot(model, dpi=70).create(prog='dot', format='svg'))

In [None]:
# Train the current `model`; the returned History object holds the per-epoch metrics.
fit_options = dict(
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    callbacks=[lr_schedule],
    validation_data=valid_dataset,
)
history = model.fit(train_dataset, **fit_options)

In [None]:
display_training_curves(
    history.history['categorical_accuracy'], 
    history.history['val_categorical_accuracy'], 
    'accuracy')

In [None]:
#Visualizing train and valid loss
display_training_curves(
    history.history['loss'], 
    history.history['val_loss'], 
    'loss')

In [None]:
# Learning curves for the InceptionResNet run: accuracy first, loss in a second figure.
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()  # apply seaborn's default theme to the matplotlib figures

# Per-epoch metrics recorded in `history`
training_log = history.history
acc, val_acc = training_log['categorical_accuracy'], training_log['val_categorical_accuracy']
loss, val_loss = training_log['loss'], training_log['val_loss']
epochs = range(1, len(loss) + 1)  # 1-based epoch numbers for the x-axis

# Accuracy curves
plt.title('Training and Validation Accuracy with InceptionResNet')
plt.plot(epochs, acc, label='Training Accuracy', color='blue')
plt.plot(epochs, val_acc, label='Validation Accuracy', color='red')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Loss curves, drawn on a fresh figure
plt.figure()
plt.title('Training and Validation Loss with InceptionResNet')
plt.plot(epochs, loss, label='Training Loss', color='blue')
plt.plot(epochs, val_loss, label='Validation Loss', color='red')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()

In [None]:
# Predict on the test set with the current `model` and keep the result as `probs_incepres`
# (the ensembling cell later blends this array with the other models' outputs).
probs_incepres = model.predict(test_dataset, verbose=1)
# Overwrite every column from 'healthy' onwards in the shared `sub` frame (in place).
sub.loc[:, 'healthy':] = probs_incepres
sub.to_csv('submission_incepres.csv', index=False)
sub.head()

# LB (presumably the leaderboard score for this submission): 0.96181

In [None]:
# Preview the InceptionResNet submission file: load at most `nRowsRead` rows
# (set it to None to read the whole file).
nRowsRead = 1000
df1 = pd.read_csv('./submission_incepres.csv', sep=',', nrows=nRowsRead)
# Custom attribute stored on the frame; presumably read by the plot* helpers - verify.
df1.dataframeName = 'submission_incepres.csv'
nRow, nCol = df1.shape
print(f'There are {nRow} rows and {nCol} columns')

In [None]:
df1.head(5)

In [None]:
plotPerColumnDistribution(df1, 10, 5)

In [None]:
plotCorrelationMatrix(df1, 8)

In [None]:
plotScatterMatrix(df1, 12, 10)

In [None]:
#5. VGG19

In [None]:
# VGG19 backbone (ImageNet weights, no classification top) followed by
# global average pooling and a softmax layer with one unit per label column.
with strategy.scope():
    classifier_layers = [
        VGG19(input_shape=(512, 512, 3),
              weights='imagenet',
              include_top=False),
        L.GlobalAveragePooling2D(),
        L.Dense(train_labels.shape[1], activation='softmax'),
    ]
    model = tf.keras.Sequential(classifier_layers)

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['categorical_accuracy'])
    model.summary()

In [None]:
SVG(tf.keras.utils.model_to_dot(Model(model.layers[0].input, model.layers[0].layers[11].output), dpi=70).create(prog='dot', format='svg'))

In [None]:
SVG(tf.keras.utils.model_to_dot(model, dpi=70).create(prog='dot', format='svg'))

In [None]:
# Train the current `model`; the returned History object holds the per-epoch metrics.
fit_options = dict(
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    callbacks=[lr_schedule],
    validation_data=valid_dataset,
)
history = model.fit(train_dataset, **fit_options)

In [None]:
display_training_curves(
    history.history['categorical_accuracy'], 
    history.history['val_categorical_accuracy'], 
    'accuracy')

In [None]:
display_training_curves(
    history.history['loss'], 
    history.history['val_loss'], 
    'loss')

In [None]:
# Learning curves for the VGG19 run: accuracy first, loss in a second figure.
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()  # apply seaborn's default theme to the matplotlib figures

# Per-epoch metrics recorded in `history`
training_log = history.history
acc, val_acc = training_log['categorical_accuracy'], training_log['val_categorical_accuracy']
loss, val_loss = training_log['loss'], training_log['val_loss']
epochs = range(1, len(loss) + 1)  # 1-based epoch numbers for the x-axis

# Accuracy curves
plt.title('Training and Validation Accuracy with VGG19')
plt.plot(epochs, acc, label='Training Accuracy', color='blue')
plt.plot(epochs, val_acc, label='Validation Accuracy', color='red')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Loss curves, drawn on a fresh figure
plt.figure()
plt.title('Training and Validation Loss with VGG19')
plt.plot(epochs, loss, label='Training Loss', color='blue')
plt.plot(epochs, val_loss, label='Validation Loss', color='red')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()

In [None]:
# Predict on the test set with the VGG19 model.
# The result gets its own name: storing it in `probs_incepres` (as this cell used to)
# replaced the InceptionResNet probabilities that the ensembling cell blends.
probs_vgg19 = model.predict(test_dataset, verbose=1)
# Overwrite every column from 'healthy' onwards in the shared `sub` frame (in place).
sub.loc[:, 'healthy':] = probs_vgg19
sub.to_csv('submission_vgg19.csv', index=False)
sub.head()


In [None]:
# Preview the VGG19 submission file: load at most `nRowsRead` rows
# (set it to None to read the whole file).
nRowsRead = 1000
df1 = pd.read_csv('./submission_vgg19.csv', sep=',', nrows=nRowsRead)
# Custom attribute stored on the frame; presumably read by the plot* helpers - verify.
df1.dataframeName = 'submission_vgg19.csv'
nRow, nCol = df1.shape
print(f'There are {nRow} rows and {nCol} columns')

In [None]:
df1.head(5)

In [None]:
plotPerColumnDistribution(df1, 10, 5)

In [None]:
plotCorrelationMatrix(df1, 8)

In [None]:
plotScatterMatrix(df1, 12, 10)

In [None]:
#6. VGG16

In [None]:
# VGG16 backbone (ImageNet weights, no classification top) followed by
# global average pooling and a softmax layer with one unit per label column.
with strategy.scope():
    classifier_layers = [
        VGG16(input_shape=(512, 512, 3),
              weights='imagenet',
              include_top=False),
        L.GlobalAveragePooling2D(),
        L.Dense(train_labels.shape[1], activation='softmax'),
    ]
    model = tf.keras.Sequential(classifier_layers)

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['categorical_accuracy'])
    model.summary()

In [None]:
SVG(tf.keras.utils.model_to_dot(Model(model.layers[0].input, model.layers[0].layers[11].output), dpi=70).create(prog='dot', format='svg'))

In [None]:
SVG(tf.keras.utils.model_to_dot(model, dpi=70).create(prog='dot', format='svg'))

In [None]:
# Train the current `model`; the returned History object holds the per-epoch metrics.
fit_options = dict(
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    callbacks=[lr_schedule],
    validation_data=valid_dataset,
)
history = model.fit(train_dataset, **fit_options)

In [None]:
display_training_curves(
    history.history['categorical_accuracy'], 
    history.history['val_categorical_accuracy'], 
    'accuracy')

In [None]:
display_training_curves(
    history.history['loss'], 
    history.history['val_loss'], 
    'loss')

In [None]:
# Learning curves for the VGG16 run: accuracy first, loss in a second figure.
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()  # apply seaborn's default theme to the matplotlib figures

# Per-epoch metrics recorded in `history`
training_log = history.history
acc, val_acc = training_log['categorical_accuracy'], training_log['val_categorical_accuracy']
loss, val_loss = training_log['loss'], training_log['val_loss']
epochs = range(1, len(loss) + 1)  # 1-based epoch numbers for the x-axis

# Accuracy curves
plt.title('Training and Validation Accuracy with VGG16')
plt.plot(epochs, acc, label='Training Accuracy', color='blue')
plt.plot(epochs, val_acc, label='Validation Accuracy', color='red')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Loss curves, drawn on a fresh figure
plt.figure()
plt.title('Training and Validation Loss with VGG16')
plt.plot(epochs, loss, label='Training Loss', color='blue')
plt.plot(epochs, val_loss, label='Validation Loss', color='red')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()

In [None]:
# Predict on the test set with the VGG16 model.
# The result gets its own name: storing it in `probs_incepres` (as this cell used to)
# replaced the InceptionResNet probabilities that the ensembling cell blends.
probs_vgg16 = model.predict(test_dataset, verbose=1)
# Overwrite every column from 'healthy' onwards in the shared `sub` frame (in place).
sub.loc[:, 'healthy':] = probs_vgg16
sub.to_csv('submission_vgg16.csv', index=False)
sub.head()


In [None]:
# Preview the VGG16 submission file: load at most `nRowsRead` rows
# (set it to None to read the whole file).
nRowsRead = 1000
df1 = pd.read_csv('./submission_vgg16.csv', sep=',', nrows=nRowsRead)
# Custom attribute stored on the frame; presumably read by the plot* helpers - verify.
df1.dataframeName = 'submission_vgg16.csv'
nRow, nCol = df1.shape
print(f'There are {nRow} rows and {nCol} columns')

In [None]:
df1.head(5)

In [None]:
plotPerColumnDistribution(df1, 10, 5)

In [None]:
plotCorrelationMatrix(df1, 8)

In [None]:
plotScatterMatrix(df1, 12, 10)

<h2>7. Ensembling</h2>

In [None]:
# Blend the per-model test probabilities with three different weightings and write
# one submission file per blend.
#
# Each ensemble gets its OWN copy of the submission frame. `[sub]*3` bound all three
# names to the same DataFrame, so each assignment overwrote the previous one and all
# three CSV files contained the last blend.
ensemble_1, ensemble_2, ensemble_3 = (sub.copy() for _ in range(3))

# The weights of each blend sum to 1.
ensemble_1.loc[:, 'healthy':] = 0.50*probs_dnn + 0.50*probs_incepres
ensemble_2.loc[:, 'healthy':] = 0.75*probs_dnn + 0.20*probs_incepres + 0.05*probs_incepv3
ensemble_3.loc[:, 'healthy':] = 0.80*probs_dnn + 0.20*probs_incepres

# NOTE: the LB values below were recorded while the three files were still identical.
ensemble_1.to_csv('submission_ensemble_1.csv', index=False) #LB :-0.96970
ensemble_2.to_csv('submission_ensemble_2.csv', index=False) #LB :-0.96970
ensemble_3.to_csv('submission_ensemble_3.csv', index=False) #LB :-0.96970

In [None]:
#InceptionResNet

In [None]:
# InceptionResNetV2 backbone (ImageNet weights, no classification top) followed by
# global average pooling and a softmax layer with one unit per label column.
with strategy.scope():
    classifier_layers = [
        InceptionResNetV2(input_shape=(512, 512, 3),
                          weights='imagenet',
                          include_top=False),
        L.GlobalAveragePooling2D(),
        L.Dense(train_labels.shape[1], activation='softmax'),
    ]
    model = tf.keras.Sequential(classifier_layers)

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['categorical_accuracy'])
    model.summary()

In [None]:
SVG(tf.keras.utils.model_to_dot(Model(model.layers[0].input, model.layers[0].layers[11].output), dpi=70).create(prog='dot', format='svg'))

In [None]:
SVG(tf.keras.utils.model_to_dot(model, dpi=70).create(prog='dot', format='svg'))

In [None]:
# Train the current `model`; the returned History object holds the per-epoch metrics.
fit_options = dict(
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    callbacks=[lr_schedule],
    validation_data=valid_dataset,
)
history = model.fit(train_dataset, **fit_options)

In [None]:
display_training_curves(
    history.history['categorical_accuracy'], 
    history.history['val_categorical_accuracy'], 
    'accuracy')

In [None]:
display_training_curves(
    history.history['loss'], 
    history.history['val_loss'], 
    'loss')

In [None]:
# Learning curves for the InceptionResNetV2 run: accuracy first, loss in a second figure.
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()  # apply seaborn's default theme to the matplotlib figures

# Per-epoch metrics recorded in `history`
training_log = history.history
acc, val_acc = training_log['categorical_accuracy'], training_log['val_categorical_accuracy']
loss, val_loss = training_log['loss'], training_log['val_loss']
epochs = range(1, len(loss) + 1)  # 1-based epoch numbers for the x-axis

# Accuracy curves
plt.title('Training and Validation Accuracy with InceptionResNetV2')
plt.plot(epochs, acc, label='Training Accuracy', color='blue')
plt.plot(epochs, val_acc, label='Validation Accuracy', color='red')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Loss curves, drawn on a fresh figure
plt.figure()
plt.title('Training and Validation Loss with InceptionResNetV2')
plt.plot(epochs, loss, label='Training Loss', color='blue')
plt.plot(epochs, val_loss, label='Validation Loss', color='red')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()

In [None]:
# Show the current model's predictions for four training images:
# the picture on the left, the class-probability bars on the right.

# Class order used both for decoding the arg-max index and for the bar x-axis.
CLASS_NAMES = ["Healthy", "Multiple diseases", "Rust", "Scab"]
DEFAULT_COLOR = px.colors.qualitative.Plotly[0]
PREDICTED_COLOR = px.colors.qualitative.Plotly[1]
TRUE_COLOR = "seagreen"


def process(img):
    """Divide the pixel values by 255, resize to 512x512 and add a leading batch axis."""
    return cv2.resize(img/255.0, (512, 512)).reshape(-1, 512, 512, 3)


def predict(img):
    """Pass one image through the three layers of `model` and return the first output row."""
    return model.layers[2](model.layers[1](model.layers[0](process(img)))).numpy()[0]


def bar_colors(preds, true_label):
    """Return one bar colour per entry of CLASS_NAMES, in CLASS_NAMES order.

    The arg-max class gets PREDICTED_COLOR and `true_label` gets TRUE_COLOR; the true
    label is applied last, so a correct prediction is drawn in TRUE_COLOR. Building the
    list from CLASS_NAMES keeps the colours aligned with the bar x-axis (the first
    panel used a differently ordered dict, which put colours on the wrong bars).
    """
    colors = dict.fromkeys(CLASS_NAMES, DEFAULT_COLOR)
    colors[CLASS_NAMES[int(np.argmax(preds))]] = PREDICTED_COLOR
    colors[true_label] = TRUE_COLOR
    return [colors[name] for name in CLASS_NAMES]


# (index into train_images, class highlighted as the true label), one subplot row each.
# NOTE(review): these labels are carried over from the original cell - confirm they
# match the actual labels of train_images[2], [0], [3] and [1].
examples = [(2, "Healthy"), (0, "Multiple diseases"), (3, "Rust"), (1, "Scab")]

fig = make_subplots(rows=len(examples), cols=2)
for row, (image_index, true_label) in enumerate(examples, start=1):
    preds = predict(train_images[image_index])
    thumbnail = cv2.resize(train_images[image_index], (205, 136))
    fig.add_trace(go.Image(z=thumbnail), row=row, col=1)
    fig.add_trace(go.Bar(x=CLASS_NAMES, y=preds,
                         marker=dict(color=bar_colors(preds, true_label))),
                  row=row, col=2)

# `model` is the InceptionResNetV2 network at this point, so the title says so
# (it used to read "DenseNet Predictions").
fig.update_layout(height=1200, width=800, title_text="InceptionResNetV2 Predictions", showlegend=False)
fig.update_layout(template="plotly_white")

In [None]:
# Predict on the test set with the current `model` and keep the result as `probs_incepres`.
probs_incepres = model.predict(test_dataset, verbose=1)
# Overwrite every column from 'healthy' onwards in the shared `sub` frame (in place).
sub.loc[:, 'healthy':] = probs_incepres
sub.to_csv('submission_InceptionResNetV2.csv', index=False)
sub.head()

In [None]:
import os
import pandas as pd
import numpy as np
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
import re
from tensorflow.keras.applications.resnet  import  ResNet50 as resNet
from tensorflow.keras.models import Sequential
from PIL import Image, ImageDraw, ImageEnhance
import albumentations as albu

In [None]:
# Target size used when images are resized on load.
img_width,img_height = 512,512

# All dataset paths are derived from `root_path`, which already ends with "/".
# Appending plain names avoids the "//images" double slash and the duplicated root literal.
root_path = "../input/plant-pathology-2020-fgvc7/"
train_path = root_path + "images"
test_path = root_path + "images"  # same folder as train_path
train_csv_path = root_path + "train.csv"
sample_path = root_path + "sample_submission.csv"

In [None]:
# Load the training table and collect the ids of the files found in the image folder.
total_df = pd.read_csv(train_csv_path)
# File id = everything before the first "." of the file name.
total_ids = [file_name.split(".")[0] for file_name in os.listdir(train_path)]
total_df.head()

In [None]:
def draw_bboxes(bboxs,img):
    """Draw every box of ``bboxs`` onto ``img`` with cv2.rectangle and return ``img``.

    Each box is indexed as [x, y, w, h]: one corner is (x, y), the opposite corner
    is (x + w, y + h). The rectangles are drawn on the image that is passed in.
    """
    box_color, line_thickness = (255, 0, 0), 3
    for box in bboxs:
        first_corner = (box[0], box[1])
        opposite_corner = (box[2] + box[0], box[3] + box[1])
        cv2.rectangle(img, first_corner, opposite_corner, box_color, line_thickness)
    return img

In [None]:
def get_bboxes(image_id):
    """Return the bounding boxes stored for ``image_id`` as a list of int32 arrays.

    Every ``bbox`` entry of the matching ``total_df`` rows is treated as text: spaces
    are stripped, all unsigned decimal numbers are extracted with a regex, and the
    numbers are converted to float32 and then cast to int32.

    NOTE(review): requires ``total_df`` to have a ``bbox`` column holding strings -
    confirm the loaded CSV actually provides it.
    """
    selected_df = total_df[total_df['image_id'] == image_id]

    bboxes = []
    for row in selected_df['bbox']:
        numbers = re.findall("([0-9]+[.]?[0-9]*)", row.replace(" ", ""))
        # Parse as float first so values such as "12.0" are accepted, then cast to int32.
        bboxes.append(np.int32(np.float32(np.array(numbers))))
    return bboxes

In [None]:
def read_image(image_id):
    """Open ``<train_path>/<image_id>.jpg`` with PIL, resize it to
    (img_width, img_height) and return the result as a NumPy array."""
    resized = Image.open(f"{train_path}/{image_id!s}.jpg").resize((img_width, img_height))
    return np.asarray(resized)

In [None]:
###SEGMENTATION

In [None]:
# Clone the jakeret/unet repository, then pip-install the cloned checkout
# (the install path is relative, so it depends on the current working directory).
!git clone https://github.com/jakeret/unet
!pip install ../working/unet/

In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import os
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.utils import shuffle
from sklearn.utils import class_weight
from sklearn.preprocessing import minmax_scale
import random
import cv2
from imgaug import augmenters as iaa
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Activation
from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

from unet import utils
from unet.datasets import circles
import unet

In [None]:
# Folder with the segmentation training images and the CSV listing them.
training_folder = '/kaggle/input/plant-2020-seg/plant_2020/train/'
# Read the sample list and store the label column as strings.
samples_df = (
    pd.read_csv("/kaggle/input/plant-2020-seg/plant_2020/train1.csv")
    .astype({"label": "str"})
)
samples_df.head()

In [None]:
# Keep only the rows whose label is the string '2' (labels were cast to str on load).
samples_df = samples_df.query("label=='2'")

In [None]:
# Positional 80/20 split of the sample table: the first rows train, the rest validate.
training_percentage = 0.8
total_item_count = len(samples_df)
training_item_count = int(total_item_count*training_percentage)
validation_item_count = total_item_count - training_item_count
training_df = samples_df.iloc[:training_item_count]
validation_df = samples_df.iloc[training_item_count:]

In [None]:
def get_ECI_band(img):
    '''
    Compute the ECI band of an RGB image with values between 0 and 255.

    The image is divided by 255 and smoothed with a 35x35 Gaussian blur, then
        ECI = (red_channel-1)^2 + green_channel^2/0.16
    is evaluated per pixel (channel 0 is taken as red, channel 1 as green).
    The band is divided by its maximum, multiplied by 255 and returned as uint8.
    '''
    blurred = cv2.GaussianBlur(img/255., (35,35), 0)
    red, green = blurred[:,:,0], blurred[:,:,1]
    raw_band = np.power(red-1,2) + np.power(green,2)/0.16
    return (raw_band/raw_band.max()*255).astype(np.uint8)


def get_CIVE_band(img):
    '''
    Return the CIVE band calculated from an RGB image between 0 and 255,
    rescaled to the full uint8 range (0-255).

    The image is smoothed with a 35x35 Gaussian blur, then
        CIVE = 0.441*red_channel - 0.881*green_channel + 0.385*blue_channel + 18.787
    is evaluated per pixel and min-max scaled to 0-255.

    The previous normalisation, (CIVE + |min|) / max cast to uint8, was never
    multiplied by 255 and divided by the unshifted maximum, so the cast collapsed
    the band to a handful of integer values.
    '''
    img = cv2.GaussianBlur(img,(35,35),0)
    CIVE_band = 0.441*img[:,:,0] - 0.881*img[:,:,1] + 0.385*img[:,:,2] + 18.787
    lowest = CIVE_band.min()
    value_range = CIVE_band.max() - lowest
    if value_range == 0:
        # Constant band: nothing to scale, and dividing would produce NaNs.
        return np.zeros(CIVE_band.shape, dtype=np.uint8)
    normalized_CIVE_band = ((CIVE_band - lowest)/value_range*255).astype(np.uint8)
    return normalized_CIVE_band


def apply_ECI_mask(img, vegetation_index_band):
    '''
    Threshold the index band with Otsu's method (THRESH_BINARY) and return
    ``img`` AND-ed with itself under the resulting binary mask.
    '''
    threshold_mode = cv2.THRESH_BINARY+cv2.THRESH_OTSU
    _, binary_mask = cv2.threshold(vegetation_index_band, 0, 255, threshold_mode)
    return cv2.bitwise_and(img, img, mask=binary_mask)


def apply_CIVE_mask(img, vegetation_index_band):
    '''
    Threshold the index band with Otsu's method, inverted (THRESH_BINARY_INV),
    and return ``img`` AND-ed with itself under the resulting binary mask.
    '''
    threshold_mode = cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU
    _, binary_mask = cv2.threshold(vegetation_index_band, 0, 255, threshold_mode)
    return cv2.bitwise_and(img, img, mask=binary_mask)

In [None]:
# One row per sample image: the original, its ECI band, and the Otsu-masked image.
plt.figure(figsize=(20, 20))
items = 6
for idx, image_id in enumerate(samples_df.image_id[:items]):
    img = np.array(Image.open(training_folder+image_id))
    ECI_band = get_ECI_band(img)
    masked_img = apply_ECI_mask(img, ECI_band)

    panels = (("original", img), ("ECI band", ECI_band), ("ECI+Otsu", masked_img))
    for column, (panel_title, picture) in enumerate(panels, start=1):
        ax = plt.subplot(items, 3, idx*3 + column)
        ax.set_title(panel_title)
        plt.imshow(picture)

In [None]:
import os

# Print the full path of every file found under /kaggle/input.
input_files = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_files:
    print(file_path)

In [None]:
import numpy as np
import cv2

# Segment one training image by thresholding it in HSV space.
image_path = '../input/plant-2020-seg/plant_2020/train/Train_1000.jpg'
image = cv2.imread(image_path)  # OpenCV loads images in BGR channel order
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(image_path)

hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# Lower and upper HSV limits: any hue, saturation of at least 60, any value.
low_val = (0,60,0)
high_val = (179,255,255)
mask = cv2.inRange(hsv, low_val,high_val)
# Close small holes in the mask with an 8x8 structuring element.
mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel=np.ones((8,8),dtype=np.uint8))
# Keep only the pixels selected by the mask.
result = cv2.bitwise_and(image, image,mask=mask)

# Show the three images with matplotlib. The cell used to call
# plt.imshow("Result", result), i.e. the cv2.imshow(title, image) argument order,
# which passes the title string as the image. cv2.waitKey/destroyAllWindows were
# dropped because no OpenCV window is opened.
panels = [
    ("Result", cv2.cvtColor(result, cv2.COLOR_BGR2RGB)),  # matplotlib expects RGB
    ("Mask", mask),
    ("Image", cv2.cvtColor(image, cv2.COLOR_BGR2RGB)),
]
fig, axes = plt.subplots(1, len(panels), figsize=(18, 6))
for ax, (panel_title, picture) in zip(axes, panels):
    ax.imshow(picture, cmap="gray" if picture.ndim == 2 else None)
    ax.set_title(panel_title)
    ax.axis("off")
plt.show()

<h1>References</h1>

[https://www.kaggle.com/tarunpaparaju/plant-pathology-2020-eda-models/notebook](https://www.kaggle.com/tarunpaparaju/plant-pathology-2020-eda-models/notebook)

https://www.kaggle.com/pestipeti/eda-plant-pathology-2020