In [None]:
!pip install -q efficientnet

In [None]:
import os
import gc
import re

import cv2
import math
import numpy as np
import scipy as sp
import pandas as pd

from keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import MultiLabelBinarizer
import efficientnet.tfkeras as efn

import tensorflow as tf
from IPython.display import SVG
from keras.utils import plot_model
import tensorflow.keras.layers as L
from keras.utils import model_to_dot
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from kaggle_datasets import KaggleDatasets
from tensorflow.keras.applications import NASNetLarge

import seaborn as sns
from tqdm import tqdm
import matplotlib.cm as cm
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

tqdm.pandas()
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

np.random.seed(0)
tf.random.set_seed(0)

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Number of epochs passed to both training calls further down.
EPOCHS = 10
# NOTE(review): SAMPLE_LEN is not read by any cell visible here — confirm before removing.
SAMPLE_LEN = 100
# Locations of the competition's training images, label table and sample submission.
IMAGE_PATH = "../input/plant-pathology-2021-fgvc8/train_images/"
TRAIN_PATH = "../input/plant-pathology-2021-fgvc8/train.csv"
SUB_PATH = "../input/plant-pathology-2021-fgvc8/sample_submission.csv"

# Later cells read the 'image' and 'labels' columns of train_data.
sub = pd.read_csv(SUB_PATH)
train_data = pd.read_csv(TRAIN_PATH)
# test_data = pd.read_csv(TEST_PATH)

In [None]:
# Count how often each label occurs on its own; rows carrying several
# space-separated labels are left out of the tally.
classes = {}

for label_string in train_data['labels']:
    tokens = label_string.split(' ')
    if len(tokens) == 1:
        only_label = tokens[0]
        classes[only_label] = classes.get(only_label, 0) + 1

print(classes)

In [None]:
# Distinct raw label strings (a string may hold several space-separated labels).
# NOTE(review): dist_label is not read by any cell visible here.
dist_label=train_data['labels'].unique()

In [None]:
# Distinct raw label strings; each may hold several space-separated labels.
allLabels = train_data['labels'].unique()

# Individual label names in order of first appearance. That order becomes the
# order of the indicator columns appended to train_data below.
uniqueLabels = []
for label_string in allLabels:
    for name in label_string.split(' '):
        if name not in uniqueLabels:
            uniqueLabels.append(name)

# One 0/1 indicator column per label, computed a whole column at a time
# instead of writing single cells with .loc inside iterrows().
label_tokens = train_data['labels'].str.split(' ')
for name in uniqueLabels:
    train_data[name] = label_tokens.apply(lambda tokens: name in tokens).astype('int64')

train_data.head()

In [None]:
# Hold out 15% of the rows for validation.
# NOTE(review): this rebinds train_data to the 85% split, so the later cells
# that read train_data again (label re-encoding, TPU path building and the
# second train_test_split) work on this split, not on the full CSV.
train_data, valid_data = train_test_split(train_data, test_size=0.15, random_state=1010)

In [None]:
# Row/column counts of the training split, then of the validation split.
for split_frame in (train_data, valid_data):
    print(split_frame.shape)

In [None]:
# Training-time generator: rescales pixels to [0, 1] and applies random
# geometric / brightness augmentation.
image_data_generator = ImageDataGenerator(
    rescale=1./255.0,
    rotation_range=15,
    height_shift_range=0.10,
    width_shift_range=0.10,
    brightness_range=(0.8, 1.2),
    zoom_range=0.15,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest"
)

# Validation images only get the same rescaling; augmenting them would make
# the validation metrics noisy and not comparable between epochs.
valid_image_data_generator = ImageDataGenerator(rescale=1./255.0)

# Indicator columns used as the multi-label target.
columns = ['healthy', 'scab', 'frog_eye_leaf_spot', 'complex', 'rust', 'powdery_mildew']

train_generator = image_data_generator.flow_from_dataframe(
    dataframe=train_data,
    directory=IMAGE_PATH,
    x_col='image',
    y_col=columns,
    color_mode="rgb",
    class_mode='raw',
    seed=1010,
    shuffle=True,
    target_size=(224, 224),
    batch_size=16
)

valid_generator = valid_image_data_generator.flow_from_dataframe(
    dataframe=valid_data,
    directory=IMAGE_PATH,
    x_col='image',
    y_col=columns,
    color_mode="rgb",
    class_mode='raw',
    seed=1010,
    # Fixed order: evaluation does not benefit from shuffling.
    shuffle=False,
    target_size=(224, 224),
    batch_size=16
)

In [None]:
# ImageNet-initialised EfficientNet-B7 without its classification head.
backbone = efn.EfficientNetB7(input_shape=(224, 224, 3),
                              weights='imagenet',
                              include_top=False)

# Backbone -> global average pooling -> six independent sigmoid outputs.
model = tf.keras.Sequential()
model.add(backbone)
model.add(L.GlobalAveragePooling2D())
model.add(L.Dense(6, activation='sigmoid'))

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
def build_lrfn(lr_start=0.00001, lr_max=0.00005,
               lr_min=0.00001, lr_rampup_epochs=5,
               lr_sustain_epochs=0, lr_exp_decay=.8):
    """Build an epoch -> learning-rate function.

    The returned schedule has three phases:
      * linear ramp from ``lr_start`` towards ``lr_max`` while
        ``epoch < lr_rampup_epochs``;
      * constant ``lr_max`` for the next ``lr_sustain_epochs`` epochs;
      * exponential decay towards ``lr_min`` with factor ``lr_exp_decay``
        per epoch afterwards.

    Returns:
        A function ``lrfn(epoch)`` returning the learning rate for that epoch.
    """
    sustain_end = lr_rampup_epochs + lr_sustain_epochs

    def lrfn(epoch):
        # Phase 1: warm-up.
        if epoch < lr_rampup_epochs:
            return (lr_max - lr_start) / lr_rampup_epochs * epoch + lr_start
        # Phase 2: hold at the peak.
        if epoch < sustain_end:
            return lr_max
        # Phase 3: decay, counted from the end of the sustain phase.
        epochs_into_decay = epoch - lr_rampup_epochs - lr_sustain_epochs
        return (lr_max - lr_min) * lr_exp_decay**epochs_into_decay + lr_min

    return lrfn

In [None]:
# Whole batches per pass over each generator (a trailing partial batch is dropped).
STEPS_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEPS_SIZE_VALID = valid_generator.n // valid_generator.batch_size

# Per-epoch learning-rate schedule, logged by the callback at each epoch start.
lrfn = build_lrfn()
lr_schedule = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=1)

In [None]:
import keras

class CustomSaver(tf.keras.callbacks.Callback):
    """Callback that saves the full model whenever ``epoch % 5 == 0``.

    Derives from ``tf.keras`` (not the standalone ``keras`` package) so that it
    comes from the same Keras implementation as the ``tf.keras`` models it is
    attached to in this notebook.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Save the model at the end of epochs 0, 5, 10, ... (0-based index).

        Args:
            epoch: Zero-based index of the epoch that just finished.
            logs: Metrics dict supplied by Keras; unused. Defaults to ``None``
                rather than a shared mutable ``{}``.
        """
        if epoch % 5 == 0:
            # NOTE(review): '.hd5' is not one of the usual HDF5 suffixes
            # ('.h5' / '.hdf5') — confirm which on-disk format is intended.
            self.model.save("./model_{}.hd5".format(epoch))

In [None]:
saver = CustomSaver()

# Model.fit accepts Python generators / Sequence objects directly;
# fit_generator is deprecated in TF 2.x and absent from newer Keras releases.
history = model.fit(train_generator,
                    epochs=EPOCHS,
                    steps_per_epoch=STEPS_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEPS_SIZE_VALID,
                    callbacks=[lr_schedule, saver],)

In [None]:
from keras.preprocessing.image import ImageDataGenerator

def data_aug(curr_file,number,save_to):
    """Write randomly augmented copies of every image in a directory.

    Args:
        curr_file: Directory containing the source images (with or without a
            trailing path separator).
        number: How many augmented images to write per source image.
        save_to: Directory the augmented JPEGs are written to.

    Entries of ``curr_file`` that OpenCV cannot decode are skipped.
    """
    data_gen = ImageDataGenerator(rotation_range=8,
                                  width_shift_range=0.1,
                                  height_shift_range=0.1,
                                  shear_range=0.1,
                                  brightness_range=(0.8, 1.2),
                                  horizontal_flip=True,
                                  vertical_flip=True,
                                  fill_mode='nearest'
                                  )

    for filename in tqdm(os.listdir(curr_file)):
        image = cv2.imread(os.path.join(curr_file, filename))
        if image is None:
            # Not an image (or unreadable): nothing to augment.
            continue
        # OpenCV decodes to BGR, while the generator saves arrays as RGB;
        # convert so the written files keep the original colours.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image.reshape((1,) + image.shape)
        # Strip the extension whatever its length ('.jpg', '.jpeg', ...).
        save_prefix = "aug_" + os.path.splitext(filename)[0]
        batches = data_gen.flow(x=image, batch_size=1, save_to_dir=save_to,
                                save_prefix=save_prefix, save_format='jpg')
        # Each next() produces (and saves) one augmented image, so this writes
        # exactly `number` files per source image.
        for _ in range(number):
            next(batches)

In [None]:
# Contrast-limited adaptive histogram equalisation (CLAHE) applied to the
# lightness channel only, so colours are left untouched.
bgr = cv2.imread('../input/plant-pathology-2021-fgvc8/train_images/800113bb65efe69e.jpg')
if bgr is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError('could not read the sample training image')

lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)

# cv2.split may return an immutable tuple; copy into a list so the L plane
# can be replaced in place.
lab_planes = list(cv2.split(lab))

clahe = cv2.createCLAHE(clipLimit=2.0,tileGridSize=(8,8))

lab_planes[0] = clahe.apply(lab_planes[0])

lab = cv2.merge(lab_planes)

bgr = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

In [None]:
# CLAHE operator working on an 8x8 grid of tiles.
clahe = cv2.createCLAHE(clipLimit = 2.0, tileGridSize=(8,8))

# Sample image as loaded (BGR), plus a 600x600 copy of it.
original_img = cv2.imread('../input/plant-pathology-2021-fgvc8/train_images/800113bb65efe69e.jpg')
resized_img = cv2.resize(original_img, (600, 600))

# Grayscale version of the sample, then its CLAHE-equalised counterpart.
img = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
gray_img = clahe.apply(img)

# Expand the equalised image back to three channels and make a 600x600 copy.
final_img = cv2.cvtColor(gray_img, cv2.COLOR_GRAY2RGB)
resized_final_img = cv2.resize(final_img, (600, 600))

In [None]:
# Write the four variants produced by the grayscale CLAHE cell to the working
# directory. The value of the last imwrite call is what the cell displays.
cv2.imwrite('./original_image.jpg', original_img)
cv2.imwrite('./resized_image.jpg', resized_img)
cv2.imwrite('./final_image.jpg', final_img)
cv2.imwrite('./resized_final_image.jpg', resized_final_img)

In [None]:
# Distinct raw label strings of the current train_data (same statement as in
# the earlier cell). NOTE(review): dist_label is not read by any cell visible here.
dist_label=train_data['labels'].unique()

In [None]:
# Distinct raw label strings; each may hold several space-separated labels.
allLabels = train_data['labels'].unique()

# Individual label names in order of first appearance.
uniqueLabels = []
for label_string in allLabels:
    for name in label_string.split(' '):
        if name not in uniqueLabels:
            uniqueLabels.append(name)

# Raw label string of every row, in row order.
allLabs = train_data['labels'].tolist()

# One 0/1 indicator column per label, computed a whole column at a time
# instead of writing single cells with .loc inside iterrows().
label_tokens = train_data['labels'].str.split(' ')
for name in uniqueLabels:
    train_data[name] = label_tokens.apply(lambda tokens: name in tokens).astype('int64')

train_data.head()

In [None]:
# AUTO is passed below as num_parallel_calls / prefetch size so tf.data tunes them.
AUTO = tf.data.experimental.AUTOTUNE
# Resolver is created without arguments, i.e. with its own defaults.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()

# Connect to the cluster and initialise the TPU system before the strategy is built.
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
strategy = tf.distribute.experimental.TPUStrategy(tpu)

# Global batch size: 16 examples for each replica in the strategy.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync
# Base path under which format_path / TEST_DATA_PATH look for the image folders.
GCS_DS_PATH = KaggleDatasets().get_gcs_path()

In [None]:
def format_path(st):
    """Return the path of training image ``st`` under ``GCS_DS_PATH``."""
    return ''.join((GCS_DS_PATH, '/train_images/', st))

# Full path of every training image.
train_paths = train_data.image.apply(format_path).values

# Select the target columns by name rather than by a positional column slice,
# so the label order does not depend on the order in which the indicator
# columns happened to be created. This is the same order the inference cell
# uses to turn predictions back into label names.
LABEL_COLUMNS = ['healthy', 'scab', 'frog_eye_leaf_spot', 'complex', 'rust', 'powdery_mildew']
train_labels = np.float32(train_data[LABEL_COLUMNS].values)

In [None]:
# Split paths and labels together so they stay aligned; 15% goes to validation.
train_paths, valid_paths, train_labels, valid_labels = train_test_split(
    train_paths,
    train_labels,
    test_size=0.15,
    random_state=1010,
)

In [None]:
# Side lengths read by transform() below.
IMAGE_SIZE = [600, 600]

def decode_image(filename, label=None, image_size=(600, 600)):
    """Read a JPEG file and return it as a resized float image.

    The file is decoded to three channels, cast to float32, divided by 255 and
    resized to ``image_size``.

    Returns:
        The image alone when ``label`` is None, otherwise ``(image, label)``.
    """
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.cast(tf.image.decode_jpeg(raw_bytes, channels=3), tf.float32) / 255.0
    resized = tf.image.resize(pixels, image_size)

    return resized if label is None else (resized, label)

def data_augment(image, label=None):
    """Apply random flips plus brightness and contrast jitter to one image.

    Order of operations: left/right flip, up/down flip, brightness change with
    max delta 0.2, contrast factor drawn from [0.9, 1.1].

    Returns:
        The image alone when ``label`` is None, otherwise ``(image, label)``.
    """
    flipped = tf.image.random_flip_up_down(tf.image.random_flip_left_right(image))
    jittered = tf.image.random_contrast(
        tf.image.random_brightness(flipped, 0.2), 0.9, 1.1
    )

    return jittered if label is None else (jittered, label)

    
def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Build the 3x3 matrix used by transform() to map pixel indices.

    Args:
        rotation: Rotation angle in degrees.
        shear: Shear angle in degrees.
        height_zoom, width_zoom: Zoom factors; their reciprocals go on the
            diagonal of the zoom matrix.
        height_shift, width_shift: Offsets placed in the last column of the
            shift matrix.

    As called from transform(), every argument is a shape-[1] float32 tensor,
    which is what the concatenations with ``one`` / ``zero`` below rely on.

    Returns:
        The product (rotation . shear) . (zoom . shift) as a 3x3 tensor.
    """
    # returns 3x3 transform matrix which transforms indices
        
    # CONVERT DEGREES TO RADIANS
    rotation = math.pi * rotation / 180.
    shear = math.pi * shear / 180.
    
    # ROTATION MATRIX
    c1 = tf.math.cos(rotation)
    s1 = tf.math.sin(rotation)
    one = tf.constant([1],dtype='float32')
    zero = tf.constant([0],dtype='float32')
    # Nine shape-[1] pieces are concatenated row by row, then reshaped to 3x3.
    rotation_matrix = tf.reshape( tf.concat([c1,s1,zero, -s1,c1,zero, zero,zero,one],axis=0),[3,3] )
        
    # SHEAR MATRIX
    c2 = tf.math.cos(shear)
    s2 = tf.math.sin(shear)
    shear_matrix = tf.reshape( tf.concat([one,s2,zero, zero,c2,zero, zero,zero,one],axis=0),[3,3] )    
    
    # ZOOM MATRIX
    zoom_matrix = tf.reshape( tf.concat([one/height_zoom,zero,zero, zero,one/width_zoom,zero, zero,zero,one],axis=0),[3,3] )
    
    # SHIFT MATRIX
    shift_matrix = tf.reshape( tf.concat([one,zero,height_shift, zero,one,width_shift, zero,zero,one],axis=0),[3,3] )
    
    return K.dot(K.dot(rotation_matrix, shear_matrix), K.dot(zoom_matrix, shift_matrix))


def transform(image,label):
    """Randomly rotate, shear, zoom and shift a single image.

    Args:
        image: One image of size [DIM, DIM, 3] (not a batch), where DIM is
            IMAGE_SIZE[0].
        label: Passed through unchanged.

    Returns:
        ``(transformed_image, label)`` with the image reshaped to [DIM, DIM, 3].
    """
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image randomly rotated, sheared, zoomed, and shifted
    DIM = IMAGE_SIZE[0]
    XDIM = DIM%2 #fix for size 331
    
    # Random parameters, each a shape-[1] tensor drawn from a scaled normal:
    # rotation and shear in degrees, zoom centred on 1.0, shift in pixels.
    rot = 15. * tf.random.normal([1],dtype='float32')
    shr = 1.0 * tf.random.normal([1],dtype='float32') 
    h_zoom = 1.0 + tf.random.normal([1],dtype='float32')/10.
    w_zoom = 1.0 + tf.random.normal([1],dtype='float32')/10.
    h_shift = 10. * tf.random.normal([1],dtype='float32') 
    w_shift = 10. * tf.random.normal([1],dtype='float32') 
      
    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift) 

    # LIST DESTINATION PIXEL INDICES
    # Coordinates are centred on the image; the third row of ones makes them
    # homogeneous so the 3x3 matrix can also apply the shift.
    x = tf.repeat( tf.range(DIM//2,-DIM//2,-1), DIM )
    y = tf.tile( tf.range(-DIM//2,DIM//2),[DIM] )
    z = tf.ones([DIM*DIM],dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m,tf.cast(idx,dtype='float32'))
    idx2 = K.cast(idx2,dtype='int32')
    # Clip so the source coordinates computed below stay inside the image.
    idx2 = K.clip(idx2,-DIM//2+XDIM+1,DIM//2)
    
    # FIND ORIGIN PIXEL VALUES           
    # Convert centred coordinates back to array indices and gather the pixels.
    idx3 = tf.stack( [DIM//2-idx2[0,], DIM//2-1+idx2[1,]] )
    d = tf.gather_nd(image,tf.transpose(idx3))
        
    return tf.reshape(d,[DIM,DIM,3]),label

    

In [None]:
# Training pipeline. Shuffling happens on the (path, label) pairs, before any
# image is decoded: the buffer can then span the whole training set at
# negligible memory cost, and because it precedes repeat() every epoch is a
# fresh permutation that does not mix examples across epoch boundaries.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_paths, train_labels))
    .shuffle(len(train_paths))
    .repeat()
    .map(decode_image, num_parallel_calls=AUTO)
    .map(data_augment, num_parallel_calls=AUTO)
    .map(transform, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

# Validation pipeline: decode only, fixed order, cached after the first pass.
valid_dataset = (
    tf.data.Dataset
    .from_tensor_slices((valid_paths, valid_labels))
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .cache()
    .prefetch(AUTO)
)

# test_dataset = (
#     tf.data.Dataset
#     .from_tensor_slices(test_paths)
#     .map(decode_image, num_parallel_calls=AUTO)
#     .batch(BATCH_SIZE)
# )

In [None]:
def build_lrfn(lr_start=0.00001, lr_max=0.00005,
               lr_min=0.00001, lr_rampup_epochs=5,
               lr_sustain_epochs=0, lr_exp_decay=.8):
    """Build an epoch -> learning-rate function for the distributed run.

    ``lr_max`` is multiplied by ``strategy.num_replicas_in_sync`` first. The
    schedule then ramps linearly from ``lr_start`` to that peak over
    ``lr_rampup_epochs`` epochs, holds it for ``lr_sustain_epochs`` epochs and
    finally decays exponentially towards ``lr_min`` with factor
    ``lr_exp_decay`` per epoch.

    Returns:
        A function ``lrfn(epoch)`` returning the learning rate for that epoch.
    """
    peak_lr = lr_max * strategy.num_replicas_in_sync
    sustain_end = lr_rampup_epochs + lr_sustain_epochs

    def lrfn(epoch):
        # Phase 1: warm-up.
        if epoch < lr_rampup_epochs:
            return (peak_lr - lr_start) / lr_rampup_epochs * epoch + lr_start
        # Phase 2: hold at the peak.
        if epoch < sustain_end:
            return peak_lr
        # Phase 3: decay, counted from the end of the sustain phase.
        epochs_into_decay = epoch - lr_rampup_epochs - lr_sustain_epochs
        return (peak_lr - lr_min) * lr_exp_decay**epochs_into_decay + lr_min

    return lrfn

In [None]:
# Whole batches per epoch; needed because the training dataset repeats forever.
STEPS_PER_EPOCH = train_labels.shape[0] // BATCH_SIZE

# Per-epoch learning-rate schedule, logged by the callback at each epoch start.
lrfn = build_lrfn()
lr_schedule = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=1)

In [None]:
# Build and compile inside the strategy scope.
with strategy.scope():
    # ImageNet-initialised EfficientNet-B7 without its classification head.
    backbone = efn.EfficientNetB7(input_shape=(600, 600, 3),
                                  weights='imagenet',
                                  include_top=False)

    # Backbone -> global average pooling -> one sigmoid output per label column.
    model = tf.keras.Sequential()
    model.add(backbone)
    model.add(L.GlobalAveragePooling2D())
    model.add(L.Dense(train_labels.shape[1], activation='sigmoid'))

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    model.summary()

In [None]:
# with strategy.scope():
#     inputs = L.Input((331, 331, 3))
#     base_model = NASNetLarge(include_top=False, input_shape=(331, 331, 3), weights='imagenet')
#     x = base_model(inputs)
#     out1 = L.GlobalMaxPooling2D()(x)
#     out2 = L.GlobalAveragePooling2D()(x)
#     out3 = L.Flatten()(x)
#     out = L.Concatenate(axis=-1)([out1, out2, out3])
#     out = L.Dropout(0.3)(out)
#     out = L.Dense(train_labels.shape[1], activation='sigmoid', name="3_")(out)
#     model = Model(inputs, out)
#     model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
#     model.summary()

In [None]:
# Render the layer graph to SVG and display it as the cell output.
model_graph = tf.keras.utils.model_to_dot(model, dpi=70)
SVG(model_graph.create(prog='dot', format='svg'))

In [None]:
# steps_per_epoch is required here because train_dataset repeats indefinitely;
# the validation dataset is finite and is consumed in full.
history = model.fit(
    train_dataset,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    validation_data=valid_dataset,
    callbacks=[lr_schedule],
)

In [None]:
def display_training_curves(training, validation, yaxis):
    """Plot a training and a validation metric against the epoch number.

    Args:
        training: Per-epoch values of the metric on the training set.
        validation: Per-epoch values of the metric on the validation set.
        yaxis: ``"loss"`` labels the plot as a loss curve; any other value
            labels it as an accuracy curve.

    The x axis is derived from the length of each series, so the plot stays
    correct when training ran for fewer epochs than the global ``EPOCHS``.
    """
    if yaxis == "loss":
        ylabel = "Loss"
        title = "Loss vs. Epochs"
    else:
        ylabel = "Accuracy"
        title = "Accuracy vs. Epochs"

    fig = go.Figure()

    series = (
        (training, "Train", "dodgerblue"),
        (validation, "Val", "darkorange"),
    )
    for values, trace_name, color in series:
        fig.add_trace(
            go.Scatter(x=np.arange(1, len(values) + 1), mode='lines+markers', y=values,
                       # Dict literal: the notebook later rebinds the name `dict`.
                       marker={"color": color},
                       name=trace_name))

    fig.update_layout(title_text=title, yaxis_title=ylabel, xaxis_title="Epochs", template="plotly_white")
    fig.show()

In [None]:
# The model is compiled with metrics=['accuracy'], so the history keys are
# 'accuracy' / 'val_accuracy' (not 'acc' / 'val_acc').
display_training_curves(
    history.history['accuracy'], 
    history.history['val_accuracy'], 
    'accuracy')

In [None]:
# Pass 'loss' so the figure is titled and labelled as a loss curve.
display_training_curves(
    history.history['loss'], 
    history.history['val_loss'], 
    'loss')

In [None]:
# Persist the trained model to the working directory.
model.save('model.h5')

In [None]:
from tensorflow import keras
# Reload the file written by the save cell; this rebinds `model`.
model = keras.models.load_model('./model.h5')

In [None]:
# Print the architecture of the reloaded model.
model.summary()

In [None]:
# Local alternative, kept for reference:
# TEST_DATA_PATH = '../input/plant-pathology-2021-fgvc8/test_images/'

# Test images are read from the dataset location stored in GCS_DS_PATH.
TEST_DATA_PATH = GCS_DS_PATH + '/test_images/'

def load_image(filename, image_size=(600, 600)):
    """Load one JPEG as a single-image batch ready for ``model.predict``.

    Args:
        filename: Path of the JPEG file.
        image_size: ``(height, width)`` to resize to. Defaults to 600x600, the
            input shape the EfficientNet-B7 model in this notebook is built
            with and the size used by ``decode_image`` during training.

    Returns:
        A float32 tensor of shape ``[1, height, width, 3]`` scaled to [0, 1].
    """
    bits = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(bits, channels=3)
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.image.resize(image, image_size)
    # Add the leading batch dimension expected by predict().
    return tf.reshape(image, [-1, image_size[0], image_size[1], 3])


# Label names in the order of the model's output units.
labels = ['healthy', 'scab', 'frog_eye_leaf_spot', 'complex', 'rust', 'powdery_mildew']
# print(sub.head())

Images = []
Labels = []

# A label is predicted when its sigmoid score reaches this value.
THRESHOLD = 0.5

# tf.io.gfile handles both local directories and gs:// locations, which
# os.listdir cannot list.
for image_name in tf.io.gfile.listdir(TEST_DATA_PATH):
    scores = model.predict(load_image(TEST_DATA_PATH + image_name))[0]

    curPred = [labels[k] for k, score in enumerate(scores) if score >= THRESHOLD]
    if not curPred:
        # Nothing reached the threshold: fall back to the single best label
        # (argmax picks the first one on ties).
        curPred = [labels[int(np.argmax(scores))]]

    Images.append(image_name)
    Labels.append(' '.join(curPred))


# Named `submission` so the builtin `dict` is not shadowed.
submission = {'image': Images, 'labels': Labels}
df = pd.DataFrame(submission)
df.to_csv('submission.csv', index=False)

# Read the file back as a quick sanity check of what was written.
sub = pd.read_csv('submission.csv')
print(sub.head())