In [None]:
#-------------------
# importing libraries
#-------------------
import tensorflow as tf
from tensorflow.keras import layers
from kaggle_datasets import KaggleDatasets
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import PIL
import shutil
import csv

import os
os.system('pip install /kaggle/input/keras-zip -q')
os.system('pip install /kaggle/input/efficientnet-zip/ -q --no-deps')

import efficientnet.tfkeras as efn

In [None]:
# Let tf.data pick the parallelism / prefetch buffer sizes at runtime.
AUTO = tf.data.experimental.AUTOTUNE

# Try to locate a TPU; TPUClusterResolver raises ValueError when none is
# available, in which case we fall back to the default strategy below.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # `tf.distribute.experimental.TPUStrategy` is deprecated in favour of
    # `tf.distribute.TPUStrategy`; keep the old symbol only as a fallback for
    # TensorFlow builds that do not ship the new one yet.
    if hasattr(tf.distribute, "TPUStrategy"):
        strategy = tf.distribute.TPUStrategy(tpu)
    else:
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    # No TPU found: use whatever strategy is currently active (the default one).
    strategy = tf.distribute.get_strategy()

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# Training images are read from the dataset's GCS bucket; the label CSV is
# read from the local Kaggle input mount.
GCS_DS_PATH = KaggleDatasets().get_gcs_path('plant-pathology-2021-fgvc8')
TRAIN_PATH = f"{GCS_DS_PATH}/train_images/"

train_df = pd.read_csv("../input/plant-pathology-2021-fgvc8/train.csv")

# Frequency of every distinct raw label string (combinations included).
count_dict = train_df.labels.value_counts()
classes = list(count_dict.index)
classes_count = list(count_dict.values)
print("Number of unique labels: ",len(classes))

# Each disease gets its own output index. "healthy" has no index of its own:
# it is dropped below, so a healthy sample ends up with an empty id list.
label2id = {
    name: idx
    for idx, name in enumerate(
        ['scab', 'frog_eye_leaf_spot', 'rust', 'complex', 'powdery_mildew']
    )
}
NUM_CLASSES = len(label2id)
id2label = {idx: name for name, idx in label2id.items()}

# "scab rust" -> [0, 2]; "healthy" -> []
train_df["labels"] = train_df["labels"].map(
    lambda raw: [label2id[token] for token in raw.split(" ") if token != "healthy"]
)

train_df.head()

In [None]:
#---------------------
# training constants
#---------------------
# Size every image is resized to, and the number of colour channels decoded.
HEIGHT = 512
WIDTH = 512
CHANNELS = 3

# Global batch size: 8 samples for each replica reported by the strategy.
BATCH_SIZE = strategy.num_replicas_in_sync * 8
SEED = 143  # NOTE(review): not referenced by any of the cells shown here
AUTO = tf.data.experimental.AUTOTUNE

# The first 90% of the rows are used for training, the remainder for validation.
SPLIT = int(len(train_df) * 0.9)

# Whole batches per epoch for each split (any partial batch is not counted).
STEPS_PER_EPOCH = SPLIT // BATCH_SIZE
VALID_STEPS = (len(train_df) - SPLIT) // BATCH_SIZE

# Data Preprocessing Functions

In [None]:
# Preprocess the Image
def process_img(filepath,label):
    """Read a JPEG file and turn it into a fixed-size float image.

    Args:
        filepath: path of the JPEG file, passed to ``tf.io.read_file``.
        label: returned unchanged alongside the image.

    Returns:
        ``(image, label)`` where ``image`` was decoded with ``CHANNELS``
        channels, converted to ``tf.float32`` and resized to
        ``[HEIGHT, WIDTH]``.
    """
    raw_bytes = tf.io.read_file(filepath)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=CHANNELS)
    # convert_image_dtype rescales integer pixel values into [0, 1].
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    return tf.image.resize(as_float, [HEIGHT, WIDTH]), label

# For Data Augmentation
def data_augment(image, label):
    """Apply a random chain of augmentations to one image.

    Six independent uniform draws in [0, 1) decide which optional steps run.
    The random left/right and up/down flips are always attempted. The image is
    resized back to ``[HEIGHT, WIDTH]`` at the end because the crop steps
    change its size.

    Args:
        image: image tensor to augment.
        label: returned unchanged.

    Returns:
        ``(augmented_image, label)``.
    """
    # One draw per optional step; the order of the draws is kept fixed.
    draw_transpose = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    draw_rotate = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    draw_saturation = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    draw_contrast = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    draw_brightness = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    draw_crop = tf.random.uniform([], 0, 1.0, dtype=tf.float32)

    # --- flips / transpose (transpose with probability 0.25) ---
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    if draw_transpose > .75:
        image = tf.image.transpose(image)

    # --- rotation by a multiple of 90 degrees; the lowest quarter of the
    # draw range leaves the image unrotated ---
    if draw_rotate > .75:
        image = tf.image.rot90(image, k=3)
    elif draw_rotate > .5:
        image = tf.image.rot90(image, k=2)
    elif draw_rotate > .25:
        image = tf.image.rot90(image, k=1)

    # --- colour jitter, each applied with probability 0.6 ---
    if draw_saturation >= .4:
        image = tf.image.random_saturation(image, lower=.7, upper=1.3)
    if draw_contrast >= .4:
        image = tf.image.random_contrast(image, lower=.8, upper=1.2)
    if draw_brightness >= .4:
        image = tf.image.random_brightness(image, max_delta=.1)

    # --- cropping: three central-crop strengths (10% each), a random square
    # crop (30%), or no crop at all (40%) ---
    if draw_crop > .9:
        image = tf.image.central_crop(image, central_fraction=.7)
    elif draw_crop > .8:
        image = tf.image.central_crop(image, central_fraction=.8)
    elif draw_crop > .7:
        image = tf.image.central_crop(image, central_fraction=.9)
    elif draw_crop > .4:
        crop_size = tf.random.uniform([], int(HEIGHT*.8), HEIGHT, dtype=tf.int32)
        image = tf.image.random_crop(image, size=[crop_size, crop_size, CHANNELS])

    # Bring every variant back to the size the model expects.
    return tf.image.resize(image, [HEIGHT, WIDTH]), label

# Data Pipeline

In [None]:
# Build the file list from train_df itself so that entry i of `files_ls` and
# row i of `labels` always describe the same image. (A directory glob gives no
# guarantee that its order matches the CSV row order.)
# Assumes train.csv carries the file name in an `image` column, as in the
# competition's CSV format -- TODO confirm.
files_ls = (TRAIN_PATH + train_df["image"]).tolist()

# Multi-hot targets: one row per image, a 1 in every column whose disease is
# present. Healthy images have an empty id list and keep an all-zero row.
labels = np.zeros((len(train_df),NUM_CLASSES))
for i, label_ids in enumerate(train_df["labels"]):
    labels[i][label_ids] = 1

dataset = tf.data.Dataset.from_tensor_slices((files_ls,labels))
dataset = dataset.map(process_img,num_parallel_calls=AUTO)

# First SPLIT rows train, remaining rows validate.
train_ds = dataset.take(SPLIT)
val_ds = dataset.skip(SPLIT)

# Augmentation is applied to the training split only, and only AFTER cache():
# caching the augmented images would replay the same "random" variants on
# every epoch, and augmenting the validation split would make the validation
# metrics noisy.
# NOTE(review): cache() keeps every decoded 512x512 float image in memory --
# confirm the host has enough RAM for the full training set.
train_ds = (
    train_ds.cache()
    .repeat()
    .shuffle(2048)
    .map(data_augment, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)
val_ds = val_ds.cache().repeat().batch(BATCH_SIZE).prefetch(AUTO)
print("Data Pipeline")

# Model

In [None]:
def create_baseModel():
    """Build the frozen EfficientNetB7 feature extractor.

    Returns:
        An ``efn.EfficientNetB7`` without its classification top, loaded with
        the 'noisy-student' weights, taking ``[HEIGHT, WIDTH, CHANNELS]``
        inputs and with ``trainable`` set to ``False``.
    """
    backbone = efn.EfficientNetB7(
        weights='noisy-student',
        include_top=False,
        input_shape=[HEIGHT, WIDTH, CHANNELS],
    )
    # Frozen so that only the layers added on top are trained at first.
    backbone.trainable = False
    return backbone

def get_trainable(model):
    """Set ``trainable = True`` on every entry of ``model.layers`` (in place).

    Args:
        model: any object exposing an iterable ``layers`` attribute.

    Returns:
        None.
    """
    for each_layer in model.layers:
        setattr(each_layer, "trainable", True)
    
def create_model():
    """Assemble the full classifier: frozen backbone plus a small dense head.

    Head ("Version2"): global average pooling -> dropout(0.3) ->
    dense(1280, relu) -> batch norm -> dropout(0.3) -> dense(NUM_CLASSES,
    sigmoid). The sigmoid output gives one independent score per class.

    Returns:
        A ``tf.keras.Model`` mapping the backbone's input to the sigmoid
        output layer.
    """
    backbone = create_baseModel()

    # Head layers, created and applied in this exact order.
    head_layers = [
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(1280, activation ='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.3),
    ]
    features = backbone.output
    for head_layer in head_layers:
        features = head_layer(features)

    # The output layer is pinned to float32 regardless of the other layers' dtype.
    scores = tf.keras.layers.Dense(
        NUM_CLASSES, activation="sigmoid", dtype='float32'
    )(features)

    return tf.keras.Model(inputs=backbone.input, outputs=scores)

# Compiling the Model

In [None]:
import tensorflow_addons as tfa
import sklearn

def compile_model(model, lr=0.001):
    """Compile ``model`` for multi-label training and return it.

    Args:
        model: the Keras model to compile (compiled in place).
        lr: learning rate handed to the Adam optimizer.

    Returns:
        The same ``model`` instance, compiled with binary cross-entropy loss
        and two metrics: macro F1 (``f1_score``) and binary accuracy (``acc``).
    """
    # `lr` is a deprecated alias of Adam's `learning_rate` argument.
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    loss = tf.keras.losses.BinaryCrossentropy()
    metrics = [
        # threshold=0.5 scores every class independently. With the default
        # (threshold=None) the metric turns each prediction into a one-hot
        # argmax, which can never represent an image with several diseases
        # or with none at all (all-zero "healthy" rows).
        tfa.metrics.F1Score(
            num_classes=NUM_CLASSES,
            average="macro",
            threshold=0.5,
            name="f1_score",
        ),
        tf.keras.metrics.BinaryAccuracy(name='acc'),
    ]

    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    return model

# Callbacks Function

In [None]:
# Validation metric that all three callbacks watch by default.
METRIC = "val_f1_score"

def create_callbacks(metric = METRIC):
    """Create the checkpoint / LR-schedule / early-stopping callbacks.

    Args:
        metric: name of the logged quantity to monitor; higher is treated as
            better (``mode='max'``) by every callback.

    Returns:
        ``[ModelCheckpoint, ReduceLROnPlateau, EarlyStopping]``, in that order.
    """
    watch = dict(monitor=metric, mode='max')

    return [
        # Keep only the best-scoring model, written to './B7model.h5'.
        tf.keras.callbacks.ModelCheckpoint(
            filepath='./B7model.h5',
            save_best_only=True,
            verbose=1,
            **watch,
        ),
        # Multiply the learning rate by 0.1 after 3 epochs without improvement.
        tf.keras.callbacks.ReduceLROnPlateau(
            factor=0.1,
            patience=3,
            verbose=0,
            **watch,
        ),
        # Stop training after 10 epochs without improvement.
        tf.keras.callbacks.EarlyStopping(
            patience=10,
            verbose=1,
            **watch,
        ),
    ]

# Training

In [None]:
# model = load_model("../input/sjmodel/best_model.h5")

In [None]:
# Stage 1: train the new head while the EfficientNet backbone stays frozen.
EPOCHS = 30
VERBOSE = 1

# Drop any Keras state left over from earlier runs before building the model.
tf.keras.backend.clear_session()

with strategy.scope():

    # Build and compile under the strategy scope so the model's variables are
    # created by the distribution strategy.
    model = compile_model(create_model(), lr=0.001)

    callbacks = create_callbacks()

    # Both datasets repeat forever, so the step counts define an "epoch".
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS,
        steps_per_epoch=STEPS_PER_EPOCH,
        validation_steps=VALID_STEPS,
        callbacks=callbacks,
        verbose=VERBOSE,
    )
    

In [None]:
def plot_history(hist):
    """Plot accuracy and loss curves, training vs. validation, side by side.

    Args:
        hist: object whose ``history`` mapping contains the per-epoch lists
            'acc', 'val_acc', 'loss' and 'val_loss'.

    The left panel shows the two accuracy curves and the right panel the two
    loss curves; the figure is displayed with ``plt.show()``.
    """
    logs = hist.history
    # Look up every series before any drawing happens.
    train_acc, valid_acc = logs['acc'], logs['val_acc']
    train_loss, valid_loss = logs['loss'], logs['val_loss']
    epochs_range = range(len(logs['val_loss']))

    # (subplot position, curves as (values, legend label), legend corner, title)
    panels = (
        (1,
         ((train_acc, 'Training Accuracy'), (valid_acc, 'Validation  Accuracy')),
         'lower right',
         'Training and Validation  Accuracy'),
        (2,
         ((train_loss, 'Training Loss'), (valid_loss, 'Validation Loss')),
         'upper right',
         'Training and Validation Loss'),
    )

    plt.figure(figsize=(8, 8))
    for position, curves, legend_corner, panel_title in panels:
        plt.subplot(1, 2, position)
        for values, curve_label in curves:
            plt.plot(epochs_range, values, label=curve_label)
        plt.legend(loc=legend_corner)
        plt.title(panel_title)
    plt.show()

In [None]:
# Curves for the first training run (head trained on top of the frozen backbone).
plot_history(history)

In [None]:
# Unfreeze every layer of the model so the next fit() call fine-tunes the
# whole network; the model is re-compiled in the following cell.
get_trainable(model)

In [None]:
# Stage 2: fine-tune the now fully trainable model with a much smaller
# learning rate.
EPOCHS = 50
VERBOSE = 1

with strategy.scope():

    # Compile again so the new trainable flags and learning rate are used.
    model = compile_model(model, lr=1e-5)

    # Fresh callbacks: their best-score / patience tracking starts over.
    callbacks = create_callbacks()

    history_unfreeze = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS,
        steps_per_epoch=STEPS_PER_EPOCH,
        validation_steps=VALID_STEPS,
        callbacks=callbacks,
        verbose=VERBOSE,
    )

# History plotting

In [None]:
# Curves for the second training run (all layers trainable).
plot_history(history_unfreeze)

# Prediction

In [None]:
# TEST_PATH = GCS_DS_PATH + "/test_images/"

In [None]:
# File names of the test images, read from the local input mount.
# os.listdir returns them in arbitrary order; the same `names` list feeds both
# the prediction dataset and the submission frame, so rows stay aligned.
names = os.listdir('../input/plant-pathology-2021-fgvc8/test_images/')

In [None]:
def _parse(name):
    """Load one test image exactly the way ``process_img`` loads training images.

    Args:
        name: file name of a JPEG inside the test image directory.

    Returns:
        A ``tf.float32`` image with ``CHANNELS`` channels, pixel values scaled
        to [0, 1], resized to ``[HEIGHT, WIDTH]``.
    """
    image_string = tf.io.read_file('../input/plant-pathology-2021-fgvc8/test_images/' + name)
    # Force the channel count the model was built for; without `channels` a
    # non-RGB JPEG would decode to a different number of channels.
    image = tf.image.decode_jpeg(image_string, channels=CHANNELS)
    # Same scaling as at training time (integer pixels -> floats in [0, 1]).
    image = tf.image.convert_image_dtype(image, tf.float32)
    # tf.image.resize expects [height, width].
    return tf.image.resize(image, [HEIGHT, WIDTH])



# Test-time input pipeline: file names -> decoded images -> batches of 32.
# No shuffling, so predictions come back in the order of `names`.
dataset = (
    tf.data.Dataset.from_tensor_slices(tf.constant(names))
    .map(_parse, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)
print(dataset)

In [None]:
# Score every test image; each row of y_pred holds the model's sigmoid outputs
# (one per class) for the image at the same position in `names`.
# NOTE(review): this uses `model` as left by the last fit() call -- the cells
# shown never reload the checkpoint written to './B7model.h5'; confirm intended.
y_pred = model.predict(dataset, verbose=1)

In [None]:
# Class names in output-column order (same order as the label2id mapping).
label_names=['scab','frog_eye_leaf_spot','rust','complex','powdery_mildew']

# Round each score to 0 or 1 to decide which classes are predicted.
y = np.around(y_pred)

# One space-separated label string per image. Joining with " " avoids the
# trailing blank that appending "<name> " for every hit leaves behind.
# An image with no predicted disease is labelled 'healthy'.
labels = []
for row in y:
    present = [name for name, flag in zip(label_names, row) if flag == 1]
    labels.append(" ".join(present) if present else 'healthy')

In [None]:
# Submission table: one row per test image, pairing each file name with its
# decoded label string (both lists are in the same order).
df = pd.DataFrame({'image':names, 'labels':labels})

In [None]:
# Quick visual check of the first few submission rows.
df.head()

In [None]:
# Write the submission file without the DataFrame index column.
df.to_csv('./submission.csv', index=False)