# Load the libraries

In [None]:
%matplotlib inline
import math, re, os

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from kaggle_datasets import KaggleDatasets
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from sklearn import metrics
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint

# Enable the TPU

In [None]:
# Let tf.data choose parallelism and prefetch buffer sizes on its own.
AUTO = tf.data.experimental.AUTOTUNE

# Pick a distribution strategy that matches the available hardware.
try:
    # No arguments: the resolver takes the TPU address from the TPU_NAME
    # environment variable, which (per the original author) Kaggle always sets.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # A ValueError here is treated as "no TPU available".
    tpu = None

if not tpu:
    # TensorFlow's default strategy; works on CPU and on a single GPU.
    strategy = tf.distribute.get_strategy()
else:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

# Load data

In [None]:
# Data access
# (GCS path lookup is disabled; the data is read from the local ../input mount.)
#GCS_DS_PATH = KaggleDatasets().get_gcs_path()
# Configuration
# Placeholder class count; NUM_CLASSES is reassigned later in the notebook,
# before the model is built.
NUM_CLASSES=10
# NOTE(review): model.fit below is called with a literal epochs=6, so this
# constant is currently unused.
EPOCHS = 10
# Batch size used by every tf.data pipeline; scaling by replica count is disabled.
BATCH_SIZE = 8 #* strategy.num_replicas_in_sync
# Images are resized to IMG_SIZE x IMG_SIZE pixels and fed to the model at that size.
IMG_SIZE = 600

In [None]:
# Image directories; both end with "/" so a file name can be appended directly.
train_dir="../input/c/plant-pathology-2021-fgvc8/train_images/"
test_dir="../input/c/plant-pathology-2021-fgvc8/test_images/"
# Training metadata; later cells read its `image` and `labels` columns.
df_train=pd.read_csv('../input/c/plant-pathology-2021-fgvc8/train.csv')
# Submission template; its `labels` column is overwritten with predictions at the end.
df_sub = pd.read_csv('../input/c/plant-pathology-2021-fgvc8/sample_submission.csv')


In [None]:
# Preview the first rows of the training metadata.
df_train.head()

In [None]:
def format_train_path(fname):
    """Return the path of training image `fname` by prefixing it with `train_dir`."""
    return "".join((train_dir, fname))

In [None]:
# Full path of every training image, in df_train row order.
train_paths = df_train["image"].apply(format_train_path)

In [None]:
# Wrap every label value in a Series, stack the result into one long Series and
# drop the inner index level so the index is the df_train row number again.
# The former positional `1` was bound to Series.apply's deprecated
# `convert_dtype` parameter (it is not an axis); dropping it keeps the result
# unchanged and avoids breakage on newer pandas.
# NOTE(review): pd.Series(<str>) keeps the string whole, so a space-separated
# multi-label value stays a single combined class — confirm this is intended.
labels = df_train['labels'].apply(pd.Series).stack().droplevel(-1)

# One-hot encode, then sum per original row to get one target vector per image.
target_labels = pd.get_dummies(labels).groupby(level=0).sum()
target_labels.head()

In [None]:
# labels_dict = dict(zip(list(labels.value_counts().index), np.arange(labels.value_counts().shape[0])))
# target_labels = labels.map(labels_dict)

In [None]:
# labels = df_train['labels'].apply(pd.Series, 1).stack()
# labels.index = labels.index.droplevel(-1)
# target_labels = pd.get_dummies(labels).groupby(level=0).sum()
# target_labels.head()

In [None]:
# Bar chart of how often each label value occurs in the training data.
labels.value_counts().plot(kind="bar", figsize=(15, 5))
plt.show()

In [None]:
# Hold out 10% of the images for validation (fixed seed for reproducibility).
# The inputs are rebuilt from df_train rather than taken from `train_paths`,
# which this cell itself overwrites: re-running the cell therefore no longer
# splits an already-split training set a second time.
train_paths, valid_paths, train_labels, valid_labels = train_test_split(
    df_train.image.apply(format_train_path),
    target_labels,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Number of full batches in one pass over the training split.
STEPS_PER_EPOCH = len(train_paths) // BATCH_SIZE

# Image Pre-processing

In [None]:
# Random augmentation stack applied in this order:
# horizontal flip -> rotation -> vertical flip -> zoom.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(layers.experimental.preprocessing.RandomFlip("horizontal"))
data_augmentation.add(layers.experimental.preprocessing.RandomRotation(0.5))
data_augmentation.add(layers.experimental.preprocessing.RandomFlip("vertical"))
data_augmentation.add(layers.experimental.preprocessing.RandomZoom(.2, .2))
def decode_image(filename, label=None, image_size=(IMG_SIZE, IMG_SIZE)):
    """Load a JPEG file as a 3-channel image resized to `image_size`.

    Args:
        filename: path of the JPEG file to read.
        label: optional target passed through untouched.
        image_size: (height, width) handed to `tf.image.resize`.

    Returns:
        The resized image, or an `(image, label)` pair when `label` is given.
    """
    raw = tf.io.read_file(filename)
    pixels = tf.image.resize(tf.image.decode_jpeg(raw, channels=3), image_size)
    return pixels if label is None else (pixels, label)

def data_augment(image, label=None, seed=42):
    """Apply random augmentation to a single image.

    Runs the `data_augmentation` layer stack and then a random brightness
    shift.

    Args:
        image: a single (unbatched) image tensor.
        label: optional target passed through untouched.
        seed: op-level seed for the brightness shift.

    Returns:
        The augmented image, or an `(image, label)` pair when `label` is given.
    """
    # The preprocessing layers work on batches: add a batch axis, then strip it.
    batched = tf.expand_dims(image, 0)
    image = data_augmentation(batched)[0]

    # The previous stateless op was fed a seed derived only from the constants
    # (seed, IMG_SIZE), so it produced the exact same brightness delta for
    # every image. The stateful op draws a new delta on each call.
    # NOTE(review): the pixels come from tf.image.resize of a decoded JPEG,
    # presumably still on a 0-255 scale, where max_delta=0.2 is a tiny shift —
    # confirm the intended magnitude.
    image = tf.image.random_brightness(image, max_delta=0.2, seed=seed)
    #image = tf.image.stateless_random_contrast(image,0.01,0.1, seed=seed)

    if label is None:
        return image
    return image, label

In [None]:
# Training pipeline: decode -> cache decoded images -> augment -> repeat
# forever -> shuffle (buffer of 512) -> batch -> prefetch.
# Augmentation sits after the cache so it is re-applied on every pass.
train_dataset = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_dataset = train_dataset.map(decode_image, num_parallel_calls=AUTO).cache()
train_dataset = train_dataset.map(data_augment, num_parallel_calls=AUTO)
train_dataset = train_dataset.repeat().shuffle(512)
train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(AUTO)

# Validation pipeline: decode -> batch -> cache the batches -> prefetch.
# No augmentation, shuffling or repetition.
valid_dataset = tf.data.Dataset.from_tensor_slices((valid_paths, valid_labels))
valid_dataset = valid_dataset.map(decode_image, num_parallel_calls=AUTO)
valid_dataset = valid_dataset.batch(BATCH_SIZE).cache().prefetch(AUTO)


In [None]:
def scheduler(epoch, lr):
    """Learning-rate schedule for `LearningRateScheduler`.

    Keeps the rate unchanged for the first 10 epochs, then multiplies it by
    exp(-0.1) at each later epoch.

    Args:
        epoch: zero-based epoch index supplied by the callback.
        lr: current learning rate.

    Returns:
        The learning rate to use for this epoch, as a plain Python number.
    """
    if epoch < 10:
        return lr
    # math.exp keeps the result a Python float; tf.math.exp returned a tensor,
    # which the callback is not guaranteed to accept.
    return lr * math.exp(-0.1)

In [None]:
from keras import backend as K

def recall(y_true, y_pred):
    """Recall over the batch: rounded true positives / rounded actual positives.

    Epsilon is added to the denominator to avoid division by zero.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (actual_positives + K.epsilon())

def precision(y_true, y_pred):
    """Precision over the batch: rounded true positives / rounded predicted positives.

    Epsilon is added to the denominator to avoid division by zero.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())

def f1(y_true, y_pred):
    """F1 score over the batch, built from `precision` and `recall`.

    Epsilon in the denominator guards against precision + recall == 0.
    """
    prec = precision(y_true, y_pred)
    rec = recall(y_true, y_pred)
    ratio = (prec * rec) / (prec + rec + K.epsilon())
    return 2 * ratio

In [None]:
# Applies `scheduler` at the start of every epoch and logs the chosen rate.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1)

# Stops training once val_loss has not improved for 10 epochs.
# (The variable deliberately keeps its original name; the fit cell refers to it.)
EarlyStopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=10,
    verbose=True,
)

# Build the model

In [None]:
# Categorical cross-entropy with label smoothing.
# from_logits is False because the model's final Dense layer already applies
# a softmax, so the loss receives probabilities rather than raw logits.
catcross_loss = tf.keras.losses.CategoricalCrossentropy(
    from_logits=False,
    label_smoothing=0.1,
    name='categorical_crossentropy',
)

# One output unit per one-hot column of the targets. (`labels_dict` only exists
# in a commented-out cell, so referencing it raised NameError on a fresh run.)
NUM_CLASSES = target_labels.shape[1]

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.applications import ResNet50
input_shape = [IMG_SIZE, IMG_SIZE, 3]
# instantiating the model in the strategy scope creates the model on the TPU
def build_model():
    """Build and compile the classifier: ImageNet ResNet50 + softmax head.

    The backbone uses global max pooling and is fine-tuned end to end, except
    for its BatchNormalization layers, which are kept frozen.

    Returns:
        A compiled `Sequential` model with `NUM_CLASSES` softmax outputs.
    """
    base_model = ResNet50(include_top=False, input_shape=input_shape,
                          weights="imagenet", pooling='max')
    # Fine-tune every backbone layer except BatchNormalization. The previous
    # loop only set trainable=True on layers that were already trainable, so
    # it did nothing and left the BatchNormalization layers trainable too.
    for layer in base_model.layers:
        layer.trainable = not isinstance(layer, layers.BatchNormalization)
    model = Sequential()
    model.add(base_model)
    # Rebuild top
#     model.add(layers.GlobalAveragePooling2D(name="avg_pool"))
#     model.add(layers.BatchNormalization())
#     model.add(layers.Dense(512,activation='relu'))
#     model.add(layers.Dense(128,activation='relu'))
#     model.add(layers.Dense(32,activation='relu'))
    model.add(layers.Dense(NUM_CLASSES, activation="softmax"))
    # Pass the configured optimizer instance; it used to be created and then
    # ignored in favour of the string 'Adam'. `learning_rate` replaces the
    # deprecated `lr` keyword.
    opt = Adam(learning_rate=0.001)
    model.compile(optimizer=opt, loss=catcross_loss, metrics=['acc'])
    return model

In [None]:
# Build the model inside the strategy scope so its variables are created on
# the devices managed by `strategy`.
with strategy.scope():
    model = build_model()
# Print the layer-by-layer summary.
model.summary()

# Train the model

In [None]:
# train_dataset repeats indefinitely, so steps_per_epoch defines where an
# epoch ends.
history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=6,
    steps_per_epoch=STEPS_PER_EPOCH,
    callbacks=[lr_schedule, EarlyStopping],
    verbose=1,
)

In [None]:
import matplotlib

# Training vs. validation loss per epoch.
plt.figure(figsize=(10, 8))
plt.plot(history.history['loss'], label='Train')
plt.plot(history.history['val_loss'], label='Test')
plt.title("Model Loss")
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.ylim(0, 2)
plt.grid()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
plt.figure(figsize=(10, 8))
plt.plot(history.history['acc'], label='Train')
plt.plot(history.history['val_acc'], label='Test')
plt.title("Model Accuracy")
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.ylim(0, 1)
plt.grid()
plt.show()

In [None]:
# Training vs. validation F1 per epoch.
# The history only has 'f1' / 'val_f1' entries when the model was compiled
# with the `f1` metric; otherwise indexing them raised a KeyError that
# aborted a full notebook run.
if 'f1' in history.history and 'val_f1' in history.history:
    plt.figure(figsize=(10,8))
    plt.plot(history.history['f1'])
    plt.plot(history.history['val_f1'])
    plt.title("Model F1-Score")
    plt.xlabel('Epochs')
    plt.ylabel('f1')
    plt.legend(['Train', 'Test'])
    plt.ylim(ymax = 1, ymin = 0)
    plt.grid()
    plt.show()
else:
    print("No F1 history recorded; add `f1` to the compile metrics to enable this plot.")

In [None]:
# Save the trained model to the working directory (the .h5 suffix selects the HDF5 format).
model.save("resnetplant.h5")

# Make predictions

In [None]:
from tensorflow import keras
# NOTE(review): this rebinds `model` to a file under ../input/resnet/, not the
# "resnetplant.h5" written to the working directory above — confirm that the
# uploaded copy is the model you intend to predict with.
model = keras.models.load_model('../input/resnet/resnetplant.h5')

In [None]:
# Build the test pipeline from the submission frame so predictions come out
# in df_sub row order. Dataset.list_files shuffles file names by default, so
# the previous version produced predictions in random order, and they were
# then assigned to df_sub positionally.
# Assumes sample_submission.csv has an `image` column of file names, like
# train.csv — TODO confirm.
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices(test_dir + df_sub["image"])
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
)

In [None]:
# One row of class scores per test image, in test_dataset iteration order.
pred = model.predict(test_dataset)

In [None]:
# Inspect the raw predicted scores.
pred

In [None]:
# Scores rounded to 0/1, the same rounding get_labels_from_pred applies.
pred.round()

# Process the predictions

In [None]:
# Column order of the one-hot targets; index i here names model output unit i.
label_names = target_labels.columns
def get_labels_from_pred(preds, names=None, threshold=0.5):
    """Turn rows of class scores into space-separated label strings.

    Every class whose score exceeds `threshold` is selected. If no class in a
    row does, the highest-scoring class is used, so a row never maps to an
    empty string (the rounding-only version returned "" whenever the top
    score was at most 0.5).

    Args:
        preds: 2-D array-like of scores, one row per sample.
        names: class names indexed like the score columns; defaults to the
            notebook-level `label_names`.
        threshold: score a class must exceed to be selected. The default 0.5
            matches the previous `round() == 1` test for scores in [0, 1].

    Returns:
        A list with one label string per row of `preds`.
    """
    if names is None:
        names = label_names
    names = np.asarray(names)
    scores = np.asarray(preds)

    decoded = []
    for row in scores:
        picked = np.flatnonzero(row > threshold)
        if picked.size == 0:
            # Nothing cleared the threshold: fall back to the best class.
            picked = np.array([int(np.argmax(row))])
        decoded.append(" ".join(names[picked]))
    return decoded

In [None]:
# Decode the score matrix into one label string per test image.
pred_labels = get_labels_from_pred(pred)

In [None]:
# Show a sample only; displaying the full Python list prints every prediction.
pred_labels[:10]

In [None]:
# NOTE(review): a list is assigned positionally, so pred_labels must be in
# df_sub row order — verify the test pipeline preserves that order.
df_sub["labels"] = pred_labels

In [None]:
# Preview the submission frame before writing it out.
df_sub.head()

In [None]:
# Write the submission file without the pandas index column.
df_sub.to_csv("submission.csv", index=False, encoding='utf-8')