In [None]:
# Install Keras Applications and the EfficientNet Keras sources from attached
# input datasets instead of PyPI (presumably because the notebook has to run
# without internet access -- TODO confirm).
!pip install -q ../input/kerasapplications
!pip install -q '../input/efficientnet-keras-source-code'

import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Convolution2D, MaxPooling2D, Flatten,Dense,Dropout,BatchNormalization, Input
from tensorflow.keras.applications import VGG16, InceptionResNetV2, ResNet50, Xception
from kaggle_datasets import KaggleDatasets
import cv2
from PIL import Image
from IPython.display import FileLink
from glob import glob
import random
import math
from sklearn.model_selection import train_test_split

In [None]:
# try:
#     tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
#     print('Device:', tpu.master())
#     tf.config.experimental_connect_to_cluster(tpu)
#     tf.tpu.experimental.initialize_tpu_system(tpu)
#     strategy = tf.distribute.experimental.TPUStrategy(tpu)
# except:
#     strategy = tf.distribute.get_strategy()
# print('Number of replicas:', strategy.num_replicas_in_sync)

In [None]:
# GCS_PATH = KaggleDatasets().get_gcs_path()

# Root of the competition data and the two image folders directly beneath it.
path = '../input/plant-pathology-2021-fgvc8/'
train_dir = f'{path}train_images/'
test_dir = f'{path}test_images/'

In [None]:
# Load the training metadata; later cells use its `image` and `labels` columns.
df = pd.read_csv('../input/plant-pathology-2021-fgvc8/train.csv')
df.head()

In [None]:
# Break each whitespace-separated label string into a list of label names.
df['labels'] = df['labels'].apply(lambda raw_labels: raw_labels.split())

In [None]:
# Prefix every image file name with the training image directory so each row
# carries a path that can be opened directly.
df['complete_path'] = train_dir + df['image']
df.head()

In [None]:
# (rows, columns) of the training table after adding `complete_path`.
df.shape

In [None]:
# Class index -> label name.
label_names = dict(enumerate([
    'complex',
    'powdery_mildew',
    'frog_eye_leaf_spot',
    'rust',
    'scab',
    'healthy',
]))

# Inverse lookup: label name -> class index.
reverse_train_labels = {name: index for index, name in label_names.items()}

In [None]:
# Swap every label name for its class index. The index is kept as a string so
# the per-row lists can be space-joined again in a later cell.
df['labels'] = df['labels'].map(
    lambda names: [str(reverse_train_labels[name]) for name in names]
)
# Plain-list view of the encoded labels (only referenced by the debug print).
list1 = df['labels'].tolist()
# print(list1)
df['labels'].head()

In [None]:
# Preview the frame now that `labels` holds lists of class-index strings.
df.head()

In [None]:
# Collapse each list of class-index strings back into a single space-separated
# string, e.g. ['2', '4'] -> '2 4'.
df['labels'] = df['labels'].map(lambda values: ' '.join(values).strip())
df.head()

In [None]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the
# shuffled split reproducible across runs.
# NOTE(review): the split is not stratified by label -- confirm that is intended.
train, val = train_test_split(df, test_size=0.2, random_state=999, shuffle=True)
train.shape, val.shape

In [None]:
# Preview the training split.
train.head()

In [None]:
def get_clean_data(df, num_classes=6):
    """Convert a labels frame into image paths and multi-hot target vectors.

    Args:
        df: DataFrame with a ``labels`` column of space-separated class
            indices (e.g. ``'0 2'``) and a ``complete_path`` column.
        num_classes: Width of every target vector. Defaults to 6, the value
            that was previously hard-coded.

    Returns:
        A ``(paths, targets)`` tuple. ``paths`` is a 1-D string array taken
        from ``complete_path``. ``targets`` is a float array of shape
        ``(len(df), num_classes)`` holding 1 at every listed class index and
        0 elsewhere. An empty frame yields a ``(0, num_classes)`` target
        array rather than a shapeless one.

    Raises:
        IndexError: If a class index does not fit into ``num_classes``.
        ValueError: If a label token is not an integer.
    """
    # dtype=str keeps the paths array string-typed even when the frame is empty.
    paths = np.array(df['complete_path'].tolist(), dtype=str)

    # Preallocate so the second dimension is fixed regardless of row count.
    targets = np.zeros((len(df), num_classes))
    for row_idx, label_str in enumerate(df['labels']):
        class_ids = [int(token) for token in label_str.split()]
        if class_ids:
            targets[row_idx, class_ids] = 1
    return paths, targets

In [None]:
# Build the (paths, multi-hot targets) arrays for each split.
train_path, train_target = get_clean_data(train)
val_path, val_target = get_clean_data(val)

In [None]:
# Report the array shapes of both splits, training first.
for split_name, split_paths, split_targets in (
    ('Train', train_path, train_target),
    ('Val', val_path, val_target),
):
    print(f'{split_name} path shape: {split_paths.shape}')
    print(f'{split_name} target shape: {split_targets.shape}')

In [None]:
# Wrap each split as a tf.data dataset that yields one (path, target) pair per
# element; the images themselves are loaded later by the map functions.
train_data = tf.data.Dataset.from_tensor_slices((train_path, train_target))
val_data = tf.data.Dataset.from_tensor_slices((val_path, val_target))

In [None]:
# Side length (height and width) that every image is resized to.
target_size_dim = 400

In [None]:
def process_data_train(image_path, label):
    """Load one training image with random augmentation.

    Reads and decodes the JPEG at ``image_path`` as 3 channels, applies a
    random brightness shift followed by random horizontal and vertical flips,
    resizes to ``target_size_dim`` x ``target_size_dim``, and divides the
    float32 result by 255. ``label`` is returned untouched alongside the image.
    """
    raw_bytes = tf.io.read_file(image_path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)

    # Augmentation runs on the decoded image, before it is resized.
    pixels = tf.image.random_brightness(pixels, 0.3)
    pixels = tf.image.random_flip_left_right(pixels, seed=None)
    pixels = tf.image.random_flip_up_down(pixels)

    side = target_size_dim
    resized = tf.image.resize(pixels, size=[side, side])
    scaled = tf.cast(resized, tf.float32) / 255
    return scaled, label

def process_data_valid(image_path, label):
    """Load one validation image without any augmentation.

    Reads and decodes the JPEG at ``image_path`` as 3 channels, resizes it to
    ``target_size_dim`` x ``target_size_dim``, and divides the float32 result
    by 255. ``label`` is returned untouched alongside the image.
    """
    decoded = tf.image.decode_jpeg(tf.io.read_file(image_path), channels=3)
    side = target_size_dim
    resized = tf.image.resize(decoded, [side, side])
    scaled = tf.cast(resized, tf.float32) / 255
    return scaled, label

In [None]:
# Attach the image loaders: the training split goes through the augmenting
# loader, the validation split through the plain one. AUTOTUNE lets tf.data
# choose the level of parallelism.
train_ds = train_data.map(process_data_train, num_parallel_calls=tf.data.experimental.AUTOTUNE)
valid_ds = val_data.map(process_data_valid, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [None]:
def configure_for_performance(ds, batch_size = 128):
    """Shuffle, batch and prefetch a dataset for training.

    Args:
        ds: Dataset exposing ``shuffle``, ``batch`` and ``prefetch``.
        batch_size: Number of elements per batch.

    Returns:
        The dataset shuffled with a 200-element buffer, grouped into batches
        of ``batch_size``, and prefetched with an auto-tuned buffer.
    """
    autotune = tf.data.experimental.AUTOTUNE
    return (
        ds.shuffle(buffer_size=200)
        .batch(batch_size)
        .prefetch(buffer_size=autotune)
    )

# Batch size used for both splits (overrides the helper's default of 128).
batch_size = 64

# Training batches are shuffled and prefetched by the helper; validation
# batches are only batched, so their order is left as-is.
train_ds_batch = configure_for_performance(train_ds, batch_size)
valid_ds_batch = valid_ds.batch(batch_size)

In [None]:
# !pip install efficientnet
import efficientnet.keras as efn

In [None]:
# Load the previously trained model from the attached dataset.
# NOTE(review): presumably the `efficientnet.keras` import in the previous cell
# is what lets the saved model's layers deserialize -- confirm before removing it.
new_model = tf.keras.models.load_model(r"../input/trained-model-for-plant-disease-competition/28_may_plant_model.h5")

In [None]:
# new_model.summary()

In [None]:
# history = new_model.fit(train_ds_batch, validation_data = valid_ds_batch, epochs = 1)

In [None]:
def plotLearningCurve(history, epochs=None):
    """Plot training vs. validation accuracy and loss, one figure each.

    Args:
        history: Object whose ``history`` attribute is a dict with per-epoch
            sequences under 'accuracy', 'val_accuracy', 'loss' and 'val_loss'
            (for example the value returned by ``model.fit``).
        epochs: Number of epochs to put on the x-axis. When omitted it is
            taken from the length of the recorded 'loss' series, so the axis
            cannot disagree with the data. An explicit value is used as given.
    """
    metrics = history.history
    if epochs is None:
        epochs = len(metrics['loss'])
    epochRange = range(1, epochs + 1)

    # Both figures share the same layout; only the metric key and label differ.
    for key, label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
        plt.figure(figsize=(12, 6))
        plt.plot(epochRange, metrics[key])
        plt.plot(epochRange, metrics['val_' + key])
        plt.title('Model ' + label)
        plt.xlabel('Epoch')
        plt.ylabel(label)
        plt.legend(['Train', 'Validation'], loc='upper left')
        plt.show()

# plotLearningCurve(history,20)

In [None]:
# The sample submission supplies the list of test images; only its `image`
# column is used by the following cells.
test_set = pd.read_csv(path + 'sample_submission.csv')
test_set.head()

In [None]:
# Full path of every test image named in the sample submission.
test_imgs = [
    f'../input/plant-pathology-2021-fgvc8/test_images/{file_name}'
    for file_name in test_set.image
]
print(test_imgs)

In [None]:
# One-column frame of test image paths; the `image` and `labels` columns are
# added to it once predictions exist.
df_test = pd.DataFrame(np.array(test_imgs), columns=['Path'])
df_test.head()

In [None]:
# Dataset that yields the test image paths only (no targets).
test_ds = tf.data.Dataset.from_tensor_slices((df_test.Path.values))

def process_test(image_path):
    """Load one test image, applying the same random augmentation as training.

    Reads and decodes the JPEG at ``image_path`` as 3 channels, applies a
    random brightness shift followed by random horizontal and vertical flips,
    resizes to ``target_size_dim`` x ``target_size_dim``, and returns the
    float32 image divided by 255.

    NOTE(review): random augmentation at inference looks deliberate -- the
    prediction cell runs the model several times and averages the outputs,
    i.e. test-time augmentation. Confirm before making this deterministic.
    """
    raw_bytes = tf.io.read_file(image_path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)

    # Augmentation runs on the decoded image, before it is resized.
    pixels = tf.image.random_brightness(pixels, 0.3)
    pixels = tf.image.random_flip_left_right(pixels, seed=None)
    pixels = tf.image.random_flip_up_down(pixels)

    side = target_size_dim
    resized = tf.image.resize(pixels, size=[side, side])
    return tf.cast(resized, tf.float32) / 255
    
# Load the test images in parallel and batch them; the inference batch is
# twice the training batch size.
test_ds = test_ds.map(process_test, num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(batch_size*2)

In [None]:
# Run the model over the whole test set six times and keep every pass.
# process_test applies random brightness/flips, so -- assuming the dataset
# re-draws them on each iteration (TODO confirm) -- the passes differ and are
# averaged in the next cell.
preds = [
    new_model.predict(test_ds, workers=16, verbose=1)
    for _ in range(6)
]

In [None]:
# Average the prediction passes element-wise (axis 0 indexes the pass).
pred_y = np.mean(preds, axis=0)
print(pred_y)

In [None]:
# Per-class decision thresholds keyed by class index (same keys as
# label_names); a class is predicted when its averaged score exceeds its entry.
threshold = {0: 0.25, 1: 0.30, 2: 0.23, 3: 0.30, 4: 0.30, 5: 0.30}

In [None]:
# Turn each row of averaged scores into a space-separated label string.
pred_string = []
for line in pred_y:
    # Keep every class whose score clears its own threshold, in index order.
    picked = [
        name for idx, name in label_names.items()
        if line[idx] > threshold[idx]
    ]
    if not picked:
        # Nothing cleared its threshold: emit the single most probable class
        # instead of leaving the labels cell blank in the submission.
        picked = [label_names[int(np.argmax(line))]]
    # Joining avoids the trailing space the manual concatenation produced.
    pred_string.append(' '.join(picked))
print(pred_string)

In [None]:
# Recover the bare file name (last path component) for the `image` column,
# attach the predicted label strings, and keep only the two columns that are
# written to the submission file.
df_test['image'] = df_test.Path.str.split('/').str[-1]
df_test['labels'] = pred_string
# df_test['labels'] = df_test['labels'].replace(label_names)
df_test= df_test[['image','labels']]
df_test.head()

In [None]:
# Switch to the Kaggle working directory so the relative file name below (and
# the FileLink in the next cell) resolves there.
os.chdir(r'/kaggle/working')

# Write the submission without the pandas index column.
df_test.to_csv('submission.csv', index=False)

In [None]:
# Render a download link for the submission file written above.
FileLink(r'submission.csv')