In [1]:
import math, re, os

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from kaggle_datasets import KaggleDatasets
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras
from keras.layers import GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.applications import EfficientNetB7
from keras.applications.densenet import DenseNet201
from sklearn import metrics
from sklearn.model_selection import train_test_split
from keras.callbacks import ReduceLROnPlateau

from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
# Let tf.data choose parallelism / prefetch buffer sizes on its own.
AUTO = tf.data.experimental.AUTOTUNE

# Hardware detection: try to resolve a TPU first. The resolver is called
# without arguments (it relies on the TPU_NAME environment variable, which
# Kaggle sets); a ValueError is treated as "no TPU available".
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # Default TensorFlow strategy: works on CPU and on a single GPU.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [3]:
# Data access
# GCS location of the competition dataset, resolved through kaggle_datasets.
# format_train_path / format_test_path prepend it to the image file names.
GCS_DS_PATH = KaggleDatasets().get_gcs_path("plant-pathology-2021-fgvc8")

# Configuration
# EPOCHS is handed to model.fit; BATCH_SIZE scales with the number of replicas
# reported by the distribution strategy (8 samples per replica); IM_Z is the
# side length of the square images produced by decode_image and expected by
# the model's input_shape.
EPOCHS = 10
BATCH_SIZE = 8 * strategy.num_replicas_in_sync
IM_Z = 768

In [4]:
def format_train_path(fname):
    """Return the full GCS path of a training image.

    The result is the module-level ``GCS_DS_PATH`` prefix, followed by the
    ``/train_images/`` folder and the given file name ``fname``.
    """
    return "".join((GCS_DS_PATH, '/train_images/', fname))

def format_test_path(fname):
    """Return the full GCS path of a test image.

    The result is the module-level ``GCS_DS_PATH`` prefix, followed by the
    ``/test_images/`` folder and the given file name ``fname``.
    """
    return "".join((GCS_DS_PATH, '/test_images/', fname))

In [5]:
# Local (Kaggle input mount) copies of the competition image folders.
train_dir="../input/plant-pathology-2021-fgvc8/train_images/"
test_dir="../input/plant-pathology-2021-fgvc8/test_images/"
# Training metadata: later cells read its `image` and `labels` columns.
df_train=pd.read_csv('../input/plant-pathology-2021-fgvc8/train.csv')
# Sample submission: its `image` column provides the test file names.
df_sub = pd.read_csv('../input/plant-pathology-2021-fgvc8/sample_submission.csv')

In [6]:
# Expand the bare file names from both CSVs into full GCS paths.
train_paths = df_train["image"].map(format_train_path)
test_paths = df_sub["image"].map(format_test_path)

In [7]:
# Multi-hot encode the space-separated `labels` column of train.csv.
#
# `explode` turns each list of label names into one row per (image, label)
# pair while keeping the image's original index value. It replaces the former
# `apply(pd.Series, 1).stack()` + `droplevel`, which built one Series per row
# and passed a stray positional `1` to `Series.apply`.
labels = df_train['labels'].str.split(" ").explode()
# One indicator column per distinct label name, then collapse back to a single
# row per image by summing the indicator rows that share an index value.
target_labels = pd.get_dummies(labels).groupby(level=0).sum()
target_labels.head()

In [8]:
# Hold out 15% of the labelled images for validation.
#
# The paths are derived from `df_train` here instead of reading `train_paths`,
# because this cell also assigns `train_paths`: re-running it would otherwise
# split an already-split series that no longer lines up with `target_labels`.
# `random_state` is fixed so the same images land in the validation set on
# every run.
train_paths, valid_paths, train_labels, valid_labels = train_test_split(
    df_train.image.apply(format_train_path),
    target_labels,
    test_size=0.15,
    random_state=42,
)

In [9]:
def decode_image(filename, label=None, image_size=(IM_Z, IM_Z)):
    """Read a JPEG file and turn it into a preprocessed float image tensor.

    Args:
        filename: Path of the JPEG file, passed to ``tf.io.read_file``.
        label: Optional label that is passed through unchanged.
        image_size: ``(height, width)`` target handed to ``tf.image.resize``.

    Returns:
        The preprocessed image alone when ``label`` is None, otherwise an
        ``(image, label)`` tuple, so the function can be mapped over both
        labelled and unlabelled datasets.
    """
    bits = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(bits, channels=3)
    # Resize before preprocessing: the default (bilinear) resize returns
    # float32, so the scaling below is not applied to the raw uint8 tensor.
    image = tf.image.resize(image, image_size)
    # The network trained in this notebook is Xception, so use Xception's own
    # preprocessing (pixels scaled to [-1, 1]) rather than DenseNet's
    # mean/std normalisation, which was used here before.
    image = keras.applications.xception.preprocess_input(image)

    if label is None:
        return image
    return image, label

def data_augment(image, label=None):
    """Randomly perturb one image for training.

    Applies, in this order: random horizontal flip, random vertical flip,
    random contrast with factor bounds 0.8 and 1.2, and random brightness
    with a maximum delta of 0.1.

    Args:
        image: Image tensor to augment.
        label: Optional label that is passed through unchanged.

    Returns:
        The augmented image alone when ``label`` is None, otherwise an
        ``(image, label)`` tuple.
    """
    augmented = tf.image.random_flip_left_right(image)
    augmented = tf.image.random_flip_up_down(augmented)
    augmented = tf.image.random_contrast(augmented, 0.8, 1.2)
    augmented = tf.image.random_brightness(augmented, 0.1)

    return augmented if label is None else (augmented, label)

In [10]:
# Training input pipeline: shuffle -> decode -> augment -> repeat -> batch.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_paths, train_labels))
    # Shuffle the (path, label) pairs before decoding: the buffer then only
    # holds short strings and label rows, so it can span the whole training
    # set, and because it sits before repeat() every pass gets a fresh order.
    .shuffle(len(train_paths))
    .map(decode_image, num_parallel_calls=AUTO)
    # No .cache() here: each decoded image is IM_Z x IM_Z x 3 float32 values,
    # so an in-memory cache grows with the whole training set, and a cache
    # placed after shuffle() would also freeze the first epoch's order.
    .map(data_augment, num_parallel_calls=AUTO)
    .repeat()
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

# Validation pipeline: decode (no augmentation), batch, cache the batches,
# then prefetch.
# NOTE(review): the cache holds every decoded validation batch in memory —
# confirm this fits on the target machine at the configured image size.
valid_dataset = tf.data.Dataset.from_tensor_slices((valid_paths, valid_labels))
valid_dataset = valid_dataset.map(decode_image, num_parallel_calls=AUTO)
valid_dataset = valid_dataset.batch(BATCH_SIZE).cache()
valid_dataset = valid_dataset.prefetch(AUTO)
# Label-free view of the validation images: paths only, decoded and batched.
test_valid_dataset = tf.data.Dataset.from_tensor_slices(valid_paths)
test_valid_dataset = test_valid_dataset.map(decode_image, num_parallel_calls=AUTO)
test_valid_dataset = test_valid_dataset.batch(BATCH_SIZE)
# Test images listed in the sample submission: paths only, decoded and batched.
test_dataset = tf.data.Dataset.from_tensor_slices(test_paths)
test_dataset = test_dataset.map(decode_image, num_parallel_calls=AUTO)
test_dataset = test_dataset.batch(BATCH_SIZE)

In [11]:
# Build and compile the model inside the strategy scope so its variables are
# created under the selected distribution strategy.
with strategy.scope():
    # ImageNet-pretrained Xception backbone without its classifier head,
    # followed by global average pooling and a small dense head that ends in
    # six independent sigmoid outputs (multi-label classification).
    model = tf.keras.Sequential([
        keras.applications.Xception(
            input_shape=(IM_Z, IM_Z, 3),
            weights='imagenet',
            include_top=False
        ),
        GlobalAveragePooling2D(),
        Dense(512, activation = 'relu'),
        Dropout(0.2),
        Dense(32, activation = 'relu'),
        Dense(6, activation='sigmoid')
    ])

    # threshold=0.5 makes the metric binarise every sigmoid output on its own.
    # Without a threshold, F1Score keeps only the arg-max class per sample,
    # which is a single-label rule and mis-scores multi-label predictions.
    f1 = tfa.metrics.F1Score(num_classes = 6, average = 'macro', threshold = 0.5)

    model.compile(
        optimizer = 'adam',
        loss = 'binary_crossentropy',
        metrics = [f1]
    )
    model.summary()

In [12]:
# ReduceLROnPlateau looks up `monitor` by name in the per-epoch logs, so it
# must be given the string key of the validation F1 value ('val_' + metric
# name). Passing the metric object itself never matches a log entry, which
# means the learning rate is never reduced.
reduceLR = ReduceLROnPlateau(
    monitor = 'val_' + f1.name,
    mode = 'max',       # higher F1 is better
    factor = 0.1,
    patience = 3,
    min_lr = 1e-9,
    verbose = 1,
)

# train_dataset repeats indefinitely, so the epoch length is set explicitly as
# the number of full batches in the training split.
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=train_labels.shape[0] // BATCH_SIZE,
    validation_data=valid_dataset,
    validation_steps=valid_labels.shape[0] // BATCH_SIZE,
    callbacks=[reduceLR]
)

In [13]:
# Save the trained model to 'model_30.h5' in the working directory.
model.save('model_30.h5')

In [14]:
from tensorflow import keras

# Per-class F1 metric (average=None); this re-binds the name `f1`.
f1 = tfa.metrics.F1Score(num_classes = 6, average = None)

# Load the three saved models that make up the inference ensemble, in the
# order model1, model2, model3.
model1, model2, model3 = [
    keras.models.load_model(saved_model_path)
    for saved_model_path in (
        '../input/tpu-efficientnet/tpu_EfficientNetB7.h5',
        '../input/tpu-densenet/tpu_dense.h5',
        '../input/tpu-efficientnet-no-aug/model1.h5',
    )
]

In [15]:
import os

# One row per file found in the test image folder; the file name is stored in
# the single `image` column.
dir_list = os.listdir('../input/plant-pathology-2021-fgvc8/test_images')
test = pd.DataFrame({'image': dir_list})
test.head()

Unnamed: 0,image
0,ad8770db05586b59.jpg
1,c7b03e718489f3ca.jpg
2,85f8cb619c66b863.jpg


In [16]:
from keras.preprocessing.image import ImageDataGenerator
label_list = [ 'complex', 'frog_eye_leaf_spot','healthy','powdery_mildew',  'rust', 'scab']


def _make_test_generator(preprocess_fn, side):
    """Create an ImageDataGenerator and an unshuffled iterator over `test`.

    Args:
        preprocess_fn: Function used as the generator's preprocessing_function.
        side: Side length of the square `target_size` images are resized to.

    Returns:
        ``(datagen, generator)``. The generator yields one RGB image per batch
        in the row order of the `test` frame, without labels.
    """
    datagen = ImageDataGenerator(preprocessing_function = preprocess_fn)
    generator = datagen.flow_from_dataframe(
        dataframe = test,
        directory = '../input/plant-pathology-2021-fgvc8/test_images',
        x_col = 'image',
        y_col = None,
        color_mode = 'rgb',
        target_size = (side, side),
        batch_size = 1,
        class_mode = None,
        classes = None,
        shuffle = False,
    )
    return datagen, generator


# 768x768 inputs with EfficientNet preprocessing.
test_datagen1, test_generator1 = _make_test_generator(
    keras.applications.efficientnet.preprocess_input, 768)

# 512x512 inputs with DenseNet preprocessing.
test_datagen2, test_generator2 = _make_test_generator(
    keras.applications.densenet.preprocess_input, 512)

Found 3 validated image filenames.
Found 3 validated image filenames.


In [17]:
# Class names by output column.
# NOTE(review): assumes the loaded models emit their outputs in this order — confirm.
label_list = [ 'complex', 'frog_eye_leaf_spot','healthy','powdery_mildew',  'rust', 'scab']

# Weighted average of the three models' probabilities (weights 2 : 1 : 2).
# model1 and model3 share the 768px generator; model2 uses the 512px one.
probs = (2*model1.predict(test_generator1) + model2.predict(test_generator2) + 2*model3.predict(test_generator1)) / 5

# For every image, keep the class indices whose probability exceeds 0.5.
# If no class clears the threshold, fall back to the single most probable
# class so that no image ends up with an empty label string.
indexes = []
for row in probs:
    chosen = np.flatnonzero(row > 0.5).tolist()
    if not chosen:
        chosen = [int(np.argmax(row))]
    indexes.append(chosen)

# Space-separated label names, one string per image.
pre = [" ".join(label_list[k] for k in chosen) for chosen in indexes]

# Plain column assignment instead of insert(..., allow_duplicates=True):
# re-running this cell overwrites `labels` rather than adding a second column
# with the same name.
test['labels'] = pre
test.to_csv('submission.csv', index=False)

In [18]:
# Display the submission frame: image file names with their predicted labels.
test

Unnamed: 0,image,labels
0,ad8770db05586b59.jpg,frog_eye_leaf_spot scab
1,c7b03e718489f3ca.jpg,frog_eye_leaf_spot
2,85f8cb619c66b863.jpg,scab
