## Preparing the ground <a id="1.1"></a>

[Build Model](https://www.kaggle.com/tamtamxtamtam/plant-2021)

In [None]:
# Install the Keras_Applications wheel and the efficientnet package from local
# ../input paths. `--no-index` stops pip from consulting a package index for
# the second install, and `-q` reduces pip's output.
! pip install ../input/keras108/Keras_Applications-1.0.8-py3-none-any.whl
! pip install ../input/efficientnet/efficientnet-1.1.0/ -f ./ --no-index -q

### Install and import necessary libraries

In [None]:
import os
import gc
import re

import cv2
import math
import numpy as np
import scipy as sp
import pandas as pd

import tensorflow as tf
from IPython.display import SVG
import efficientnet.tfkeras as efc
from keras.utils import plot_model
import tensorflow.keras.layers as L
from keras.utils import model_to_dot
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from kaggle_datasets import KaggleDatasets
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import load_model

import seaborn as sns
from tqdm import tqdm
import matplotlib.cm as cm
from sklearn import metrics
from sklearn.preprocessing import MultiLabelBinarizer
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Seed NumPy's and TensorFlow's global random generators with the same value.
SEED = 0
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Suppress every Python warning from this point on.
import warnings
warnings.filterwarnings("ignore")

### Set up TPU config

In [None]:
# The TPU initialisation below is commented out, so this cell creates no
# cluster resolver or distribution strategy; only BATCH_SIZE is defined.
# AUTO = tf.data.experimental.AUTOTUNE
# tpu = tf.distribute.cluster_resolver.TPUClusterResolver()

# tf.config.experimental_connect_to_cluster(tpu)
# tf.tpu.experimental.initialize_tpu_system(tpu)
# strategy = tf.distribute.experimental.TPUStrategy(tpu)

# Number of images per batch; used when the test dataset is batched.
# NOTE(review): 16 * 8 presumably means 16 images per replica on 8 TPU cores — confirm.
BATCH_SIZE = 16 * 8
# GCS_DS_PATH = 'gs://kds-044025978685b91e16a595d7ad1cbace6eb4029e76d4f555679a21cd'

### Load labels and paths

In [None]:
def decode_image(filename, label=None, image_size=(512, 512)):
    """Read a JPEG file and return it as a scaled, resized float32 tensor.

    Args:
        filename: Path of the image file, passed to `tf.io.read_file`.
        label: Optional value passed through untouched next to the image.
        image_size: Target size handed to `tf.image.resize`.

    Returns:
        The image tensor alone when `label` is None, otherwise the tuple
        `(image, label)`.
    """
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Cast to float32 and divide by 255 before resizing.
    scaled = tf.cast(pixels, tf.float32) / 255.0
    resized = tf.image.resize(scaled, image_size)

    return resized if label is None else (resized, label)


In [None]:
# Saved Keras model (HDF5 file) used to produce the test predictions.
MODEL_PATH = '../input/efficientnet-full/EfficientNet_epoch20_2.h5'
model = tf.keras.models.load_model(MODEL_PATH)

In [None]:
# Single location of the test images. Both the directory listing and the
# per-file paths are derived from it, so they cannot disagree when the
# notebook's working directory changes.
test_dir = '/kaggle/input/plant-pathology-2021-fgvc8/test_images/'


def format_test(st):
    """Return the full path of the test image whose file name is `st`.

    Args:
        st: File name of an image inside `test_dir`.

    Returns:
        `test_dir` joined with `st`.
    """
    return os.path.join(test_dir, st)


# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of `test_data` (and therefore of the submission) the same on every run.
test_data = pd.DataFrame(sorted(os.listdir(test_dir)), columns=['image'])
test_paths = test_data.image.apply(format_test).values

In [None]:
# Pipeline over the test image paths: decode each file with `decode_image`,
# then group the images into batches of BATCH_SIZE.
# `tf.data.experimental.AUTOTUNE` is the named constant for the value -1 and
# lets tf.data choose the parallelism of the map step.
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices(test_paths)
    .map(decode_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(BATCH_SIZE)
)

In [None]:
# from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(test_paths, test_paths, test_size=0.999, random_state=42)
# test_dataset = (
#     tf.data.Dataset
#     .from_tensor_slices(X_train)
#     .map(decode_image, num_parallel_calls=AUTO)
#     .batch(BATCH_SIZE)
# )

In [None]:
# Locations of the training images and of the training label file.
IMAGE_PATH = "../input/plant-pathology-2021-fgvc8/train_images/"
TRAIN_PATH = "../input/plant-pathology-2021-fgvc8/train.csv"
train_data = pd.read_csv(TRAIN_PATH)

# Every distinct string found in the `labels` column, in value_counts order.
aaa = train_data['labels'].value_counts().keys()

# One row per distinct label string: `label` keeps the raw string and
# `labels` holds the same string split on single spaces.
df = pd.DataFrame()
df['label'] = aaa
df['labels'] = df['label'].apply(lambda text: text.split(' '))
s = df['labels'].tolist()

# Binarise the split label lists into one 0/1 column per class, then attach
# the raw label string as the last column.
mlb = MultiLabelBinarizer()
trainx = pd.DataFrame(mlb.fit_transform(s), columns=mlb.classes_, index=df.index)
trainx['label'] = aaa

# Lookup table: tuple of 0/1 class indicators -> original label string.
# The trailing `label` column is sliced off before the tuple is built.
merge = {
    tuple(np.array(trainx[trainx['label'] == combo])[0][0:-1]): combo
    for combo in aaa
}
merge

### Generate submission

In [None]:
# Run the loaded model over the batched test dataset (verbose=1 enables Keras' progress output).
# NOTE(review): the later thresholding cell builds a six-entry indicator per row, so this is
# presumably an (n_images, 6) array of per-class scores — confirm against the model.
probs_efn = model.predict(test_dataset, verbose=1)
print(probs_efn)
# Empty frame with the two submission columns; it is filled in a later cell.
sub = pd.DataFrame(columns = ['image', 'labels'])

In [None]:
# The six class names, listed alphabetically.
arr = [
    'complex',
    'frog_eye_leaf_spot',
    'healthy',
    'powdery_mildew',
    'rust',
    'scab',
]

In [None]:
# Cut-offs tried in order for every prediction row: 0.20, 0.25, ..., 0.45.
# They are derived from integers so the 0.5 bound is excluded exactly;
# accumulating 0.05 in floating point drifts to 0.49999999999999994 and would
# let an extra pass at (almost) 0.5 slip under a `< 0.5` check.
THRESHOLDS = [percent / 100 for percent in range(20, 50, 5)]

# Label emitted when no cut-off yields a class combination present in `merge`.
FALLBACK_LABEL = 'scab'

output = []
for aa in probs_efn:
    for thres in THRESHOLDS:
        # 0/1 indicator per class: 1 where the score is above the cut-off.
        m = tuple((aa > thres).astype(int).tolist())
        if m in merge:
            # First cut-off whose indicator pattern is a known label combination wins.
            output.append(merge[m])
            break
    else:
        # No cut-off produced a known combination.
        output.append(FALLBACK_LABEL)
output

In [None]:
# Fill the submission frame: the test image file names next to the label
# string chosen for each of them.
sub = sub.assign(image=test_data['image'], labels=output)
sub

In [None]:
# Write the submission table to the working directory without the index column.
SUBMISSION_PATH = './submission.csv'
sub.to_csv(SUBMISSION_PATH, index=False)