In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import tensorflow as tf
import logging
logging.basicConfig()
import struct
from tensorflow.keras.layers import Dense, Activation, Conv2D, Flatten, Dropout, Input, BatchNormalization
%matplotlib inline
import IPython.core.display         
# setup output image format (Chrome works best)
IPython.core.display.set_matplotlib_formats("svg")
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import sklearn
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MultiLabelBinarizer,OneHotEncoder
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import backend as K

In [None]:
# Read the training label table and the separate duplicate-image list.
train_csv_path = "../input/plant-pathology-2021-fgvc8/train.csv"
duplicates_csv_path = "../input/duplicates/duplicates.csv"
train, dup = pd.read_csv(train_csv_path), pd.read_csv(duplicates_csv_path)

In [None]:
# Rebuild both tables restricted to the columns the rest of the notebook uses.
dup_columns = ['dup1', 'dup2']
train_columns = ['image', 'labels']
dup = pd.DataFrame(data=dup, columns=dup_columns)
train = pd.DataFrame(data=train, columns=train_columns)

In [None]:
# Quick sanity check: show the column index of each table.
for frame in (dup, train):
    print(frame.columns)

In [None]:

# Drop every training row whose image name appears in the 'dup1' column of the
# duplicates table. One membership mask replaces the per-name loop, which
# re-filtered the whole frame once for every duplicate entry.
# dropna() keeps the original semantics: a missing name never matched with `==`.
is_duplicate = train['image'].isin(dup['dup1'].dropna())
train = train.drop(index=train.index[is_duplicate])

In [None]:
# Number of training rows left after removing duplicates.
n_train_rows = len(train)
print(n_train_rows)

In [None]:
# Bar chart of how many rows carry each (still space-joined) label string.
label_counts = train['labels'].sort_values().value_counts()
label_counts.plot.bar()


In [None]:
# Turn each space-separated label string into a list of label names.
# Values that are not strings (for example lists produced by an earlier run of
# this cell) are left untouched, so re-running the cell no longer raises.
train['labels'] = train['labels'].apply(
    lambda s: s.split(' ') if isinstance(s, str) else s
)
train[:10]

In [None]:
def add_gauss_noise(x,sigma2=0.05):
    """Return `x` plus zero-mean Gaussian noise of the same shape.

    Parameters
    ----------
    x : array with a `.shape` attribute
        Values to perturb; the input itself is not modified.
    sigma2 : float, default 0.05
        Passed to `np.random.normal` as its scale argument, i.e. it is used as
        the standard deviation of the noise despite the name.

    Returns
    -------
    The element-wise sum of `x` and the sampled noise.
    """
    noise = np.random.normal(loc=0, scale=sigma2, size=x.shape)
    return x + noise

In [None]:
# Augmentation settings for the image generator: small rotations, shifts, shear
# and zoom, random flips, additive Gaussian noise, and a 1/255 rescale.
# 10% of the rows are reserved for the "validation" subset.
# NOTE(review): Keras runs `preprocessing_function` before `rescale`, so the
# noise from add_gauss_noise (scale 0.05 by default) is presumably added on the
# un-rescaled pixel range -- confirm this is the intended noise level.
augmentation_options = dict(
    rescale = 1./255,
    rotation_range = 10,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    shear_range = 0.1,
    zoom_range = 0.1,
    brightness_range = None,
    horizontal_flip=True,
    vertical_flip=True,
    preprocessing_function = add_gauss_noise,
    validation_split= 0.1,
)
datagen = ImageDataGenerator(**augmentation_options)

# Batch size shared by every generator in the notebook.
bsize  = 16

In [None]:
# Training batches: the "training" subset of `train`, read from the resized
# image directory, scaled to 224x224 RGB, with multi-hot ("categorical") targets
# built from the label lists.
# shuffle=True: with shuffle=False every epoch fed the images in the same fixed
# dataframe order, which hurts SGD training. The fixed seed keeps the shuffling
# reproducible, and `class_indices` does not depend on the shuffle setting.
train_data = datagen.flow_from_dataframe(
    train,
    directory = '../input/resized-plant2021/img_sz_512',
    x_col = 'image',
    y_col = 'labels',
    subset="training",
    color_mode="rgb",
    target_size = (224,224),
    class_mode="categorical",
    batch_size=bsize,
    shuffle=True,
    seed=40,
)

In [None]:
# Validation images must be scored as-is: routing them through `datagen` applied
# random rotation/shift/flip/noise to them, making val_loss and the F1 score noisy.
# This generator only rescales. Its validation_split has to stay equal to the
# one used by `datagen` (0.1) so the training and validation subsets do not overlap.
valid_datagen = ImageDataGenerator(rescale = 1./255, validation_split = 0.1)

valid_data = valid_datagen.flow_from_dataframe(
    train,
    directory = '../input/resized-plant2021/img_sz_512',
    x_col = 'image',
    y_col = 'labels',
    subset="validation",
    color_mode="rgb",
    target_size = (224,224),
    class_mode="categorical",
    batch_size=bsize,
    shuffle=False,
    seed=40,
)

In [None]:
from tensorflow.keras.preprocessing import image
import random

# Reset Keras' global state, then fix the Python and TensorFlow random seeds.
K.clear_session()
random.seed(4487)
tf.random.set_seed(4487)

# DenseNet121 backbone without its classification top, with global average
# pooling on the output; weights are read from a local .h5 file.
#w = "../input/plant2021-weight/weights.hdf5"
weight_path = '../input/tf-keras-pretrained-model-weights/No Top/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5'
base_model = tf.keras.applications.DenseNet121(
    include_top=False,
    pooling='avg',
    weights=weight_path,
)

In [None]:
# Classification head on top of the backbone output: two fully connected ReLU
# layers (64 then 16 units) followed by a 6-unit sigmoid output layer, i.e. one
# independent score per class rather than a softmax.
hidden = Dense(64, activation='relu')(base_model.output)
hidden = Dense(16, activation='relu')(hidden)
predictions = Dense(6, activation='sigmoid')(hidden)

In [None]:
# Key under which the tracked metric appears in `history.history`.
accname = 'f1_score'

def plot_history(history):
    """Plot the loss curves and, when present, the `accname` metric curves.

    Parameters
    ----------
    history : object with a `history` dict attribute
        Maps a metric name to a list with one value per epoch (as returned by
        Keras `fit`). 'loss' is required; 'val_loss', `accname` and
        'val_' + `accname` are plotted only when they are present.

    Loss is drawn in red on the left axis and the metric in blue on a twin
    right axis. Solid lines are training curves, dashed lines are validation
    curves, and each legend entry shows the last recorded value.
    """
    curves = history.history
    fig, ax1 = plt.subplots()

    ax1.plot(curves['loss'], 'r',
             label="training loss ({:.6f})".format(curves['loss'][-1]))
    # Validation curves only exist when fit() was given validation data.
    if 'val_loss' in curves:
        ax1.plot(curves['val_loss'], 'r--',
                 label="validation loss ({:.6f})".format(curves['val_loss'][-1]))
    ax1.grid(True)
    # History lists hold one entry per epoch, not per iteration.
    ax1.set_xlabel('epoch')
    ax1.legend(loc="best", fontsize=9)
    ax1.set_ylabel('loss', color='r')
    ax1.tick_params('y', colors='r')

    if accname in curves:
        ax2 = ax1.twinx()

        ax2.plot(curves[accname], 'b',
                 label="training {} ({:.4f})".format(accname, curves[accname][-1]))
        val_key = 'val_' + accname
        if val_key in curves:
            ax2.plot(curves[val_key], 'b--',
                     label="validation {} ({:.4f})".format(accname, curves[val_key][-1]))

        ax2.legend(loc="lower right", fontsize=9)
        # Label the axis with the metric actually plotted (it used to say 'acc').
        ax2.set_ylabel(accname, color='b')
        ax2.tick_params('y', colors='b')

In [None]:
from tensorflow.keras.models import Sequential, Model

# Full network: DenseNet backbone input -> sigmoid class scores.
model = Model(inputs=base_model.input,outputs = predictions)
import tensorflow_addons as tfa
# Use the Keras bundled with TensorFlow so the optimizer and callbacks come from
# the same package as the layers and Model above (previously standalone `keras`).
from tensorflow import keras

# NOTE(review): F1Score is built without `threshold`; check the
# tensorflow_addons docs for how scores are binarised in that case and whether
# that suits this multi-label sigmoid output.
f1 = tfa.metrics.F1Score(num_classes=6, average='macro')
# `learning_rate` replaces the deprecated `lr` alias.
model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.03, decay=1e-4, momentum=0.8, nesterov=True),
              loss='binary_crossentropy', metrics=[f1])

# Optional early-stopping callbacks (not enabled in callbacks_list below).
accearlystop = keras.callbacks.EarlyStopping(
    # `monitor` must be a key of the training logs; passing the metric object
    # itself (as before) can never match one, so the callback would never trigger.
    monitor='val_f1_score',
    min_delta=0.02,       # threshold to consider as no change
    patience=5,           # stop after this many epochs with no change
    verbose=1, mode='max', restore_best_weights= True
)
lossearlystop = keras.callbacks.EarlyStopping(
    monitor='val_loss',   # watch the validation loss
    min_delta=0.02,       # threshold to consider as no change
    patience=5,           # stop after this many epochs with no change
    verbose=1, mode='min', restore_best_weights= True
)
# Multiply the learning rate by `factor` when val_loss has not improved for
# `patience` epochs.
lrschedule = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                 factor=0.05, patience=5, verbose=1)
callbacks_list = [lrschedule]
# Alternative: callbacks_list = [accearlystop, lossearlystop]

# Model.fit accepts generators directly; fit_generator is deprecated in TF 2.x.
history = model.fit(
            train_data,  # data from generator
            epochs=13,
            callbacks=callbacks_list,
            validation_data=valid_data,
            verbose=1)



In [None]:
# Draw the training/validation curves recorded by the fit call above.
plot_history(history)

In [None]:
# Score the trained model on the validation generator. Model.evaluate accepts
# generators directly; evaluate_generator is deprecated in TF 2.x.
# The two returned values are the loss and the single compiled metric.
loss, f1score = model.evaluate(valid_data, verbose=1)

In [None]:
from tqdm import tqdm
import PIL

# The sample submission lists the test image file names.
test = pd.read_csv('../input/plant-pathology-2021-fgvc8/sample_submission.csv')

# Save a 256x256 copy of every test image into the working directory, under the
# same file name, so the generator below can read them from './'.
test_image_dir = '../input/plant-pathology-2021-fgvc8/test_images/'
for file_name in tqdm(test['image']):
    source_path = f'{test_image_dir}{file_name!s}'
    with PIL.Image.open(source_path) as original:
        resized = original.resize((256,256))
        resized.save(f'./{file_name}')

In [None]:
# Test images get the same 1/255 rescaling as training but none of the random
# augmentation: the shared `datagen` would rotate/shift/flip/add noise to every
# test image before prediction.
test_datagen = ImageDataGenerator(rescale = 1./255)
test_data = test_datagen.flow_from_dataframe(
    test,
    directory = './',
    x_col="image",
    y_col= None,
    color_mode="rgb",
    target_size = (224,224),
    classes=None,
    class_mode=None,
    batch_size=bsize,
    shuffle=False,   # keep predictions aligned with the rows of `test`
    seed=40,
)
# Minimum score for a class to be reported for an image.
best_threshold = 0.4
preds = model.predict(test_data)
print(preds)
preds = preds.tolist()

# For every image keep the positions of all classes scoring at least the
# threshold; when none qualifies, fall back to the single highest-scoring class.
# Positions come from enumerate() rather than list.index(value): index() returns
# the first position holding that value, so two classes with equal scores were
# both reported as the first one.
indices = []
for pred in preds:
    selected = [pos for pos, score in enumerate(pred) if score >= best_threshold]
    if not selected:
        selected = [int(np.argmax(pred))]
    indices.append(selected)

print(indices)

In [None]:
# Invert the training generator's class-name -> index mapping so predicted
# indices can be translated back into label names.
labels = {index: name for name, index in train_data.class_indices.items()}
print(labels)

# One space-separated label string per test image, in prediction order.
testlabels = [
    ' '.join(str(labels[class_index]) for class_index in image_indices)
    for image_indices in indices
]

print(testlabels)

In [None]:
import os

# Delete every .jpg in the working directory (the resized test copies written
# earlier) so they are not left behind in the output.
for jpg_path in tf.io.gfile.glob('./*.jpg'):
    os.remove(jpg_path)

In [None]:
# Fill the sample submission's 'labels' column with the predicted label strings,
# write it to submission.csv without the index, and display the table.
sample_submission_path = '../input/plant-pathology-2021-fgvc8/sample_submission.csv'
sub = pd.read_csv(sample_submission_path)
sub['labels'] = testlabels
sub.to_csv(path_or_buf='submission.csv', index=False)
sub