In [None]:
import numpy as np 
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from pathlib import Path
import os.path
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import os
import cv2
import pandas as pd
import random
import os
import PIL
import tensorflow as tf
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# Root directory of the plant-pathology-2021-fgvc8 input data.
path = '../input/plant-pathology-2021-fgvc8'

In [None]:
# Load the training table; the 'image' column becomes the row index.
train_df = pd.read_csv(r'../input/plant-pathology-2021-fgvc8/train.csv', index_col='image')

In [None]:
# Number of rows for each distinct value of the 'labels' column.
train_df.labels.value_counts()

In [None]:
# Bar chart of how many training rows carry each raw 'labels' value.
# The counts are computed once and passed by keyword: seaborn >= 0.12 no
# longer accepts x and y as positional arguments.
label_counts = train_df.labels.value_counts()
plt.figure(figsize=(15,12))
labels = sns.barplot(x=label_counts.index, y=label_counts.values)
# Long label strings overlap when horizontal, so tilt the tick labels.
for item in labels.get_xticklabels():
    item.set_rotation(45)
plt.title('Label Distribution', weight='bold')
plt.show()

In [None]:
# Directories holding the train and test images of the dataset.
train_img_Path = '../input/plant-pathology-2021-fgvc8/train_images'
test_img_Path = '../input/plant-pathology-2021-fgvc8/test_images'
# Submission template read from the same dataset directory.
sample_submission = pd.read_csv(r'../input/plant-pathology-2021-fgvc8/sample_submission.csv')

In [None]:
# Preview the first rows of the training table.
train_df.head()

In [None]:
# Turn each space-separated label string into a list of label names.
# The isinstance guard keeps the cell idempotent: on a second run the values
# are already lists, which have no .split and would otherwise raise.
train_df['labels'] = train_df['labels'].apply(
    lambda s: s.split(' ') if isinstance(s, str) else s
)
train_df.head()

In [None]:

# Show nine randomly chosen training images in a 3x3 grid, each titled with
# its label list and pixel size.
fig1 = plt.figure(figsize=(20,10))

for i in range(1, 10):

    # Sample over the real row count (including row 0) rather than a
    # hard-coded upper bound that silently goes stale if the table changes.
    rand = random.randrange(len(train_df))
    sample = os.path.join('../input/plant-pathology-2021-fgvc8/train_images', train_df.index[rand])

    # 3x3 matches the nine images drawn; the context manager closes the file
    # handle once the pixels have been handed to matplotlib.
    ax = fig1.add_subplot(3,3,i)
    with PIL.Image.open(sample) as img:
        ax.imshow(img)
        # .iloc is positional; the frame is indexed by image name, so a plain
        # integer key would rely on pandas' deprecated positional fallback.
        title = f"{train_df['labels'].iloc[rand]}{img.size}"
    ax.set_title(title)

# Lay the grid out once, after every subplot exists.
fig1.tight_layout()

In [None]:
# Training images are read from a separate 'resized-plant2021' dataset
# (folder name suggests 512 px copies -- TODO confirm); test images come from
# the original dataset directory.
TRAIN_PATH = '../input/resized-plant2021/img_sz_512/'
TEST_PATH = '../input/plant-pathology-2021-fgvc8/test_images/'


In [None]:
# Re-read train.csv with the default integer index so that 'image' stays an
# ordinary column (the generators below use it as x_col).
resized_train = pd.read_csv('../input/plant-pathology-2021-fgvc8/train.csv')

In [None]:
# (rows, columns) of the training table.
resized_train.shape

In [None]:
# Turn each space-separated label string into a list of label names, the
# form the multi-label binarizer and the Keras generators consume.
# The isinstance guard keeps the cell idempotent: on a second run the values
# are already lists, which have no .split and would otherwise raise.
resized_train['labels'] = resized_train['labels'].apply(
    lambda s: s.split(' ') if isinstance(s, str) else s
)
resized_train.head()

In [None]:
# Multi-hot encode the label lists: one 0/1 column per class name.
trans_label = MultiLabelBinarizer()
trans_label.fit(resized_train['labels'])
labels = pd.DataFrame(
    trans_label.transform(resized_train['labels']),
    columns=trans_label.classes_,
)
# NOTE: this rebinds train_df to "image name + one column per class",
# replacing the image-indexed frame loaded at the top of the notebook.
train_df = pd.concat([resized_train['image'], labels], axis=1)
train_df.head()

In [None]:
# Display the training table with its list-valued 'labels' column.
resized_train

In [None]:
import keras

# Options for the shared image generator: pixel values are scaled to [0, 1]
# and 20% of the rows are reserved for the "validation" subset.
generator_options = dict(
    rescale=1/255.0,
    preprocessing_function=None,
    data_format=None,
    validation_split=0.2,
)
datagen = keras.preprocessing.image.ImageDataGenerator(**generator_options)

In [None]:
# Training subset: shuffled batches of 32 RGB images resized to 224x224,
# with targets built from the list-valued 'labels' column.
train_flow_options = dict(
    directory='../input/resized-plant2021/img_sz_512',
    x_col='image',
    y_col='labels',
    subset="training",
    color_mode="rgb",
    target_size=(224,224),
    class_mode="categorical",
    batch_size=32,
    shuffle=True,
    seed=40,
)
train_data = datagen.flow_from_dataframe(resized_train, **train_flow_options)

In [None]:
# Validation subset: same source and preprocessing as the training flow, but
# served in a fixed order (shuffle=False).
valid_flow_options = dict(
    directory='../input/resized-plant2021/img_sz_512',
    x_col='image',
    y_col='labels',
    subset="validation",
    color_mode="rgb",
    target_size=(224,224),
    class_mode="categorical",
    batch_size=32,
    shuffle=False,
    seed=40,
)
valid_data = datagen.flow_from_dataframe(resized_train, **valid_flow_options)

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

# Learn the set of class names, then encode every row as a 0/1 vector.
mlb = MultiLabelBinarizer()
mlb.fit(resized_train['labels'])
hot_labels = mlb.transform(resized_train['labels'])

# Show the learned class order followed by the encoded matrix.
print(mlb.classes_)
print(hot_labels)

In [None]:
# Wrap the multi-hot matrix in a DataFrame: one column per class, rows
# aligned with resized_train's index.
df_labels = pd.DataFrame(hot_labels,columns=mlb.classes_,index=resized_train.index)
df_labels

In [None]:
# Bar chart of how many rows carry each individual class (column sums of the
# multi-hot table).
fig, ax = plt.subplots(figsize=(15,10))
sns.barplot(x=df_labels.columns, y=df_labels.sum().values, ax=ax)

# **DenseNet 121**

In [None]:
from keras.applications import InceptionResNetV2
from keras.applications import MobileNetV2
from keras.applications import DenseNet121
from keras.applications import DenseNet169

import keras
from keras.layers import Dense,Dropout,Flatten
from tensorflow.keras.layers import GlobalAveragePooling2D
from keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow_addons as tfa

# DenseNet121 backbone without its classification top, initialised from a
# local weights file rather than a download.
weight_path='../input/tf-keras-pretrained-model-weights/No Top/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5'
base_model=DenseNet121(weights=weight_path,include_top=False, input_shape=(224,224,3))

# Freeze every backbone layer so only the new head below is trained.
for layer in base_model.layers:
    layer.trainable=False

# Head: global pooling -> 128 -> dropout -> 64, applied in order.
head_layers = [
    GlobalAveragePooling2D(),
    Dense(128,activation='relu'),
    Dropout(0.5),
    Dense(64,activation='relu'),
]
x=base_model.output
for head_layer in head_layers:
    x=head_layer(x)

# Six independent sigmoid outputs (multi-label), one per class.
predictions=Dense(6,activation='sigmoid')(x)

model=Model(inputs=base_model.input,outputs=predictions)

In [None]:
# Macro-averaged F1 over the six sigmoid outputs. threshold=0.5 binarises
# each output independently; with the default (None) only the arg-max output
# counts as positive, which mis-scores images that carry several labels.
f1 = tfa.metrics.F1Score(num_classes=6,average='macro',threshold=0.5)

model.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy',f1])

# EarlyStopping looks the monitored value up in the epoch logs by name, so it
# needs the string key of the validation metric ('val_f1_score', the same key
# the F1 plot reads from hist.history) -- not the metric object itself, which
# is never found and leaves early stopping inactive.
es=EarlyStopping(patience=4,monitor='val_f1_score',mode='max',restore_best_weights=True)

# model.fit accepts generators directly (fit_generator is deprecated).
# len(generator) is the number of batches at the generator's own batch size,
# so every epoch covers the whole subset; the previous samples//128 assumed a
# batch size of 128 while the generators yield 32, so each epoch used only
# about a quarter of the data.
hist = model.fit(train_data,
                 validation_data=valid_data,
                 epochs=20,
                 steps_per_epoch=len(train_data),
                 validation_steps=len(valid_data),
                 callbacks=[es])

# Accuracy

In [None]:
# Training vs. validation accuracy, one point per completed epoch.
train_curve = hist.history['accuracy']
val_curve = hist.history['val_accuracy']
epoch_list = list(range(1, len(train_curve) + 1))

fig, ax = plt.subplots(figsize=(15,6))
ax.plot(epoch_list, train_curve, label='accuracy')
ax.plot(epoch_list, val_curve, label='val_accuracy')
ax.legend()
plt.show()

# F1

In [None]:
# Training vs. validation F1 score, one point per completed epoch.
train_curve = hist.history['f1_score']
val_curve = hist.history['val_f1_score']
epoch_list = list(range(1, len(train_curve) + 1))

fig, ax = plt.subplots(figsize=(15,6))
ax.plot(epoch_list, train_curve, label='f1_score')
ax.plot(epoch_list, val_curve, label='val_f1_score')
ax.legend()
plt.show()

# Loss

In [None]:
# Training vs. validation loss, one point per completed epoch.
plt.figure(figsize=(15,6))
# Derive the epoch axis from the series actually plotted (previously the
# 'accuracy' history, a copy-paste leftover from the accuracy plot).
epoch_list = list(range(1, len(hist.history['loss']) + 1))
plt.plot(epoch_list, hist.history['loss'],label='loss')
plt.plot(epoch_list, hist.history['val_loss'],label='val_loss')
plt.xlabel('epoch')
plt.title('Loss')
plt.legend()
plt.show()

In [None]:
# The sample submission file is used as the list of test images: its 'image'
# column is what the test generator reads file names from.
test_path="../input/plant-pathology-2021-fgvc8/sample_submission.csv"
test = pd.read_csv(test_path)
test

In [None]:
# Unlabelled test flow: images only (class_mode=None), in file order so the
# prediction rows line up with the rows of `test`.
test_flow_options = dict(
    directory='../input/plant-pathology-2021-fgvc8/test_images',
    x_col='image',
    y_col=None,
    color_mode='rgb',
    target_size=(224,224),
    class_mode=None,
    shuffle=False,
)
test_data = datagen.flow_from_dataframe(test, **test_flow_options)

# One row of six sigmoid scores per test image.
predictions = model.predict(test_data)
print(predictions)

# Convert each row of per-class scores into the list of class indices whose
# score exceeds 0.4. A row with no score above the cut-off falls back to its
# single highest-scoring class, so every image receives at least one label.
class_idx=[]
for pred in predictions:
    # enumerate yields each score's true position. The earlier
    # list.index(score) lookup returned the FIRST position holding that value,
    # so two classes with equal scores (e.g. both saturated at 1.0) produced a
    # duplicated index and silently dropped the second class.
    temp=[idx for idx, score in enumerate(pred) if score>0.4]
    if not temp:
        temp.append(np.argmax(pred))
    class_idx.append(temp)
print(class_idx)


In [None]:
# Mapping from class name to output index, as assigned by the training flow.
class_dict = train_data.class_indices
def get_key(val):
    """Return the class name whose index in class_dict equals val.

    Returns None when no class has that index.
    """
    return next((key for key, value in class_dict.items() if val==value), None)
print(class_dict)

# One space-separated label string per test image, in prediction order.
sub_pred=[' '.join(get_key(i) for i in img_) for img_ in class_idx]
print(sub_pred)

In [None]:
# Build the submission frame from the test image names.
# .copy() makes `sub` an independent DataFrame; assigning a column to the
# bare slice test[['image']] triggers pandas' SettingWithCopyWarning.
sub = test[['image']].copy()
sub['labels']=sub_pred
sub

In [None]:
# Write the submission file to the working directory, without the row index.
sub.to_csv('submission.csv',index=False)