# **Plant Pathology Analysis with Transfer Learning (DenseNet 169)**
* *Data Visualization*
* *Data Augmentation*
* *Model training: DenseNet 169*
* *Prediction*
* *Submission*

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import cv2
import pandas as pd
import seaborn as sns

In [None]:
# Training annotations: one row per image, with its disease labels stored as a
# single space-separated string in the 'labels' column.
TRAIN_CSV = "../input/plant-pathology-2021-fgvc8/train.csv"
df = pd.read_csv(TRAIN_CSV)
df

# *Data Visualization*

In [None]:
# How often each distinct (still un-split) label string occurs in the training set.
label_counts = df.value_counts("labels")

plt.figure(figsize=(25,10))
# Axis captions first, then tick styling; the bars are drawn last.
plt.xlabel("labels",fontsize=15)
plt.ylabel("count",fontsize=15)
plt.xticks(rotation=15,fontsize = 12,fontweight = "bold")
plt.yticks(fontsize=15)
sns.barplot(data=df,x=label_counts.index,y=label_counts.values)

In [None]:
train_path="../input/plant-pathology-2021-fgvc8/train_images"
plt.figure(figsize=(20,40))

# Show the first nine training images, each titled with its raw label string.
for slot, (_, row) in enumerate(df.head(9).iterrows(), start=1):
    bgr = cv2.imread(os.path.join(train_path, row['image']))
    # The image is read in BGR channel order; convert to RGB before plotting.
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    ax = plt.subplot(9,3,slot)
    ax.imshow(rgb)
    ax.set_title(row['labels'])

Since each leaf may have more than one disease (e.g. the second leaf in the image above is labelled *scab, frog_eye_leaf_spot, complex*), this can be treated as a ***multilabel classification*** problem.

In [None]:
# Distinct raw label strings in the training set. Each one is a space-separated
# combination of the six base classes listed below.
df['labels'].unique()
# 1.healthy
# 2.scab
# 3.complex
# 4.rust
# 5.frog_eye_leaf_spot
# 6.powdery_mildew

In [None]:
# Turn each space-separated label string into a list of class names, which is
# the form the multi-label encoder and the data generators consume later on.
# The isinstance guard keeps this cell idempotent: running it a second time
# (when the column already holds lists) would otherwise fail, because a list
# has no .split() method.
df['labels'] = df['labels'].apply(
    lambda labels: labels.split(' ') if isinstance(labels, str) else labels
)
df.head()

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer
# Multi-hot encode the per-image label lists; `mlb.classes_` holds the class
# name that belongs to each output column.
mlb = MultiLabelBinarizer()
hot_labels = mlb.fit_transform(df['labels'])
for shown in (mlb.classes_, hot_labels):
    print(shown)

In [None]:
# Wrap the multi-hot matrix in a frame that shares `df`'s index and has one
# column per class name.
df_labels = pd.DataFrame(data=hot_labels, index=df.index, columns=mlb.classes_)
df_labels

In [None]:
# Column sums of the multi-hot frame: how many images carry each individual class.
class_totals = df_labels.sum()

plt.figure(figsize=(25,10))
sns.barplot(x=class_totals.index,y=class_totals.values)

# *Data Augmentation*

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Pixel scaling and hold-out fraction shared by the two generators below.
RESCALE = 1/255.0
VALIDATION_SPLIT = 0.2

# Training pipeline: rescaling plus light random augmentation.
datagen = ImageDataGenerator(rescale=RESCALE,
                            rotation_range=5,
                            zoom_range=0.1,
                            shear_range=0.05,
                            horizontal_flip=True,
                            validation_split=VALIDATION_SPLIT)

# Validation pipeline: rescaling only, so validation metrics are measured on
# undistorted images. It uses the same validation_split as `datagen` so that
# the 'training' and 'validation' subsets remain complementary slices of `df`.
valid_datagen = ImageDataGenerator(rescale=RESCALE,
                                   validation_split=VALIDATION_SPLIT)

# Settings common to both subsets. Images are read from a pre-resized copy of
# the training set; the full-size originals are in
# '../input/plant-pathology-2021-fgvc8/train_images'.
flow_kwargs = dict(
    directory='../input/resized-plant2021/img_sz_256',
    x_col='image',
    y_col='labels',
    target_size=(224,224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=444,
)

train_generator = datagen.flow_from_dataframe(df, subset='training', **flow_kwargs)
valid_generator = valid_datagen.flow_from_dataframe(df, subset='validation', **flow_kwargs)

# *Transfer Learning: DenseNet 169*

In [None]:
from keras.applications import InceptionResNetV2
from keras.applications import MobileNetV2
from keras.applications import DenseNet121
from keras.applications import DenseNet169

import keras
from keras.layers import Dense,Dropout,Flatten
from tensorflow.keras.layers import GlobalAveragePooling2D
from keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow_addons as tfa

# Backbone: DenseNet169 without its classification top, initialised from a
# local weights file.
weight_path='../input/tf-keras-pretrained-model-weights/No Top/densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5'
base_model=DenseNet169(include_top=False, weights=weight_path, input_shape=(224,224,3))

# Classification head: global average pooling -> Dense(128) -> Dropout(0.2)
# -> Dense(64) -> six sigmoid outputs (one independent score per class).
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(128,activation='relu')(pooled)
hidden = Dropout(0.2)(hidden)
hidden = Dense(64,activation='relu')(hidden)
predictions = Dense(6,activation='sigmoid')(hidden)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze every backbone layer; only the head added above stays trainable.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False



In [None]:
# Stage 1 banner. All DenseNet169 backbone layers were frozen when the model
# was built, so the fit that follows only updates the newly added head layers.
print("train the last few layers and the ouput layers first")

In [None]:
# Macro-averaged F1 over the 6 classes. Keras logs it as 'f1_score' for the
# training data and 'val_f1_score' for the validation data.
# NOTE(review): no `threshold` is passed; for independent sigmoid outputs an
# explicit threshold (e.g. 0.5) may reflect multi-label F1 better - confirm
# against the tensorflow_addons F1Score documentation.
f1 = tfa.metrics.F1Score(num_classes=6,average='macro')

model.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy',f1])

# `monitor` has to be the *name* of a logged quantity. Passing the metric
# object itself never matches a key in the epoch logs, so early stopping and
# best-weight restoration would silently never happen.
es=EarlyStopping(patience=4,monitor='val_f1_score',mode='max',restore_best_weights=True)

# Model.fit accepts generators directly; fit_generator is deprecated.
# NOTE(review): the generators use batch_size=32, so `samples//128` steps cover
# roughly a quarter of the data per epoch - confirm this is intentional.
hist = model.fit(train_generator,
                 validation_data=valid_generator,
                 epochs=20,
                 steps_per_epoch=train_generator.samples//128,
                 validation_steps=valid_generator.samples//128,
                 callbacks=[es])

Freeze the layers trained above, then fine-tune the second and third dense blocks of DenseNet169.

In [None]:
#the third dense block:
# model.layers[143:595]

In [None]:
# Show the layers from index 595 onward - the tail of the model that the
# fine-tuning cell below keeps frozen.
model.layers[595:]

In [None]:
# Stage 2: fine-tune only the backbone slice model.layers[143:595].
# Layers before index 143 and layers from index 595 onward stay frozen.
FINE_TUNE_START, FINE_TUNE_END = 143, 595
for position, layer in enumerate(model.layers):
    layer.trainable = FINE_TUNE_START <= position < FINE_TUNE_END

# Re-compile so the changed `trainable` flags are picked up by training.
model.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy',f1])

# Early stopping for this stage, keyed on the logged validation-F1 name
# ('val_f1_score') rather than on the metric object.
es_finetune = EarlyStopping(patience=4,monitor='val_f1_score',mode='max',restore_best_weights=True)

# Model.fit accepts generators directly; fit_generator is deprecated.
# NOTE(review): the generators use batch_size=32, so `samples//128` steps cover
# roughly a quarter of the data per epoch - confirm this is intentional.
history = model.fit(train_generator,
                    validation_data=valid_generator,
                    epochs=15,
                    steps_per_epoch=train_generator.samples//128,
                    validation_steps=valid_generator.samples//128,
                    callbacks=[es_finetune])

In [None]:
# accuracy
# Training vs. validation accuracy for each epoch of the fine-tuning run.
acc_log = history.history
epochs_run = [n + 1 for n in range(len(acc_log['accuracy']))]

plt.figure(figsize=(15,6))
for metric_name in ('accuracy', 'val_accuracy'):
    plt.plot(epochs_run, acc_log[metric_name], label=metric_name)
plt.legend()
plt.show()

In [None]:
# f1  score
# Training vs. validation F1 score for each epoch of the fine-tuning run.
f1_log = history.history
epoch_axis = list(range(1, 1 + len(f1_log['f1_score'])))

f1_fig, f1_ax = plt.subplots(figsize=(15,6))
for metric_name in ('f1_score', 'val_f1_score'):
    f1_ax.plot(epoch_axis, f1_log[metric_name], label=metric_name)
f1_ax.set_xlabel('epoches')
f1_ax.set_ylabel('f1')
f1_ax.legend()
plt.show()

In [None]:
# Optional: uncomment to persist the fine-tuned DenseNet169 model.
# model.save('plant_densenet169.h5')

# *Prediction*

In [None]:
# The sample submission supplies the 'image' column, i.e. the file names of the
# test images that the prediction step reads.
test_path="../input/plant-pathology-2021-fgvc8/sample_submission.csv"
test = pd.read_csv(test_path)
test

In [None]:
# Inference generator: rescaling only. Reusing the augmenting training
# generator here would apply random rotations/zooms/flips to the test images,
# making the predictions noisy and different on every run.
test_datagen = ImageDataGenerator(rescale=1/255.0)
test_data = test_datagen.flow_from_dataframe(
    test,
    directory='../input/plant-pathology-2021-fgvc8/test_images',
    x_col='image',
    y_col=None,
    color_mode='rgb',
    target_size=(224,224),
    class_mode=None,
    shuffle=False   # keep prediction rows in the same order as `test`
)
# One row of sigmoid scores per test image.
predictions = model.predict(test_data)
print(predictions)

# Convert each row of class scores into the list of predicted class indices.
PRED_THRESHOLD = 0.4

class_idx = []
for scores in predictions:
    # Collect positions via enumerate. Looking a value up with list.index()
    # returns the first position holding that value, so tied scores would all
    # map to the same index.
    picked = [pos for pos, score in enumerate(scores) if score > PRED_THRESHOLD]
    if not picked:
        # Nothing cleared the threshold: fall back to the single best class
        # so every image receives at least one label.
        picked = [int(np.argmax(scores))]
    class_idx.append(picked)
print(class_idx)

In [None]:
# Mapping from class name to output index, as assigned by the training generator.
class_dict = train_generator.class_indices

def get_key(val):
    """Return the class name whose index equals `val`, or None if there is none."""
    return next((name for name, index in class_dict.items() if val == index), None)

print(class_dict)

# Translate each image's predicted indices into one space-separated label string.
sub_pred = [' '.join(get_key(i) for i in img_) for img_ in class_idx]
print(sub_pred)

# *Submission*

In [None]:
# Submission table: the test image names plus their predicted label strings.
# .copy() makes `sub` an independent frame, so adding a column does not raise
# pandas' SettingWithCopyWarning for writing into a slice of `test`.
sub = test[['image']].copy()
sub['labels'] = sub_pred
sub

In [None]:
# Persist the submission table as CSV, omitting the DataFrame index column.
sub.to_csv('submission.csv',index=False)