# **Plant Pathology Analysis with transfer learning(DenseNet 169)**
* *Data Visualization*
* *Data Augmentation*
* *Model training: DenseNet 169*
* *Prediction*
* *Submission*

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import cv2
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report 

^import 하는 부분

In [None]:
# Load the training CSV provided by the competition and display it.
TRAIN_CSV = "../input/plant-pathology-2021-fgvc8/train.csv"
df = pd.read_csv(filepath_or_buffer=TRAIN_CSV)
df

대회에서 제공하는 파일 읽기

# *Data Visualization*

Each leaf may have more than one disease (e.g. the second leaf in the above image is labelled *scab, frog_eye_leaf_spot, complex*), so this is a ***multilabel classification*** problem.

In [None]:
# List the distinct raw label strings found in the training table.
df['labels'].unique()
# Base classes that appear (alone or combined) in the label strings:
# 1.healthy
# 2.scab
# 3.complex
# 4.rust
# 5.frog_eye_leaf_spot
# 6.powdery_mildew

In [None]:
# Turn each space-separated label string into a list of class names so the
# labels can be multi-hot encoded by the following cells.
# The isinstance guard keeps this cell idempotent: re-running it on values that
# are already lists is a no-op instead of raising
# AttributeError ('list' object has no attribute 'split').
df['labels'] = df['labels'].apply(
    lambda labels: labels.split(' ') if isinstance(labels, str) else labels
)
df.head()

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

# Multi-hot encode the per-image label lists: learn the class vocabulary first,
# then turn every label list into a 0/1 row with one column per class.
mlb = MultiLabelBinarizer()
mlb.fit(df['labels'])
hot_labels = mlb.transform(df['labels'])

print(mlb.classes_)
print(hot_labels)

^라벨 이진화

In [None]:
# Wrap the multi-hot matrix in a DataFrame (one column per class, rows aligned
# with df's index) so it can be inspected as a table.
df_labels = pd.DataFrame(data=hot_labels, index=df.index, columns=mlb.classes_)
df_labels

# *Data Augmentation*
데이터의 양을 늘리기위해 원본 이미지에 각종 변환을 적용시켜 개수를 증강시키는 기법


In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Directory holding the images used for training/validation.
# Alternative directory left by the author:
# '../input/plant-pathology-2021-fgvc8/train_images'
IMAGE_DIR = '../input/resized-plant2021/img_sz_256'
VALIDATION_SPLIT = 0.2

# Training pipeline: rescale pixel values to [0, 1] and apply light random
# augmentation (small rotations, zoom, shear and horizontal flips).
datagen = ImageDataGenerator(rescale=1/255.0,
                            rotation_range=5,
                            zoom_range=0.1,
                            shear_range=0.05,
                            horizontal_flip=True,
                            validation_split=VALIDATION_SPLIT)

# Validation pipeline: rescale only. Validation images must not be randomly
# augmented, otherwise the validation metrics (and anything monitoring them,
# such as early stopping) are measured on distorted inputs.
# The same validation_split is used so both generators carve the dataframe at
# the same row boundary and the two subsets stay disjoint.
valid_datagen = ImageDataGenerator(rescale=1/255.0,
                                   validation_split=VALIDATION_SPLIT)

# Arguments shared by both subsets, kept in one place so they cannot drift apart.
# class_mode='categorical' with list-valued 'labels' yields multi-hot targets.
flow_kwargs = dict(
    directory=IMAGE_DIR,
    x_col='image',
    y_col='labels',
    target_size=(224,224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=444,
)

train_generator = datagen.flow_from_dataframe(df, subset='training', **flow_kwargs)
valid_generator = valid_datagen.flow_from_dataframe(df, subset='validation', **flow_kwargs)

# *Transfer Learning: DenseNet 169*

In [None]:
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.applications import DenseNet169

import keras
from keras.layers import Dense,Dropout,Flatten
from tensorflow.keras.layers import GlobalAveragePooling2D
from keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow_addons as tfa

# DenseNet169 backbone without its classification top, initialised from a
# local weights file and fed 224x224 RGB inputs.
weight_path = '../input/tf-keras-pretrained-model-weights/No Top/densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5'
base_model = DenseNet169(input_shape=(224,224,3), include_top=False, weights=weight_path)

# Classification head: global average pooling, two ReLU dense layers with
# dropout in between, then 6 independent sigmoid outputs (one per class).
features = GlobalAveragePooling2D()(base_model.output)
features = Dense(128, activation='relu')(features)
features = Dropout(0.2)(features)
features = Dense(64, activation='relu')(features)
predictions = Dense(6, activation='sigmoid')(features)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze every backbone layer; only the new head remains trainable at this point.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False


Train the last few layers and the output layers first.

In [None]:
# Metrics reported for every epoch (passed to model.compile in a later cell).
metrics = [
    keras.metrics.CategoricalAccuracy(name='accuracy'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
]

# Macro-averaged F1 over the 6 classes.
# NOTE(review): no `threshold` is given, so the metric presumably binarises
# predictions by argmax; for multi-label outputs an explicit threshold may be
# more appropriate - confirm against the tensorflow_addons documentation.
f1 = tfa.metrics.F1Score(num_classes=6,average='macro')

# EarlyStopping looks `monitor` up by name in the per-epoch logs, so it must be
# a string key, not the metric object itself (with the object the value is never
# found and training never stops early). The F1 metric is logged as 'f1_score';
# the 'val_' prefix selects the value computed on the validation data.
es = EarlyStopping(patience=4, monitor='val_f1_score', mode='max', restore_best_weights=True)


Freeze the trained layers, then train the second and the third dense blocks in DenseNet169.

In [None]:
# Inspect the layers from index 595 onwards; the next cell uses index 595 as
# the upper boundary of the fine-tuned range.
model.layers[595:]

In [None]:
# Fine-tune only layers 143..594: everything below index 143 and everything
# from index 595 onwards is frozen. This is the net effect of the original
# three overlapping freeze/unfreeze loops, expressed in a single pass.
# NOTE(review): layers from index 595 on are frozen here, but no earlier cell in
# this notebook trains them - confirm they were trained elsewhere before this step.
FINE_TUNE_START = 143
FINE_TUNE_END = 595
for index, layer in enumerate(model.layers):
    layer.trainable = FINE_TUNE_START <= index < FINE_TUNE_END

model.summary()

# Pass one flat list of metrics (the original nested [metrics, f1] mixed a list
# and a metric object at the same level).
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[*metrics, f1])

# model.fit accepts generators directly; fit_generator is deprecated.
# Steps are derived from each generator's own batch size so that one epoch
# covers the whole subset (the previous hard-coded 128 did not match the
# generators' batch_size of 32 and visited only about a quarter of the data).
history = model.fit(train_generator,
                    validation_data=valid_generator,
                    epochs=200,
                    steps_per_epoch=train_generator.samples // train_generator.batch_size,
                    validation_steps=valid_generator.samples // valid_generator.batch_size,
                    callbacks=[es])

In [None]:
# Print the mean of each training metric over all recorded epochs.
for metric_name in ('loss', 'accuracy', 'precision', 'recall'):
    per_epoch = history.history[metric_name]
    print(f"{metric_name}_av : {sum(per_epoch) / len(per_epoch)}")

In [None]:
# Training accuracy per epoch.
plt.rc('font', size=20)
accuracy_per_epoch = history.history['accuracy']
epoch_numbers = list(range(1, len(accuracy_per_epoch) + 1))

fig, ax = plt.subplots(figsize=(15,12))
ax.plot(epoch_numbers, accuracy_per_epoch, label='accuracy')
ax.set_xlabel('epoches')
ax.set_ylabel('accuracy')
ax.legend()
plt.show()

In [None]:
# Training loss per epoch.
plt.rc('font', size=20)
loss_per_epoch = history.history['loss']
epoch_numbers = list(range(1, len(loss_per_epoch) + 1))

fig, ax = plt.subplots(figsize=(15,12))
ax.plot(epoch_numbers, loss_per_epoch, label='loss')
ax.set_xlabel('epoches')
ax.set_ylabel('loss')
ax.legend()
plt.show()

In [None]:
# Training precision per epoch.
plt.rc('font', size=20)
precision_per_epoch = history.history['precision']
epoch_numbers = list(range(1, len(precision_per_epoch) + 1))

fig, ax = plt.subplots(figsize=(15,12))
ax.plot(epoch_numbers, precision_per_epoch, label='precision')
ax.set_xlabel('epoches')
ax.set_ylabel('precision')
ax.legend()
plt.show()

In [None]:
# Training recall per epoch.
plt.rc('font', size=20)
recall_per_epoch = history.history['recall']
epoch_numbers = list(range(1, len(recall_per_epoch) + 1))

fig, ax = plt.subplots(figsize=(15,12))
ax.plot(epoch_numbers, recall_per_epoch, label='recall')
ax.set_xlabel('epoches')
ax.set_ylabel('recall')
ax.legend()
plt.show()

In [None]:
# Training F1 score per epoch.
plt.rc('font', size=20)
f1_per_epoch = history.history['f1_score']
epoch_numbers = list(range(1, len(f1_per_epoch) + 1))

fig, ax = plt.subplots(figsize=(15,12))
ax.plot(epoch_numbers, f1_per_epoch, label='f1_score')
ax.set_xlabel('epoches')
ax.set_ylabel('f1')
ax.legend()
plt.show()

In [None]:
# Optional: persist the trained model (disabled).
# NOTE(review): the file name refers to InceptionResNetV2 although this
# notebook builds a DenseNet169 model - rename before enabling.
#model.save('plant_incepresnetv2.h5')

# *Prediction*

In [None]:
# Read the sample submission; its 'image' column is used below to locate the
# test images to predict.
test_path = "../input/plant-pathology-2021-fgvc8/sample_submission.csv"
test = pd.read_csv(filepath_or_buffer=test_path)
test

In [None]:
# Inference must be deterministic, so the test images go through a
# rescale-only generator. Reusing the training `datagen` here would apply
# random rotation/zoom/shear/flips to the test images and make the
# predictions vary from run to run.
test_datagen = ImageDataGenerator(rescale=1/255.0)

test_data = test_datagen.flow_from_dataframe(
    test,
    directory='../input/plant-pathology-2021-fgvc8/test_images',
    x_col='image',
    y_col=None,
    color_mode='rgb',
    target_size=(224,224),
    class_mode=None,   # no labels: the generator yields image batches only
    shuffle=False      # keep row order so predictions line up with `test`
)

# One row of 6 sigmoid scores per test image.
predictions = model.predict(test_data)
print(predictions)

# Convert each row of scores into a list of predicted class indices:
# every class whose score exceeds THRESHOLD is selected; if none does, fall
# back to the single highest-scoring class so each image gets at least one label.
# Indices come from enumerate() rather than list.index(value): index() returns
# the FIRST position holding that value, so two classes with identical scores
# would produce a duplicated index and silently drop the other class.
THRESHOLD = 0.4

class_idx = []
for pred in predictions:
    selected = [idx for idx, score in enumerate(pred) if score > THRESHOLD]
    if not selected:
        selected = [int(np.argmax(pred))]
    class_idx.append(selected)
print(class_idx)

In [None]:
# Mapping from class name to output index, as assigned by the training generator.
class_dict = train_generator.class_indices


def get_key(val):
    """Return the class name whose index in class_dict equals val.

    Returns None when no class has that index.
    """
    return next((name for name, index in class_dict.items() if val == index), None)


print(class_dict)

# One space-separated label string per test image, in prediction order.
sub_pred = [' '.join(get_key(i) for i in img_) for img_ in class_idx]
print(sub_pred)

# *Submission*

In [None]:
# Build the submission table: the test image names plus the predicted label
# strings. assign() returns a new DataFrame, which avoids writing a column
# into a slice of `test` (the original pattern triggers pandas'
# SettingWithCopyWarning).
sub = test[['image']].assign(labels=sub_pred)
sub

In [None]:
# Write the submission table to disk without the index column.
sub.to_csv(path_or_buf='submission.csv', index=False)