In [None]:
import numpy as np
import pandas as pd
import cv2
import os
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')

import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import Dense,GlobalAveragePooling2D,Flatten,Conv2D,BatchNormalization,Dropout,MaxPooling2D,Activation
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator as Imgen

from PIL import Image
from sklearn.metrics import confusion_matrix,classification_report

# Let TensorFlow allocate GPU memory on demand instead of reserving it all up
# front. Iterating (rather than indexing [0]) keeps this cell runnable on a
# CPU-only machine, where the device list is empty, and applies the same
# setting to every visible GPU.
physical_devices = tensorflow.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tensorflow.config.experimental.set_memory_growth(gpu_device, True)

In [None]:
# Build a one-hot label table from train.csv.
# Each data row is "<image file>,<space separated label names>"; the first
# line is the header and is skipped.
with open("../input/plant-pathology-2021-fgvc8/train.csv") as f:
    lines = f.readlines()

classes = {}              # label name -> column index, in order of first appearance
train_x, tmp_y = [], []   # image file names / list of label names per image
for row in lines[1:]:
    # Drop the line terminator (including a '\r' from CRLF files) and ignore
    # blank lines, which would otherwise fail on the fields[1] lookup below.
    row = row.rstrip('\r\n')
    if not row:
        continue

    fields = row.split(',')
    image_labels = fields[1].split(' ')
    train_x.append(fields[0])
    tmp_y.append(image_labels)
    for label_name in image_labels:
        # setdefault only assigns a new index the first time a label is seen.
        classes.setdefault(label_name, len(classes))

# Column order follows the index stored in `classes`.
train_y_columns = list(classes)

# One row per image: [file name, flag for class 0, flag for class 1, ...].
train_y = [
    [image_name] + [1 if label_name in image_labels else 0 for label_name in train_y_columns]
    for image_name, image_labels in zip(train_x, tmp_y)
]

train_df = pd.DataFrame(train_y, columns=['image'] + train_y_columns)


In [None]:
# Preview the first rows of the one-hot label table.
train_df.head()

In [None]:
# Print how many images carry (1) or lack (0) each label, one table per label column.
for label_column in classes.keys():
    label_counts = train_df[label_column].value_counts()
    print(label_counts)
    print('-------------------------------------------')

In [None]:
# Show the label-name -> column-index mapping built while parsing train.csv.
print(classes)

In [None]:
# Spot-check the pixel dimensions of a handful of training images.
sample_names = ('800113bb65efe69e.jpg', '80070f7fb5e2ccaa.jpg', '80077517781fb94f.jpg', '800cbf0ff87721f8.jpg')
for testimg in sample_names:
    img = cv2.imread('../input/plant-pathology-2021-fgvc8/train_images/' + testimg)
    print(img.shape)

In [None]:
# Load the sample submission; its "image" column later decides which test images get predicted.
sample = pd.read_csv('../input/plant-pathology-2021-fgvc8/sample_submission.csv')
# Last expression of the cell: displays (rows, columns).
sample.shape

In [None]:
# Preview the first rows of the sample submission.
sample.head()

In [None]:
# Training configuration shared by the cells below.
# train_size is passed both to cv2.resize and as the generators' target_size;
# it is square, so a width/height ordering difference between the two APIs
# cannot matter here.
train_size = (448,448)
# Number of images per batch for both data generators.
batch_size=64
# Only referenced by the (currently disabled) model.fit call.
epochs = 30

In [None]:
# List the contents of the dataset directory that holds the pre-trained detector model.
!ls ../input/detector/

In [None]:
# Pre-trained bounding-box model used by detect_leaf, loaded once at cell execution.
det_model_path = "../input/detector/detector.h5"
# Size every image is resized to before it is handed to the detector.
det_image_size = (224,224)
det_model = load_model(det_model_path)

def detect_leaf(image):
    """Crop an image to the bounding box predicted by `det_model`.

    The image is first resized to `det_image_size`; the detector is expected
    to return four relative coordinates (startX, startY, endX, endY), which
    are scaled by the resized image's width and height. The crop is taken
    from that resized copy (not from the full-resolution input) and then
    resized to `train_size`.

    Args:
        image: image array accepted by `cv2.resize` (height, width, channels).

    Returns:
        The cropped region resized to `train_size`. If the predicted box is
        empty or inverted after clamping to the image bounds, the whole
        resized image is used instead of failing on an empty crop.

    Raises:
        ValueError: if `image` is None (e.g. `cv2.imread` could not read a file).
    """
    if image is None:
        raise ValueError('detect_leaf received no image (None)')

    image = cv2.resize(image, det_image_size)
    # make bounding box predictions on the input image (pixel values scaled to 0-1)
    preds = det_model.predict( np.array([image])/255) [0]
    (startX, startY, endX, endY) = preds
    (h, w) = image.shape[:2]

    # scale the predicted bounding box coordinates based on the image
    # dimensions, clamping them into the image: a negative index would be
    # interpreted by numpy as "count from the end" and select the wrong region
    startX = min(max(int(startX * w), 0), w)
    startY = min(max(int(startY * h), 0), h)
    endX = min(max(int(endX * w), 0), w)
    endY = min(max(int(endY * h), 0), h)

    if endX > startX and endY > startY:
        crop = image[startY:endY, startX:endX]
    else:
        # Degenerate box: cv2.resize cannot handle an empty array, so fall
        # back to the uncropped (detector-sized) image.
        crop = image

    return cv2.resize(crop, train_size)

In [None]:
# Generator shared by the "training" and "validation" subsets (validation_split=0.2).
# detect_leaf crops each image to the predicted leaf box. Per the Keras docs the
# preprocessing_function runs after resizing/augmentation and `rescale` is applied
# afterwards, so detect_leaf presumably receives 0-255 pixel values -- TODO confirm.
# NOTE(review): the random rotation/shear/zoom/flip settings also apply to any
# "validation" subset created from this same generator.
datagen = Imgen( preprocessing_function=detect_leaf,
                 rotation_range=4,
                  shear_range=0.2,
                  zoom_range=0.2,
                  horizontal_flip=True,
                  validation_split=0.2,
                  rescale=1./255
                 )

In [None]:
# Training batches: file names come from the 'image' column, targets from the
# one-hot label columns. class_mode="raw" is used so the label columns are
# passed through as numeric values rather than being re-encoded.
train_ds = datagen.flow_from_dataframe(
    train_df,
    directory = '../input/plant-pathology-2021-fgvc8/train_images', 
    x_col = 'image',
    y_col = train_y_columns,
    subset="training",
    color_mode="rgb",
    target_size = train_size,
    class_mode="raw",
    batch_size=batch_size,
    shuffle=True,
    seed=123,
)

In [None]:
# Validation images must not be randomly rotated/sheared/zoomed/flipped,
# otherwise validation metrics are measured on distorted inputs. Use a
# generator with the same leaf cropping and rescaling but no augmentation.
# validation_split has to equal the value used for the training generator
# (0.2) so both generators cut train_df at the same row.
val_datagen = Imgen( preprocessing_function=detect_leaf,
                     validation_split=0.2,
                     rescale=1./255
                   )

val_ds = val_datagen.flow_from_dataframe(
    train_df,
    directory = '../input/plant-pathology-2021-fgvc8/train_images',
    x_col = 'image',
    y_col = train_y_columns,
    subset="validation",
    color_mode="rgb",
    target_size = train_size,
    class_mode="raw",
    batch_size=batch_size,
    shuffle=True,
    seed=123,
)

In [None]:
# Pull one batch and show the shapes of both the image and the label arrays.
# Only the last expression of a cell is displayed, so the two shapes are
# returned together as a tuple.
x,y = next(train_ds)
x.shape, y.shape

In [None]:
#plot function
def plot_images(img,labels):
    """Show up to ten images of a batch in a 2x5 grid, titled with their label names.

    Args:
        img: indexable batch of images with pixel values scaled to 0-1
            (they are multiplied by 255 before display).
        labels: per-image sequences of 0/1 flags; position i corresponds to
            the label whose index in the global `classes` mapping is i.
    """
    # Invert the name -> index mapping once instead of scanning it per flag.
    names_by_index = {index: name for name, index in classes.items()}

    # A final batch can hold fewer than ten images.
    shown = min(10, len(img))

    plt.figure(figsize=(20,8))
    for i in range(shown):
        plt.subplot(2,5,i+1)
        plt.imshow((img[i]*255).astype(np.uint8))
        active = [names_by_index[pos]
                  for pos, flag in enumerate(labels[i])
                  if flag == 1 and pos in names_by_index]
        plt.title(','.join(active))
        plt.axis('off')

# Draw another batch from the training generator and display it with its labels.
x,y = next(train_ds)
plot_images(x,y)

In [None]:
# Small CNN for multi-label classification: four Conv2D + MaxPooling2D stages,
# global average pooling, then one sigmoid unit per label column so every
# label is predicted independently.
# The input shape is built from train_size as (train_size[1], train_size[0], 3).
model = Sequential([

    Conv2D(32,(3,3),activation='relu',input_shape=(train_size[1],train_size[0],3)),
    MaxPooling2D((2,2)),

    Conv2D(64,(3,3),activation='relu'),
    MaxPooling2D((2,2)),

    Conv2D(64,(3,3),activation='relu'),
    MaxPooling2D((2,2)),

    Conv2D(128,(3,3),activation='relu'),
    MaxPooling2D((2,2)),

    GlobalAveragePooling2D(),
    Dense(len(train_y_columns),activation='sigmoid')

])

In [None]:
# Print the layer-by-layer overview of the model.
model.summary()

In [None]:
# Render the model architecture as a diagram, including tensor shapes, dtypes
# and layer names. Prerequisites noted by the author:
# sudo apt install graphviz
# pip install pydot
tensorflow.keras.utils.plot_model(model,
                      show_shapes=True,
                      show_dtype=True,
                      show_layer_names=True)

In [None]:
# Binary cross-entropy matches the sigmoid output layer: each label column is
# treated as an independent yes/no decision.
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [None]:
# Callbacks for the (currently disabled) model.fit call:
# - stop training once 'val_accuracy' has not improved for 3 epochs;
# - write the best model so far to Model_xcp.h5.
# NOTE(review): ModelCheckpoint is given no `monitor`, so it uses the Keras
# default metric, which may differ from the one EarlyStopping watches -- confirm.
my_calls = [keras.callbacks.EarlyStopping(monitor='val_accuracy',patience=3),
            keras.callbacks.ModelCheckpoint("Model_xcp.h5",verbose=1,save_best_only=True)]

In [None]:
# Replace the freshly built `model` with a previously trained one loaded from
# disk; the training call below is kept only for reference and is disabled.
model = tensorflow.keras.models.load_model('../input/trainedlocal/last_crop.h5')
#hist = model.fit(train_ds,epochs=epochs,validation_data=val_ds,callbacks=my_calls)

In [None]:
# Persist the current `model` to final_crop.h5 in the working directory.
model.save('final_crop.h5')

In [None]:
def plot_training_history(history):
    """Plot training and validation accuracy and loss curves side by side.

    Args:
        history: object returned by `model.fit`; its `epoch` list and its
            `history` dict (keys 'accuracy', 'val_accuracy', 'loss',
            'val_loss') supply the x and y values.
    """
    plt.figure(figsize=(15,6))

    plt.subplot(1,2,1)
    plt.plot(history.epoch,history.history['accuracy'],label = 'Training')
    plt.plot(history.epoch,history.history['val_accuracy'],label = 'validation')
    plt.title("Accuracy")
    plt.legend()

    plt.subplot(1,2,2)
    plt.plot(history.epoch,history.history['loss'],label = 'Training')
    plt.plot(history.epoch,history.history['val_loss'],label = 'validation')
    plt.title("Loss")
    plt.legend()

    plt.show()


# `hist` only exists when the model.fit(...) call is enabled; when a saved
# model is loaded instead there is no training history to plot.
if 'hist' in globals():
    plot_training_history(hist)

In [None]:
def plot_images(img,labels):
    """Show up to three unscaled (0-255) images with a caption each.

    Note: this redefines the earlier `plot_images`, which expects images
    scaled to 0-1.

    Args:
        img: indexable collection of image arrays with 0-255 pixel values.
            matplotlib interprets three-channel arrays as RGB, so arrays read
            with cv2.imread should be converted from BGR before plotting.
        labels: one entry per image; either a ready-made caption string
            (e.g. the output of `get_cname`) or a sequence of 0/1 flags that
            is decoded through the global `classes` mapping.
    """
    names_by_index = {index: name for name, index in classes.items()}

    # Do not fail when fewer than three images are available.
    shown = min(3, len(img))

    plt.figure(figsize=(20,8))
    for i in range(shown):
        plt.subplot(2,5,i+1)

        plt.imshow((img[i]).astype(np.uint8))
        entry = labels[i]
        if isinstance(entry, str):
            caption = entry
        else:
            caption = ', '.join(names_by_index[pos]
                                for pos, flag in enumerate(entry)
                                if flag == 1 and pos in names_by_index)

        plt.title(caption)
        plt.axis('off')

def get_cname(pred, threshold=0.35, class_index=None):
    """Turn per-class scores into a space-separated string of label names.

    Args:
        pred: sequence of scores, one per class, ordered by class index.
        threshold: a class is selected when its score is strictly greater
            than this value.
        class_index: mapping of label name -> class index. Defaults to the
            notebook-level `classes` mapping.

    Returns:
        The names of all selected classes in index order, joined by single
        spaces. When no score exceeds the threshold, the single
        highest-scoring class is returned instead so that a prediction is
        never empty. An empty string is returned only when `pred` contains
        no score for a known class.
    """
    if class_index is None:
        class_index = classes
    names_by_index = {index: name for name, index in class_index.items()}

    # Scores at positions without a known class name are ignored.
    scored = [(score, names_by_index[pos])
              for pos, score in enumerate(pred)
              if pos in names_by_index]
    if not scored:
        return ''

    picked = [name for score, name in scored if score > threshold]
    if not picked:
        # Nothing passed the threshold: fall back to the most likely class.
        picked = [max(scored, key=lambda pair: pair[0])[1]]

    return ' '.join(picked)


# Predict the labels of every image listed in the sample submission.
test_imgs = []   # decoded test images (RGB), kept for optional plotting
img_names = []   # file names in prediction order
predicts = []    # predicted label strings, aligned with img_names
for imgname in sample["image"]:
    img_names.append(imgname)
    img_path = os.path.join('../input/plant-pathology-2021-fgvc8/test_images', imgname)
    print(img_path)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError('could not read test image: {}'.format(img_path))

    # cv2.imread yields BGR channel order, while the training generator loads
    # images with color_mode="rgb"; convert so inference sees the same
    # channel order as training.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    test_imgs.append( img )

    # detect_leaf already returns the crop resized to train_size.
    crop = detect_leaf(img)

    crop = np.array([crop])/255
    pred = model.predict( crop )
    predicted = get_cname(pred[0])
    predicts.append(predicted)

#plot_images(test_imgs,predicts)

In [None]:
with open('../input/plant-pathology-2021-fgvc8/sample_submission.csv') as s:
    lines = s.readlines()

with open('submission.csv', 'w') as file:    
    for id, line in enumerate(lines):
        if id==0:
            file.write(line)
        else:
            imgfile = line.split(',')[0]
            newline = '{},{}\n'.format(imgfile, predicts[id-1] )
            file.write(newline)

!cat submission.csv            

In [None]:
# Optional: submit submission.csv with the Kaggle command line client
# (kept disabled).
#
# SECURITY: this cell previously embedded a real Kaggle username and API key.
# Credentials must never be stored in a notebook; the exposed key should be
# revoked in the Kaggle account settings. Replace it with your own Kaggle
# credentials, supplied from outside the notebook: the Kaggle client reads
# the KAGGLE_USERNAME and KAGGLE_KEY environment variables, or a private
# ~/.kaggle/kaggle.json file with mode 600.
#
# To submit, make sure the credentials are available and uncomment:
#
# assert os.environ.get("KAGGLE_USERNAME") and os.environ.get("KAGGLE_KEY"), "Kaggle credentials are not set"
# !kaggle competitions submit -c plant-pathology-2021-fgvc8 -f 'submission.csv' -m 'V1_crop'