# Statistics

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
# Locations of the plant-pathology-2020-fgvc7 input files.
IMAGE_PATH = "../input/plant-pathology-2020-fgvc7/images/"
TRAIN_PATH = "../input/plant-pathology-2020-fgvc7/train.csv"
TEST_PATH = "../input/plant-pathology-2020-fgvc7/test.csv"
SUB_PATH = "../input/plant-pathology-2020-fgvc7/sample_submission.csv"

# Read the three CSV tables once; the following cells only consume these frames.
train, test, sample_submission = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_PATH, TEST_PATH, SUB_PATH)
)

In [None]:
# Quick look at the training table: first rows, row count, numeric summary.
for summary in (train.head(), train['image_id'].count(), train.describe()):
    print(summary)

In [None]:
# Relative frequency of the values 0 and 1 in each label column.
# The legend is derived from the column name so it cannot contradict the data
# (the hand-written legend for `healthy` had 0 and 1 swapped).
n_images = train['image_id'].count()
for position, label in enumerate(('healthy', 'multiple_diseases', 'rust', 'scab')):
    header = 'not {0} 0, {0} 1: \n'.format(label)
    if position:
        # blank line between consecutive tables, none before the first
        header = '\n' + header
    print(header, train[label].value_counts()/n_images)

In [None]:
# Quick look at the test table: first rows, row count, summary.
for summary in (test.head(), test['image_id'].count(), test.describe()):
    print(summary)

In [None]:
# Quick look at the sample submission: first rows, row count, numeric summary.
for summary in (
    sample_submission.head(),
    sample_submission['image_id'].count(),
    sample_submission.describe(),
):
    print(summary)

# Image processing

In [None]:
import matplotlib.pyplot as plt
import cv2 # Open cv

In [None]:
def loadimage(data_input,nb=None):
    """
    Load images listed in a table from the competition image folder.

    @Input
        data_input: table with an "image_id" column (file names without extension)
        nb: maximum number of images to load; None loads one image per row
    @Output
        data_output: list of images as returned by cv2.imread
    @Raises
        FileNotFoundError: when cv2.imread cannot read one of the files
    """
    names = list(data_input["image_id"])
    if nb is not None:
        # Slice up front so exactly `nb` images are read. The former counter
        # test ran after the append and compared with `>`, which let
        # nb + 2 images through.
        names = names[:max(int(nb), 0)]

    data_output = []
    for name in names:
        path = '../input/plant-pathology-2020-fgvc7/images/'+name+'.jpg'
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising
            raise FileNotFoundError(path)
        data_output.append(img)
    return data_output
        
def generalpreprocessing(data_input,function,cmap=None):
    """
    Apply one transformation to every element of a dataset.

    @Input
        data_input: iterable of items (images in this notebook)
        function: callable applied to each item in turn
        cmap: accepted for call compatibility; not used by this function
    @Output
        data_output: list of transformed items, in input order
    """
    return [function(item) for item in data_input]

def plotNimage(data,n,titre=None,cmap=None):
    """
    Show the first n images of data side by side on a single row.

    @Input
        data: indexable collection of images; data[0] .. data[n-1] are drawn
        n: number of panels (n >= 1)
        titre: optional title text passed to plt.title
        cmap: optional colormap forwarded to imshow (e.g. 'gray')
    """
    # squeeze=False always yields a 2-D array of axes; without it a single
    # panel (n == 1) comes back as a bare Axes object that cannot be iterated.
    fig, axs = plt.subplots(1, n,figsize=(20,20),squeeze=False)
    for i, ax in enumerate(axs[0]):
        ax.set_axis_off()
        ax.imshow(data[i], cmap = cmap)
    # plt.title acts on the current axes only, not on the figure as a whole
    plt.title(titre)
    plt.show()

In [None]:
# Small sample of training images used by the image-processing demos
nb_image = 4
img_trainBGR = loadimage(train, nb_image)
# Reorder the channels from BGR to RGB before plotting
img_trainRGB = generalpreprocessing(
    img_trainBGR,
    lambda bgr: cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB),
)
plotNimage(img_trainRGB, nb_image)

In [None]:
#Color-space RGB
# Each plane is stored under a name that matches its channel; the H/S/V names
# are reserved for the HSV cell, which assigns them itself.
img_R = generalpreprocessing(img_trainRGB,lambda img: img[:,:,0])# select R channel
plotNimage(img_R,nb_image,'Channel R',cmap='gray')
img_G = generalpreprocessing(img_trainRGB,lambda img: img[:,:,1])# select G channel
plotNimage(img_G,nb_image,'Channel G',cmap='gray')
img_B = generalpreprocessing(img_trainRGB,lambda img: img[:,:,2])# select B channel
plotNimage(img_B,nb_image, 'Channel B',cmap='gray')


In [None]:
# Color-space HSV: convert the BGR samples, then show each plane in grayscale
img_hsv = generalpreprocessing(
    img_trainBGR,
    lambda bgr: cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV),
)

# Planes 0, 1, 2 of the converted images are H, S and V respectively
img_H, img_S, img_V = (
    generalpreprocessing(img_hsv, lambda hsv, plane=plane: hsv[:, :, plane])
    for plane in range(3)
)
plotNimage(img_H, nb_image, 'Channel H', cmap='gray')
plotNimage(img_S, nb_image, 'Channel S', cmap='gray')
plotNimage(img_V, nb_image, 'Channel V', cmap='gray')

In [None]:
# Canny filter: derivative + gaussian
# Both thresholds are set to 100; the result is good for some images, not for all.
img_canny = generalpreprocessing(
    img_trainBGR,
    lambda bgr: cv2.Canny(bgr, threshold1=100, threshold2=100),
)
plotNimage(img_canny, nb_image, 'Canny', cmap='gray')

In [None]:
# Fourier transform
def FT(img):
    """Return the 2-D FFT of img with the zero-frequency term shifted to the centre."""
    return np.fft.fftshift(np.fft.fft2(img))

img_grey = generalpreprocessing(img_trainBGR,lambda img: cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
img_FT = generalpreprocessing(img_grey,FT)#input in CNN
# Log-magnitude computed image by image: calling np.abs on the whole list
# only works when every image has the same shape.
img_magFT = generalpreprocessing(img_FT,lambda spectrum: 20*np.log(np.abs(spectrum)+0.01))#for plot only
plotNimage(img_magFT,nb_image,'Fourier transform',cmap='gray')

In [None]:
# Inverse Fourier transform #allow us to use filter in frequency field
def invFT(img_in):
    """Un-shift a centred spectrum, apply the inverse 2-D FFT and return the real part."""
    return np.real(np.fft.ifft2(np.fft.ifftshift(img_in)))
# Transform the spectra back to the spatial domain and display the result
img_back = generalpreprocessing(img_FT, invFT)
plotNimage(img_back, nb_image, titre='Image back', cmap='gray')

In [None]:
print('Check dimension after fourier transform')
# Shape of the first spectrum, then of the first reconstructed image
for sample in (img_FT[0], img_back[0]):
    print(np.shape(sample))

# DenseNet 121

In [None]:
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model,Input
from keras.layers import Dense,Conv2D,Dropout,BatchNormalization,Activation,GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from sklearn.model_selection import train_test_split
from keras.applications import DenseNet121

#To compare with: https://www.kaggle.com/shawon10/plant-pathology-classification-using-densenet121/

In [None]:
# Image identifiers of the training table.
# NOTE(review): no later cell visible here reads this global `x`; confirm it is still needed.
x = train['image_id']

In [None]:
# Side length, in pixels, of the square images fed to the network
img_size = 150

In [None]:
# Build one 4-channel sample per training image: B, G, R + log-magnitude spectrum
train_image=[]
for name in train['image_id']:
    path='/kaggle/input/plant-pathology-2020-fgvc7/images/'+name+'.jpg'
    img=cv2.imread(path)
    if img is None:
        # cv2.imread returns None rather than raising when a file cannot be read;
        # fail here with the path instead of deep inside cv2.resize
        raise FileNotFoundError(path)
    image=cv2.resize(img,(img_size,img_size),interpolation=cv2.INTER_AREA)

    #Grey scale
    img_grey= cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    #Fourier transform (centred spectrum, log-scaled magnitude)
    fshift = np.fft.fftshift(np.fft.fft2(img_grey))
    img_magFT=20*np.log(np.abs(fshift)+0.01)

    #Normalise channel: each part is divided by its own maximum
    image=image/max(np.max(image),1)  # the floor of 1 keeps an all-zero image from becoming NaN
    img_magFT=img_magFT/np.max(img_magFT)

    #Add FT channel
    imgbis=cv2.merge((image[:,:,0],image[:,:,1],image[:,:,2],#BGR
                      img_magFT[:,:])) #FT

    train_image.append(imgbis)

In [None]:
# Build one 4-channel sample per test image: B, G, R + log-magnitude spectrum
test_image=[]
for name in test['image_id']:
    path='/kaggle/input/plant-pathology-2020-fgvc7/images/'+name+'.jpg'
    img=cv2.imread(path)
    if img is None:
        # cv2.imread returns None rather than raising when a file cannot be read;
        # fail here with the path instead of deep inside cv2.resize
        raise FileNotFoundError(path)
    image=cv2.resize(img,(img_size,img_size),interpolation=cv2.INTER_AREA)

    #Grey scale
    img_grey= cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    #Fourier transform (centred spectrum, log-scaled magnitude)
    fshift = np.fft.fftshift(np.fft.fft2(img_grey))
    img_magFT=20*np.log(np.abs(fshift)+0.01)

    #Normalise channel: each part is divided by its own maximum
    image=image/max(np.max(image),1)  # the floor of 1 keeps an all-zero image from becoming NaN
    img_magFT=img_magFT/np.max(img_magFT)

    #Add FT channel
    imgbis=cv2.merge((image[:,:,0],image[:,:,1],image[:,:,2],#BGR
                      img_magFT[:,:])) #FT

    test_image.append(imgbis)

In [None]:
# Pack the list of 4-channel training samples into a single float32 array
X_Train = np.empty((len(train_image), img_size, img_size, 4), dtype=np.float32)
for idx, sample in enumerate(train_image):
    X_Train[idx] = sample
print('Train Shape: {}'.format(X_Train.shape))

In [None]:
# Pack the list of 4-channel test samples into a single float32 array
X_Test = np.empty((len(test_image), img_size, img_size, 4), dtype=np.float32)
for idx, sample in enumerate(test_image):
    X_Test[idx] = sample
print('Test Shape: {}'.format(X_Test.shape))

In [None]:
# Label table: the training frame without its identifier column
# (drop returns a new frame, so `train` itself is left untouched)
y = train.drop(columns=['image_id'])
y.head()

In [None]:
# Label columns as a NumPy array (a copy of the frame's values)
y_train = np.array(y.values, copy=True)
print(y_train.shape, y_train[0])

In [None]:
# Hold out 20% of the training samples for validation; the fixed seed keeps the split repeatable
X_train, X_val, Y_train, Y_val = train_test_split(
    X_Train,
    y_train,
    test_size=0.2,
    random_state=42,
)

In [None]:
def build_densenet():
    """
    Build and compile the DenseNet121-based classifier used in this notebook.

    The network takes (img_size, img_size, 4) inputs. A 3x3 convolution maps
    the 4 channels down to 3 before the ImageNet-initialised DenseNet121
    (without its top), followed by global average pooling, a 256-unit dense
    block with batch normalisation and dropout, and a 4-way softmax layer
    named 'root'. The model summary is printed as a side effect.

    @Output
        model: the compiled keras Model
    """
    backbone = DenseNet121(weights='imagenet', include_top=False)

    # 4 -> 3 channel adapter in front of the backbone
    inputs = Input(shape=(img_size, img_size, 4))
    adapted = Conv2D(3, (3, 3), padding='same')(inputs)

    features = backbone(adapted)

    # classification head
    pooled = GlobalAveragePooling2D()(features)
    pooled = BatchNormalization()(pooled)
    pooled = Dropout(0.5)(pooled)
    hidden = Dense(256, activation='relu')(pooled)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(0.5)(hidden)

    # single softmax output over the 4 label columns
    probabilities = Dense(4,activation = 'softmax', name='root')(hidden)

    model = Model(inputs,probabilities)

    optimizer = Adam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=0.1, decay=0.0)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    model.summary()

    return model

In [None]:
# Instantiate and compile the network (build_densenet also prints its summary)
model = build_densenet()

In [None]:
# Multiply the learning rate by 0.5 after 5 epochs without val_accuracy improvement (floor: 1e-3)
annealer = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=5,
    verbose=1,
    min_lr=1e-3,
)
# Write model.h5 only when the callback's monitored quantity improves
checkpoint = ModelCheckpoint('model.h5', verbose=1, save_best_only=True)

# Data augmentation: random horizontal and vertical flips
datagen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
)
datagen.fit(X_train)

In [None]:
# Train on augmented batches of 32 samples for 50 epochs, validating on the held-out split
train_batches = datagen.flow(X_train, Y_train, batch_size=32)
hist = model.fit_generator(
    train_batches,
    steps_per_epoch=X_train.shape[0] // 32,
    epochs=50,
    verbose=2,
    callbacks=[annealer, checkpoint],
    validation_data=(X_val, Y_val),
)

In [None]:
print(hist.history.keys())

# One figure per metric: training curve first, validation curve second.
# The second curve comes from the val_* history keys, so it is labelled
# 'Validation' (it was previously labelled 'Test').
for metric, axis_label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(hist.history[metric])
    plt.plot(hist.history['val_' + metric])
    plt.title('Model ' + metric)
    plt.ylabel(axis_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

In [None]:
# NOTE(review): `model` holds the weights from the end of training here;
# the model.h5 checkpoint is not reloaded in any cell visible in this notebook.
predict = model.predict(X_Test)

# One-hot encode the highest-scoring class of every row.
# argmax selects exactly one column per row (the first one on a tie), whereas
# comparing each score with the row maximum marked every tied column.
all_predict = np.zeros(predict.shape, dtype=np.float32)
all_predict[np.arange(predict.shape[0]), np.argmax(predict, axis=1)] = 1

In [None]:
# Split the prediction matrix into one list per label column (column order: 0..3)
healthy, multiple_diseases, rust, scab = (
    list(column) for column in all_predict.T
)

In [None]:
# Column name -> values for the submission table
df = {
    'image_id': test['image_id'],
    'healthy': healthy,
    'multiple_diseases': multiple_diseases,
    'rust': rust,
    'scab': scab,
}

# Submission

In [None]:
# Assemble the submission table and show its last rows
data = pd.DataFrame(data=df)
data.tail()

In [None]:
# Write the submission file without the DataFrame index column
data.to_csv('submission.csv',index = False)