In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# Load the competition's training table (one row per image).
data = pd.read_csv(
    filepath_or_buffer="../input/plant-pathology-2021-fgvc8/train.csv",
)

In [None]:
# Dimensions of the loaded training table as (rows, columns).
data.shape

In [None]:
# Preview the first rows of the training table.
data.head()

In [None]:
# Preview the `image` column; its values are later appended to the image
# directory path to locate each file on disk.
data["image"].head()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Directory containing the training images. File names from data["image"] are
# joined to it by plain string concatenation, so the trailing slash matters.
train_paths="../input/plant-pathology-2021-fgvc8/train_images/"

In [None]:
# Read one sample image (the row labelled 4) to inspect what the raw data looks like.
sample_file = data.loc[4, "image"]
img = plt.imread(train_paths + sample_file)

In [None]:
# Array shape of the decoded sample image
# (presumably height, width, channels — confirm against the cell output).
img.shape

In [None]:
# Render the sample image as a visual sanity check.
plt.imshow(img)

In [None]:
# List the distinct raw label strings to see which class names occur
# (the encoding step later looks for each class name inside these strings).
data["labels"].unique()

In [None]:
# Zero matrix backing the one-hot label table: one row per row of `data`,
# one column per disease class (6 classes).
# The row count is read from `data` instead of being hard-coded, so the table
# always matches the CSV that was actually loaded (a mismatch would otherwise
# misalign labels or introduce NaN rows further down).
zerofilling=np.zeros((data.shape[0],6))

In [None]:
# One indicator column per disease class, initialised from the zero matrix;
# the encoding step sets an entry to 1 where the class applies to an image.
labels = pd.DataFrame(
    zerofilling,
    columns=[
        "healthy",
        "scab",
        "frog_eye_leaf_spot",
        "complex",
        "rust",
        "powdery_mildew",
    ],
)

In [None]:
# Sanity check before encoding: every entry should still be zero here.
labels.head()

In [None]:
# Multi-hot encode the raw label strings into the indicator table.
#
# Each raw label is a whitespace-separated list of class names. The strings are
# parsed once into token sets, then each indicator column is written in a
# single assignment. Compared with cell-by-cell `.loc` writes this
#   * avoids ~rows x classes scalar assignments (very slow in pandas),
#   * matches whole class names rather than substrings, and
#   * pairs rows by position, so it does not rely on `data` having a 0..n-1 index.
label_sets = [set(full_lab.split()) for full_lab in data["labels"]]
for lab in labels.columns:
    # Floats keep the column dtype identical to the zero-initialised table.
    labels[lab] = [float(lab in present) for present in label_sets]

In [None]:
# Inspect the indicator table after encoding: rows should now contain 1s
# in the columns of the classes present for that image.
labels.head()

In [None]:
# Give `labels` the same index as `data` so the column-wise concat further
# down pairs each image row with its own indicator row.
labels.index=data.index

In [None]:
# Confirm the indicator table looks unchanged after re-indexing.
labels.head()

In [None]:
# Remove the raw label string column now that it has been encoded.
# A non-inplace drop with errors="ignore" makes this cell safe to re-run:
# the in-place version raises KeyError the second time because the column
# is already gone.
data = data.drop(columns="labels", errors="ignore")

In [None]:
# Attach the indicator columns to the image table (rows are paired by index).
# Any indicator columns left over from a previous run of this cell are removed
# first; otherwise re-running would append duplicate column names, and
# selecting those names as targets would return too many columns.
data = pd.concat(
    [data.drop(columns=list(labels.columns), errors="ignore"), labels],
    axis=1,
)

In [None]:
# Per-class positives, as absolute counts and as a share of all images.
label_counts = labels.sum()

print("Label Counts:\n")
print(label_counts)

print("\n\nLabel Percentages:\n")
print(100*label_counts/data.shape[0])

The classes are quite imbalanced (see the label percentages above).
I'll train a simple baseline model first and revisit the imbalance once there are results to compare against.

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Shared image pipeline: scale pixel values by 1/255, standardise each image
# individually (zero mean, unit variance), and reserve 20% of the rows as the
# validation subset.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    samplewise_center=True,
    samplewise_std_normalization=True,
    validation_split=0.2,
)

In [None]:
def _flow_subset(subset):
    """Build a batch iterator over one split ('training' or 'validation') of `data`.

    Both splits share every setting: images are read from `train_paths`,
    resized to 128x128 RGB, batched by 32, and paired with the indicator
    columns as raw (multi-label) targets.
    """
    return train_datagen.flow_from_dataframe(
        dataframe=data,
        directory=train_paths,
        x_col='image',
        y_col=list(labels.columns),
        target_size=(128, 128),
        color_mode='rgb',
        class_mode='raw',
        batch_size=32,
        subset=subset,
    )


train_generator = _flow_subset('training')

# Despite the name, this is the validation split carved out of the training data.
test_generator = _flow_subset('validation')

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, BatchNormalization

In [None]:
# Baseline CNN for 6-way multi-label classification of 128x128 RGB images.
# Four convolutional stages shrink the feature map before two small dense
# layers; the output layer uses one sigmoid unit per class so that several
# classes can be active for the same image.
convnet = Sequential()

# Stage 1: strided 5x5 convolution, normalise, downsample.
convnet.add(Conv2D(filters=16, kernel_size=5, strides=3, padding="same",
                   activation="relu", name="conv1", input_shape=(128, 128, 3)))
convnet.add(BatchNormalization(name="BN1"))
convnet.add(MaxPool2D(pool_size=(2, 2), name="Pool1"))

# Stage 2: strided 4x4 convolution.
convnet.add(Conv2D(filters=32, kernel_size=4, strides=2, padding="same",
                   name="conv2", activation="relu"))
convnet.add(BatchNormalization(name="BN2"))
convnet.add(MaxPool2D(pool_size=(2, 2), name="Pool2"))

# Stage 3: 3x3 convolution at stride 1.
convnet.add(Conv2D(filters=64, kernel_size=3, strides=1, padding="same",
                   name="conv3", activation="relu"))
convnet.add(BatchNormalization(name="BN3"))
convnet.add(MaxPool2D(pool_size=(2, 2), name="Pool3"))

# Stage 4: 1x1 convolution to halve the channel count before flattening.
convnet.add(Conv2D(filters=32, kernel_size=1, strides=1, padding="valid",
                   name="conv4", activation="relu"))
convnet.add(BatchNormalization(name="BN4"))

convnet.add(Flatten(name="Flatten"))

# Classifier head: two dense blocks, each followed by dropout and batch norm.
convnet.add(Dense(64, activation="relu", name="FullyConnected1"))
convnet.add(Dropout(0.3, name="DropOut1"))
convnet.add(BatchNormalization(name="BN5"))

convnet.add(Dense(32, activation="relu", name="FullyConnected2"))
convnet.add(Dropout(0.3, name="DropOut2"))
convnet.add(BatchNormalization(name="BN6"))

# One independent probability per class.
convnet.add(Dense(6, activation="sigmoid", name="OutputDense"))

# Binary cross-entropy treats every class as its own yes/no decision.
convnet.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
convnet.summary()

In [None]:
# First training run: 5 epochs, evaluated on the validation split after each epoch.
history = convnet.fit(
    x=train_generator,
    validation_data=test_generator,
    epochs=5,
)

In [None]:
# Continue training the same model (weights carry over) for 2 more epochs;
# the metrics of this run are kept separately from the first run's history.
history2 = convnet.fit(
    x=train_generator,
    validation_data=test_generator,
    epochs=2,
)