In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import shutil
import json
from PIL import Image

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50, VGG16, InceptionResNetV2
from tensorflow.keras.applications.resnet import preprocess_input

In [None]:
# Folder holding the training images; later cells join it with `image_id`
# values from the table below.
image_path = '/kaggle/input/cassava-leaf-disease-classification/train_images/'

# Training metadata table (later cells read its `image_id` and `label` columns).
train = pd.read_csv(
    '/kaggle/input/cassava-leaf-disease-classification/train.csv'
)

In [None]:
# Preview the first five rows of the training table.
train.head(n=5)

In [None]:
# Load the label-number -> disease-name lookup. The context manager closes the
# file handle as soon as the JSON has been parsed.
with open('/kaggle/input/cassava-leaf-disease-classification/label_num_to_disease_map.json') as map_file:
    label_to_disease = json.load(map_file)

# JSON object keys are always strings, so a numeric `label` column would never
# match them and `disease` would come out entirely NaN. Looking the labels up
# by their string form works whether the column holds ints or strings.
train['disease'] = train['label'].astype(str).map(label_to_disease)

In [None]:
# Display the label-number -> disease-name mapping loaded from the JSON file.
label_to_disease

In [None]:
# new_train = []
# sets = []; getEx = True
# for i in trainingset:
#     blurr = cv2.GaussianBlur(i,(5,5),0)
#     hsv = cv2.cvtColor(blurr,cv2.COLOR_BGR2HSV)
#     #GREEN PARAMETERS
#     lower = (25,40,50)
#     upper = (75,255,255)
#     mask = cv2.inRange(hsv,lower,upper)
#     struc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(11,11))
#     mask = cv2.morphologyEx(mask,cv2.MORPH_CLOSE,struc)
#     boolean = mask>0
#     new = np.zeros_like(i,np.uint8)
#     new[boolean] = i[boolean]
#     new_train.append(new)
    
#     if getEx:
#         plt.subplot(2,3,1);plt.imshow(i) # ORIGINAL
#         plt.subplot(2,3,2);plt.imshow(blurr) # BLURRED
#         plt.subplot(2,3,3);plt.imshow(hsv) # HSV CONVERTED
#         plt.subplot(2,3,4);plt.imshow(mask) # MASKED
#         plt.subplot(2,3,5);plt.imshow(boolean) # BOOLEAN MASKED
#         plt.subplot(2,3,6);plt.imshow(new) # NEW PROCESSED IMAGE
#         plt.show()
#         getEx = False
# new_train = np.asarray(new_train)

# # CLEANED IMAGES
# for i in range(8):
#     plt.subplot(2,4,i+1)
#     plt.imshow(new_train[i])

In [None]:
# Number of training rows per class label, most frequent first.
train['label'].value_counts()

In [None]:
# Show one example image: the first row whose label equals 0.
# (`label` is still numeric at this point; it is cast to str in a later cell.)
first_class0_image_id = train.loc[train.label == 0, 'image_id'].iloc[0]
Image.open(os.path.join(image_path, first_class0_image_id))

In [None]:
# Store the labels as strings. The data loaders below use `label` as `y_col`
# with the default class mode, which presumably requires string class labels
# -- confirm against the Keras docs.
train['label'] = train['label'].astype(str)

## The classes are somewhat imbalanced, skewed toward class 3

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Bar chart of the number of training rows per class label.
# The column is passed as `x=` explicitly: recent seaborn versions treat the
# first positional argument as `data`, so a positional Series is no longer
# read as the x variable.
sns.countplot(x=train['label'], edgecolor='black')
# plt.show()

In [None]:
# train['label'].hist(figsize=(12, 8))

In [None]:
# Random augmentations drawn independently for every image the generator yields.
augmentation_options = dict(
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)

# Generator shared by the loaders below; 25% of the rows are reserved for the
# 'validation' subset, the rest for 'training'.
# NOTE(review): `preprocess_input` is imported from `applications.resnet`,
# while the model built later uses an InceptionResNetV2 backbone -- confirm
# this is the intended preprocessing for the pretrained weights.
data_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.25,
    **augmentation_options,
)

In [None]:
# Iterator over the 'training' subset of `train`: reads each `image_id` file
# from `image_path`, resizes it to 224x224 and pairs it with its `label`.
train_data_loader = data_generator.flow_from_dataframe(
    dataframe=train,
    directory=image_path,
    x_col="image_id",
    y_col="label",
    subset='training',
    target_size=(224, 224),
)

In [None]:
# Validation images must not be randomly rotated, shifted, zoomed or flipped:
# augmenting them makes the validation metrics noisy and not comparable to
# inference on untouched test images. A second generator is therefore used
# that applies only the preprocessing function.
# The subset is selected by row position in the dataframe, so it stays disjoint
# from the training subset as long as `validation_split` matches the value
# given to `data_generator` (0.25).
val_data_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.25,
)

# Iterator over the 'validation' subset of `train`, resized to 224x224.
val_data_loader = val_data_generator.flow_from_dataframe(
    train,
    directory=image_path,
    x_col="image_id",
    y_col="label",
    target_size=(224, 224),
    subset='validation'
)

In [None]:
# Feature extractor: InceptionResNetV2 without its classification head, loaded
# from a local weights file, with global average pooling applied to its output.
backbone = InceptionResNetV2(
    weights='../input/keras-pretrained-models/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5',
    include_top=False,
    input_shape=(224, 224, 3),
    pooling='avg',
)

# Classifier: backbone followed by a 5-way softmax layer.
model = Sequential([
    backbone,
    layers.Dense(5, activation='softmax'),
])

In [None]:
# train, valid = train_test_split(train_labels, train_size = 0.8, shuffle = True,
#                                 random_state = 0)
# BATCH_SIZE = 200
# STEPS_PER_EPOCH = len(train) / BATCH_SIZE
# VALIDATION_STEPS = len(valid) / BATCH_SIZE
# EPOCHS = 8


# def create_model():
#     model = Sequential()
#     model.add(layers.Conv2D(32, (5, 5), activation = "relu", 
#                             input_shape=(150, 150, 3)))
#     model.add(layers.MaxPooling2D((2, 2)))
#     model.add(layers.Conv2D(64, (5, 5), activation = "relu"))
#     model.add(layers.MaxPooling2D((2, 2)))
#     model.add(layers.Conv2D(128, (5, 5), activation = "relu"))
#     model.add(layers.MaxPooling2D((2, 2)))
#     model.add(layers.Conv2D(128, (5, 5), activation = "relu"))
#     model.add(layers.MaxPooling2D(2, 2))
#     model.add(layers.Flatten())
#     model.add(layers.Dense(512, activation = "relu"))
#     model.add(layers.Dense(5, activation = "softmax"))

#     model.compile(optimizer = 'rmsprop',
#                   loss = "categorical_crossentropy",
#                   metrics = ["acc"])
#     return model
# model = create_model()
# model.summary()

In [None]:
# Both callbacks watch the validation loss.
# Early stopping previously monitored the training loss, which keeps improving
# while a model overfits; `restore_best_weights` would then restore the epoch
# with the best *training* loss rather than the best validation loss.
# The learning-rate reduction uses the shorter patience so it gets a chance to
# fire before early stopping ends the run.
callbacks = [
    ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, verbose=1),
    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
]

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as the tracked metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Train for up to 100 epochs; the callbacks may stop the run earlier.
# `batch_size` is deliberately not passed: the data loaders already yield
# batches (sized by their own `batch_size` setting, which this notebook leaves
# at its default), and Keras does not accept a `batch_size` argument for
# generator / Sequence inputs.
model.fit(
    train_data_loader,
    validation_data=val_data_loader,
    epochs=100,
    callbacks=callbacks,
)

In [None]:
# Per-epoch metrics recorded during training, one row per epoch.
history_df = pd.DataFrame(model.history.history)

# Training vs. validation curves: loss first, then accuracy.
loss_columns = ['loss', 'val_loss']
accuracy_columns = ['accuracy', 'val_accuracy']
history_df.loc[:, loss_columns].plot()
history_df.loc[:, accuracy_columns].plot()

In [None]:
# File names of every entry in the test-image folder.
test_images = os.listdir(
    '/kaggle/input/cassava-leaf-disease-classification/test_images/'
)

In [None]:
# Predict a class index for every test image, in the same order as `test_images`.
TEST_IMAGE_DIR = '/kaggle/input/cassava-leaf-disease-classification/test_images/'
PREDICT_BATCH_SIZE = 32  # images sent to the model per predict() call

predict = []

# Images are scored in batches instead of one predict() call per file, which
# avoids the per-call overhead and the flood of per-image progress output.
for start in range(0, len(test_images), PREDICT_BATCH_SIZE):
    batch_pixels = []
    for file_name in test_images[start:start + PREDICT_BATCH_SIZE]:
        # The context manager closes the file once the pixels are decoded.
        with Image.open(os.path.join(TEST_IMAGE_DIR, file_name)) as image:
            # Force three channels so grayscale / RGBA files still match the
            # (224, 224, 3) model input.
            resized = image.convert('RGB').resize((224, 224))
        batch_pixels.append(np.asarray(resized, dtype=np.float32))

    batch = preprocess_input(np.stack(batch_pixels))
    probabilities = model.predict(batch, verbose=0)
    # Index of the highest-probability class for each image in the batch.
    predict.extend(int(index) for index in np.argmax(probabilities, axis=1))

In [None]:
# Submission table: one row per test image with its predicted class index.
submission = pd.DataFrame(
    data={'image_id': test_images, 'label': predict},
    columns=['image_id', 'label'],
)

In [None]:
# Display the submission table for a visual check before it is written to disk.
submission

In [None]:
# Write the submission file without the DataFrame index column.
# `index` is documented as a bool, so `False` is passed explicitly instead of
# relying on `None` being falsy.
submission.to_csv('submission.csv', index=False)