# Setup

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from kaggle_datasets import KaggleDatasets
from tensorflow import keras
from functools import partial
from sklearn.model_selection import train_test_split
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Dropout
# from tensorflow.keras.applications import EfficientNetB0
# from tensorflow.keras.applications import EfficientNetB4
from tensorflow.keras.applications import EfficientNetB5
from keras.callbacks import EarlyStopping
from pathlib import Path

# Report the TensorFlow build this notebook is running against.
print(f"Tensorflow version {tf.__version__}")

In [None]:
# Base directory of the competition data; the CSV, the JSON label map and the
# train/test image folders are all read relative to it.
root_path = '../input/cassava-leaf-disease-classification/'

# Read the input data

In [None]:
# Load the training index and store the class ids as strings
# (the generators further down use class_mode='categorical' on this column).
train = pd.read_csv(root_path + 'train.csv').astype({'label': 'string'})
train.sample(5)

# Disease category

In [None]:
# Series read from the JSON label map; it is indexed with int(label) below to
# turn a numeric label into a disease name.
names_of_disease = pd.read_json(root_path + 'label_num_to_disease_map.json', typ='series')
names_of_disease

In [None]:
from PIL import Image,ImageFilter
import os

# Preview the first nine training images in a 3x3 grid, each titled with its
# disease name. (A leftover `break` used to stop after the first image, leaving
# eight of the nine panels empty; the debug prints are gone as well.)
plt.figure(figsize=(16, 12))
for i in range(9):
    row = train.iloc[i]
    plt.subplot(3, 3, i + 1)
    # Context manager releases the file handle once the pixels are copied out.
    with Image.open(root_path + 'train_images/' + row['image_id']) as image:
        plt.imshow(np.array(image))
    # Labels are stored as strings, the name map is indexed by integer.
    plt.title(names_of_disease[int(row['label'])])
plt.show()

  
# Read image



In [None]:
# Sample every 250th training image (starting at row 1) and report the
# distinct array shapes found among them.
sampled_ids = train['image_id'].iloc[1::250]
sizes = [
    np.array(Image.open(root_path + 'train_images/' + image_id)).shape
    for image_id in sampled_ids
]
print('Picture size', set(sizes))

In [None]:
# Scratch calculation: 128 scaled by 800/600 (about 170.7).
# NOTE(review): presumably the width that keeps an 800x600 aspect ratio at
# height 128 (cf. the "128X170" experiment in the version log) - confirm.
128*800/600.0

In [None]:
# Input resolution used for the generators' target_size, the network's
# input_shape and the test-time resize. Other sizes are kept commented out.
# img_width, img_height = 224, 224
# img_width, img_height = 128, 128
# img_width, img_height = 164, 164
img_width, img_height = 256, 256

# Check Data input Distribution

In [None]:
# Share of each label in the training CSV (normalize=True yields fractions
# instead of raw counts).
train['label'].value_counts(normalize=True)

# Training

In [None]:
# Random augmentations applied to every image drawn from `datagen`;
# validation_split reserves 20% of the dataframe rows for the 'validation' subset.
augmentation_options = dict(
    validation_split=0.2,
    vertical_flip=True,
    horizontal_flip=True,
    rotation_range=90,
    brightness_range=[0.5, 1.0],
    shear_range=25,
    zoom_range=[0.5, 1.0],
)
datagen = ImageDataGenerator(**augmentation_options)

# Stream the 'training' subset in shuffled batches of 32 with one-hot labels.
train_flow_options = dict(
    dataframe=train,
    directory=root_path + 'train_images',
    x_col='image_id',
    y_col='label',
    target_size=(img_width, img_height),
    batch_size=32,
    subset='training',
    shuffle=True,
    class_mode='categorical',
)
train_datagen_flow = datagen.flow_from_dataframe(**train_flow_options)


# Test (Validating)

In [None]:
# Validation images must not be augmented, otherwise val_loss/val_accuracy are
# measured on randomly flipped/rotated/zoomed pictures. Use a dedicated
# generator that carries only the same validation_split: the subset is taken by
# row position in the dataframe, so it selects the same held-out rows.
valid_datagen = ImageDataGenerator(validation_split=0.2)

valid_datagen_flow = valid_datagen.flow_from_dataframe(
    dataframe=train,
    directory=root_path + 'train_images',
    x_col='image_id',
    y_col='label',
    target_size=(img_width, img_height),
    batch_size=32,
    subset='validation',
    class_mode='categorical',
    # Batch order does not affect the validation metrics; keep it fixed.
    shuffle=False
)

Build the Model

# Running the model

* Version 2: 856/856 [==============================] - 1894s 2s/step - loss: 1.1880 - accuracy: 0.6146 - val_loss: 1.1774 - val_accuracy: 0.6165
* Version 3: 856/856 [==============================] - 274s 321ms/step - loss: 0.8554 - accuracy: 0.6764 - val_loss: 0.8232 - val_accuracy: 0.6843
* Version 4: 
    image size 64x64  accuracy: 0.6823
    856/856 [==============================] - 652s 762ms/step - loss: 0.8481 - accuracy: 0.6823 - val_loss: 0.8613 - val_accuracy: 0.6686
* Version 5:
    convolution... filters=64, kernel_size=4
* Version 6: 
    back to basics, accuracy: 0.64 after epochs=8
* Version 8: 
    Early stopping, epochs 100
* Version 9: EfficientNetB0, epochs 10 --> accuracy: 0.7385

* Version 11-13: EfficientNetB0, epochs 50 --> accuracy: 0.84  validation=0.72
* Version 14: EfficientNetB0, epochs 20 --> accuracy: ?
* Version 15: TODO: batch size 128 and dropout 0.6 made the model worse
* Version 16: batch size 32 + flatten + dropout + dense512 + epochs=10, accuracy: 0.74  validation=0.72
* Version 17: image size 128x128, accuracy: 0.81  validation=0.79
* Version 18: image size 256x256, epochs=6 (<8H), accuracy 0.84, validation=0.8441
* Version 19: image size 128x170, epochs=6, accuracy=0.787, validation=0.787
* Version 20: image size 164x164, epochs=6, change from B0 to EfficientNetB4
* Version 21: image size 164x164, epochs=8, change from B0 to EfficientNetB4 - accuracy 0.81, validation 0.79
* Version 22: image size 164x164, epochs=6, change from B4 to EfficientNetB5, accuracy 0.78, validation=0.79
* Version 23: fix output submission
* Version 25: save model file + epochs=10, accuracy: 0.8186 - val_loss: 0.5699 - val_accuracy: 0.8004  (score 0.772)
* Version 26: 256x256 + epochs=10, loss: 0.4181 - accuracy: 0.8583 - val_loss: 0.4416 - val_accuracy: 0.8537  (score 0.862)
* Version 26: 256x256 BatchNormalization(32) + epochs=10 got worse
* Version: use a callback for a dynamic learning rate ... callbacks [..., lrs], increasing as we go (big degradation from 0.8 to 0.6)
* Use a callback for a dynamic learning rate ... callbacks [..., lrs], decreasing as we go

Based on: https://www.kaggle.com/bununtadiresmenmor/starter-keras-efficientnet?select=sample_submission.csv



In [None]:
# Stop once val_loss has not improved for 4 consecutive epochs, and roll the
# model back to the best epoch so that the weights saved after training are not
# the ones from the stalled epochs that followed it.
early_stopping = EarlyStopping(monitor='val_loss', verbose=1, patience=4,
                               restore_best_weights=True)

# 1. Model EfficientNetB5

In [None]:
# Pretrained EfficientNetB5 without its classification top, sized for the
# configured input resolution.
backbone = EfficientNetB5(include_top=False, weights="imagenet",
                          input_shape=(img_width, img_height, 3))

# Backbone followed by a small dense head ending in a 5-way softmax.
model = Sequential([
    backbone,
    # Dropout(0.2),
    # tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.AveragePooling2D(pool_size=(3, 3)),
    tf.keras.layers.Flatten(),
    Dropout(0.5),
    tf.keras.layers.Dense(512, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(5, activation="softmax"),
    # Dropout(0.5),
])

# Adam with an explicit starting learning rate; one-hot labels -> categorical cross-entropy.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
# optimizer = 'adam'
model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# from tensorflow.keras.applications import EfficientNetB0

# with strategy.scope():
#     inputs = layers.Input(shape=(img_width, img_height, 3))
#     x = img_augmentation(inputs)
#     outputs = EfficientNetB0(include_top=True, weights=None, classes=NUM_CLASSES)(x)

#     model = tf.keras.Model(inputs, outputs)
#     model.compile(
#         optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
#     )

# model.summary()

# epochs = 40  # @param {type: "slider", min:10, max:100}
# hist = model.fit(ds_train, epochs=epochs, validation_data=ds_test, verbose=2)

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
from tensorflow.keras import utils

# Render the model's layer graph as a diagram.
utils.plot_model(model)

In [None]:
from keras.callbacks import LearningRateScheduler


def my_learning_rate(epoch, lrate):
    """Step-decay learning-rate schedule for the LearningRateScheduler callback.

    Args:
        epoch: Index of the epoch about to start, as passed by the callback.
        lrate: Current learning rate. Accepted because the callback passes it,
            but the schedule depends on ``epoch`` only.

    Returns:
        1e-3 while ``epoch < 5``, 1e-4 while ``epoch < 10``, 1e-5 afterwards.
    """
    if epoch < 5:
        return 1e-3
    if epoch < 10:
        return 1e-4
    return 1e-5


# Built after the schedule function is defined so that this cell works on a
# fresh kernel (Restart & Run All); previously it referenced the function
# before its definition and raised NameError.
lrs = LearningRateScheduler(my_learning_rate)





In [None]:
# Sanity check of the schedule: epoch 9 is in the 5-9 band, so this evaluates to 1e-4.
my_learning_rate(9, 1e-4)

# Run and save model

In [None]:
# Train with early stopping and the step learning-rate schedule.
# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from newer TensorFlow/Keras releases.
history = model.fit(train_datagen_flow,
                    epochs=15,
                    validation_data=valid_datagen_flow,
                    callbacks=[early_stopping, lrs])

# Persist the trained model so the submission cell can reload it from disk.
model.save('Cassava_model' + '.h5')

In [None]:
import matplotlib.pyplot as plt


def plot_hist(history, metric="accuracy"):
    """Plot a training metric and its validation counterpart per epoch.

    Args:
        history: Object exposing a ``history`` dict that maps metric names to
            per-epoch value lists (e.g. the return value of ``model.fit``).
        metric: Key of the training curve to draw; ``"val_" + metric`` is drawn
            alongside it. Defaults to ``"accuracy"``, so existing one-argument
            calls produce the same figure as before.

    Raises:
        KeyError: If ``metric`` or ``"val_" + metric`` is missing from
            ``history.history``.
    """
    # Explicit axes object instead of the implicit pyplot "current figure".
    _fig, ax = plt.subplots()
    ax.plot(history.history[metric], label="train")
    ax.plot(history.history["val_" + metric], label="validation")
    ax.set(title=f"model {metric}", ylabel=metric, xlabel="epoch")
    ax.legend(loc="upper left")
    plt.show()


plot_hist(history)

In [None]:
# Tabulate the per-epoch metrics, then draw the loss curves and the accuracy
# curves as two separate figures.
history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot()
history_df[['accuracy', 'val_accuracy']].plot()

# Logs

In [None]:
# import os
# import keras
# RUN_NAME = 'run 1 with 25 nodes'
# logger = keras.callbacks.TensorBoard(
#     log_dir='kaggle/working/logs/ {}'.format(RUN_NAME),
#     histogram_freq=5,
#     write_graph=True
# )
# os.getcwd()
# tf.tensorboard --logdir=/logs/
# tensorboard --logdir=summaries

# Reload the model trained weights

In [None]:
# model.load_weights('model_weights.h5')

In [None]:
# os.path.join(root_path + 'test_images', image_name)

# Predict Test Image

Submission

In [None]:
# Evaluating the model

import keras

# Reload the model written by the training cell.
final_model = keras.models.load_model('Cassava_model.h5')

test_dir = Path(root_path) / 'test_images'

# Collect one record per image and build the frame once at the end:
# DataFrame.append is removed in pandas 2.x and copies the whole frame on
# every call when used inside a loop.
records = []
for image_path in sorted(test_dir.iterdir()):  # sorted -> deterministic row order
    # Let load_img do the resize via target_size, the same argument the
    # training generators were given, instead of a separate PIL resize with a
    # different resampling filter.
    image = tf.keras.preprocessing.image.load_img(
        str(image_path), target_size=(img_width, img_height))
    batch = np.expand_dims(np.asarray(image, dtype=np.float32), 0)
    # predict_classes no longer exists on Keras models; the arg-max over the
    # softmax output gives the same class index.
    probabilities = final_model.predict(batch, verbose=0)
    records.append({'image_id': image_path.name,
                    'label': int(np.argmax(probabilities, axis=-1)[0])})

submission = pd.DataFrame(records, columns=['image_id', 'label'])

submission

In [None]:
# Write the predictions to submission.csv; index=False leaves the row index out of the file.
submission.to_csv('submission.csv', index = False)