In [None]:
import pandas as pd
import os
import tensorflow as tf

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Discover the GPUs visible to TensorFlow; the notebook refuses to continue without one.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print(physical_devices)
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"

# Enable on-demand memory allocation on every visible GPU rather than only the
# first one: TensorFlow expects the memory-growth setting to be the same across
# GPUs. set_memory_growth() returns None, so its result is not stored.
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# Root directory of the competition dataset; every input path in this notebook is built from it.
data_dir = "/kaggle/input/cassava-leaf-disease-classification"

In [None]:
# Load the label-number -> disease-name map; orient="index" turns each
# top-level JSON key into a row of the resulting frame.
label_map_path = data_dir + "/label_num_to_disease_map.json"
df_label_indexes = pd.read_json(label_map_path, orient="index")

In [None]:
# Collapse the label-map frame into a flat Python list of its cell values,
# in row order.
label_values = df_label_indexes.to_numpy()
labels = label_values.ravel().tolist()

labels

In [None]:
# Image folders inside the dataset root.
train_images = data_dir + "/train_images"  # directory handed to flow_from_dataframe
test_images = data_dir + "/test_images"  # directory holding the images to predict on

In [None]:
# Training metadata; the data iterators read its "image_id" and "label" columns.
train_csv_path = data_dir + "/train.csv"
df = pd.read_csv(train_csv_path)

In [None]:
# Keras flow_from_dataframe wants string class labels for class_mode="categorical",
# so cast the label column to pandas' string dtype.
df = df.astype({"label": "string"})

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Settings shared by both generators. They are later asked for the "training"
# and "validation" subsets of the same dataframe, so the split fraction is
# defined once.
VALIDATION_SPLIT = 0.2
PIXEL_SCALE = 1/255  # multiplies 0-255 pixel values down into [0, 1]

# Random augmentations, applied to training images only.
augmentation_options = dict(
    rotation_range=100,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.3,
    brightness_range=[0.7, 1.4],
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest",
)

training_datagen = ImageDataGenerator(
    rescale=PIXEL_SCALE,
    validation_split=VALIDATION_SPLIT,
    **augmentation_options
)

# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(
    rescale=PIXEL_SCALE,
    validation_split=VALIDATION_SPLIT
)


In [None]:
BATCH_SIZE = 128
IMG_WIDTH = 300
IMG_HEIGHT = 300
CHANNEL = 3

# Arguments common to the training and validation iterators.
# Keras documents target_size as (height, width), so the tuple is built in
# that order; with a square 300x300 size the resulting images are unchanged.
flow_options = dict(
    dataframe=df,
    directory=train_images,
    x_col="image_id",
    y_col="label",
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode="categorical",
    batch_size=BATCH_SIZE,
)

print("\nTraining Dataset")
# Training batches are shuffled and come from the "training" part of the split.
train_ds = training_datagen.flow_from_dataframe(
    shuffle=True,
    subset="training",
    **flow_options
)

print("\nValidation Dataset")
# Validation batches keep dataframe order so results can be matched back to rows.
validation_ds = validation_datagen.flow_from_dataframe(
    shuffle=False,
    subset="validation",
    **flow_options
)

print("\nClass Indices:")
print(train_ds.class_indices)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

# Number of filters in each convolution stage, from input to output.
CONV_FILTERS = (64, 128, 128, 256, 256, 256, 512, 512)

# The output layer needs one unit per class produced by the training iterator
# (its targets are one-hot vectors of this width).
NUM_CLASSES = len(train_ds.class_indices)

# Feature extractor: every stage is a 3x3 "same" convolution followed by a 2x2
# max-pool. With the 300x300 input configured earlier, the eight poolings
# shrink the feature map to 1x1 before the classifier head.
feature_layers = []
for stage, filters in enumerate(CONV_FILTERS):
    conv_options = dict(padding="same", activation="relu")
    if stage == 0:
        # Keras image tensors are (height, width, channels).
        conv_options["input_shape"] = (IMG_HEIGHT, IMG_WIDTH, CHANNEL)
    feature_layers.append(layers.Conv2D(filters, 3, **conv_options))
    feature_layers.append(layers.MaxPooling2D(2))

# Classifier head: dropout for regularisation, then a dense layer and a
# softmax over the classes (so the model outputs probabilities, not logits).
model = Sequential(feature_layers + [
    layers.Dropout(0.5),
    layers.Flatten(),
    layers.Dense(512, activation="relu"),
    layers.Dense(NUM_CLASSES, activation="softmax"),
])

In [None]:
# Print the layer-by-layer architecture of the model built above.
model.summary()

In [None]:
# TensorBoard logs live under ./cassava_logs; create the folder up front.
root_logdir = os.path.join(os.curdir, "cassava_logs")
os.makedirs(root_logdir, exist_ok=True)

# Checkpointing: because save_best_only=True, the file is only rewritten when
# the monitored metric improves, so it always holds the best model so far.
# To resume training, reload it with
# tf.keras.models.load_model("cassava_model.h5").
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath="cassava_model.h5",
    save_best_only=True,
)

# Early stopping: give up after 10 epochs without improvement and put the
# best weights back on the model.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True,
)

def get_run_logdir(base_dir=None):
    """Return a timestamped log-directory path for one training run.

    Args:
        base_dir: Directory the run folder is placed under. When omitted,
            the notebook-level ``root_logdir`` is used.

    Returns:
        ``<base_dir>/run_YYYY_MM_DD-HH_MM_SS`` built from the current local
        time. The directory itself is not created here.
    """
    import time  # local import: only this helper needs the module

    if base_dir is None:
        base_dir = root_logdir
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(base_dir, run_id)

# Give this run its own timestamped folder and point TensorBoard at it.
run_logdir = get_run_logdir()
tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir=run_logdir)

In [None]:
# The model's final layer already applies softmax, so the loss receives class
# probabilities rather than raw logits. from_logits must therefore be False;
# with True the loss would apply softmax a second time and distort training.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

In [None]:
# Upper bound on the number of epochs; the early-stopping callback may end
# training before this is reached.
epochs = 25

training_callbacks = [checkpoint_cb, early_stopping_cb, tensorboard_cb]

history = model.fit(
    x=train_ds,
    validation_data=validation_ds,
    epochs=epochs,
    callbacks=training_callbacks,
)

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Use the number of epochs that were actually recorded, not the requested
# maximum: when early stopping ends training early the history is shorter
# than `epochs`, and plotting against range(epochs) would fail with a
# length mismatch.
epochs_range = range(len(acc))

fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(8, 8))

# Left panel: accuracy per epoch.
acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy')

# Right panel: loss per epoch.
loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')
plt.show()

In [None]:
# Save the trained model under /kaggle/working; the inference cell reloads it from this path.
model.save("/kaggle/working/cassava.h5")

In [None]:


# The sample submission supplies the test image ids that need a prediction.
sample_submission_path = data_dir + '/sample_submission.csv'
ss = pd.read_csv(sample_submission_path)

ss.head()

In [None]:
import numpy as np

# Predicted class index for every image listed in the sample submission.
preds = []

loaded_model = tf.keras.models.load_model("/kaggle/working/cassava.h5")

for image in ss.image_id:
    # Resize the same way the training iterator did (a plain resize to
    # target_size, given as (height, width)). The previous smart_resize call
    # centre-cropped the image first, so test inputs were framed differently
    # from the images the model was trained on.
    img = tf.keras.preprocessing.image.load_img(
        os.path.join(test_images, image),
        target_size=(IMG_HEIGHT, IMG_WIDTH),
    )
    img = tf.keras.preprocessing.image.img_to_array(img)

    # Add the batch axis and apply the same 1/255 rescaling used in training.
    batch = np.expand_dims(img, axis=0) / 255.
    prediction = loaded_model.predict(batch, verbose=0)

    # NOTE(review): the argmax index is used directly as the label; this
    # assumes train_ds.class_indices maps "0".."4" to 0..4 -- confirm against
    # the class indices printed when the iterators were built.
    preds.append(int(np.argmax(prediction)))

In [None]:
# Pair every test image id with its predicted class index.
submission_columns = {'image_id': ss['image_id'], 'label': preds}
my_submission = pd.DataFrame(data=submission_columns)

# Display the id column next to the raw prediction list.
(ss['image_id'], preds)

In [None]:
# Write the predictions to submission.csv in the current directory, leaving out the index column.
my_submission.to_csv('submission.csv', index=False)