In [None]:
import math, re , os
import tensorflow as tf
import json
import cv2
import seaborn as sn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#from kaggle_datasets import KaggleDatasets
from tensorflow import keras
from functools import partial 
from sklearn.model_selection import train_test_split
print("tensorflow version" + tf.__version__)

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications import VGG16
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import models, layers
from PIL import Image

In [None]:
# Root directory of the cassava dataset; the JSON, CSV and image paths used
# in the cells below are joined onto it.
BASE_DIR = "../input/cassava-leaf-disease-classification"

In [None]:
# Read the label-index -> disease-name lookup that ships with the dataset.
label_map_path = os.path.join(BASE_DIR, "label_num_to_disease_map.json")
with open(label_map_path) as fh:
    # JSON object keys are always strings; cast them to int so the mapping
    # can be applied to the `label` column of train.csv.
    map_classes = {int(idx): name for idx, name in json.load(fh).items()}

print(json.dumps(map_classes, indent=4))

In [None]:
# List every entry of the training image folder and report how many there are.
train_images_dir = os.path.join(BASE_DIR, "train_images")
input_files = os.listdir(train_images_dir)
print(f"Number of train images: {len(input_files)}")

In [None]:
# Tally the array shapes of the first 300 training images.
SHAPE_SAMPLE_SIZE = 300
shape_sample_dir = os.path.join(BASE_DIR, "train_images")

img_shapes = {}          # maps image.shape tuple -> number of images with that shape
unreadable_images = []   # file names cv2 could not decode
for image_name in os.listdir(shape_sample_dir)[:SHAPE_SAMPLE_SIZE]:
    image = cv2.imread(os.path.join(shape_sample_dir, image_name))
    # cv2.imread reports failure by returning None instead of raising, so
    # guard before touching `.shape` (otherwise: AttributeError on NoneType).
    if image is None:
        unreadable_images.append(image_name)
        continue
    img_shapes[image.shape] = img_shapes.get(image.shape, 0) + 1

print(img_shapes)
if unreadable_images:
    print(f"Skipped {len(unreadable_images)} unreadable file(s): {unreadable_images}")

In [None]:
# Load the training labels and attach the human-readable disease name
# looked up from `map_classes`.
df_train = (
    pd.read_csv(os.path.join(BASE_DIR, "train.csv"))
    .assign(class_name=lambda frame: frame["label"].map(map_classes))
)

df_train.head(10)

In [None]:
# Horizontal bar chart of how many training rows fall into each class.
fig, ax = plt.subplots(figsize=(10, 5))
sn.countplot(data=df_train, y="class_name", ax=ax);

In [None]:
# Main parameters
BATCH_SIZE = 8
# Step counts must be whole numbers: the plain division used before produced
# floats (e.g. 2139.7), which Keras does not accept as a valid step count.
# Rounding up makes sure the last, partially filled batch of each split is
# still visited. The 0.8 / 0.2 factors mirror `validation_split = 0.2` used
# by the data generators.
STEPS_PER_EPOCH = math.ceil(len(df_train) * 0.8 / BATCH_SIZE)
VALIDATION_STEPS = math.ceil(len(df_train) * 0.2 / BATCH_SIZE)
EPOCHS = 20
TARGET_SIZE = 512  # images are resized to TARGET_SIZE x TARGET_SIZE

In [None]:
# flow_from_dataframe with class_mode = "sparse" requires string labels.
df_train["label"] = df_train["label"].astype("str")

# Keyword arguments shared by the training and validation iterators, kept in
# one place so the two pipelines cannot drift apart.
flow_kwargs = dict(
    directory = os.path.join(BASE_DIR, "train_images"),
    x_col = "image_id",
    y_col = "label",
    target_size = (TARGET_SIZE, TARGET_SIZE),
    batch_size = BATCH_SIZE,
    class_mode = "sparse",
)

# NOTE: no `rescale = 1./255` here. Previously only the training images were
# scaled to [0, 1], while the validation images (and the test images passed
# to model.predict further down) stayed in [0, 255]. val_loss - which drives
# EarlyStopping / ReduceLROnPlateau - and the submission were therefore
# computed on inputs 255x larger than anything seen during training.
# tf.keras EfficientNet models expect raw [0, 255] pixels and normalise them
# internally, so all three paths now feed unscaled pixels.
# NOTE(review): weights saved by runs that trained with the old rescaling may
# need some epochs to re-adapt - confirm against validation metrics.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split = 0.2,
    rotation_range = 45,
    zoom_range = 0.2,
    horizontal_flip = True,
    vertical_flip = True,
    fill_mode = 'nearest',
    shear_range = 0.1,
    height_shift_range = 0.1,
    width_shift_range = 0.1,
)

train_generator = train_datagen.flow_from_dataframe(
    df_train,
    subset = "training",
    **flow_kwargs,
)

# Validation images get no augmentation; only the same split fraction is set
# so that the "validation" subset is the complement of the training subset.
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(validation_split = 0.2)

validation_generator = validation_datagen.flow_from_dataframe(
    df_train,
    subset = "validation",
    **flow_kwargs,
)

In [None]:
def create_model(num_classes = 5, learning_rate = 0.001):
    """Build and compile an EfficientNetB0-based image classifier.

    The EfficientNetB0 backbone is created without its classification head
    and without pretrained weights (``weights = None``). A global average
    pooling layer and a softmax ``Dense`` layer are stacked on top of it.

    Args:
        num_classes: Number of units in the softmax output layer.
            Defaults to 5, the value that was previously hard-coded.
        learning_rate: Learning rate passed to the Adam optimizer.
            Defaults to 0.001, the value that was previously hard-coded.

    Returns:
        A compiled ``keras`` ``Model`` whose input shape is
        ``(TARGET_SIZE, TARGET_SIZE, 3)``, using sparse categorical
        cross-entropy as loss and reporting the ``"acc"`` metric.
    """
    conv_base = EfficientNetB0(include_top = False, weights = None,
                               input_shape = (TARGET_SIZE, TARGET_SIZE, 3))
    pooled_features = layers.GlobalAveragePooling2D()(conv_base.output)
    class_probs = layers.Dense(num_classes, activation = "softmax")(pooled_features)
    model = models.Model(conv_base.input, class_probs)

    # `learning_rate` is the supported keyword; the legacy `lr` alias is
    # deprecated and no longer accepted by newer Keras releases.
    model.compile(optimizer = Adam(learning_rate = learning_rate),
                  loss = "sparse_categorical_crossentropy",
                  metrics = ["acc"])
    return model

In [None]:
# Build the compiled network and print its layer-by-layer summary.
model = create_model()
model.summary()

In [None]:
# Load previously saved weights into the freshly built model.
# NOTE(review): the file name suggests these come from an earlier training
# run of this architecture - confirm provenance.
model.load_weights('../input/weight1/myweights3.h5')

In [None]:
# Stop when val_loss has not improved by at least 0.001 for 5 epochs, and
# roll the model back to the best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.001,
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Multiply the learning rate by 0.3 after 2 epochs without a val_loss
# improvement of at least 0.001.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.3,
    min_delta=0.001,
    patience=2,
    verbose=1,
)

training_callbacks = [early_stop, reduce_lr]

history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_steps=VALIDATION_STEPS,
    callbacks=training_callbacks,
)

# Persist both the full model and a weights-only file.
model.save('./myEffNetB0_512_8__3.h5')
model.save_weights('./myweights4.h5')

In [None]:
# Load the sample submission; its `image_id` column lists the test images
# that need a prediction.
sample_submission_path = os.path.join(BASE_DIR, "sample_submission.csv")
ss = pd.read_csv(sample_submission_path)
ss

In [None]:
# Predict one class index per test image listed in the submission frame.
test_images_dir = os.path.join(BASE_DIR, "test_images")

preds = []
for test_image_id in ss["image_id"]:
    resized = Image.open(os.path.join(test_images_dir, test_image_id)).resize(
        (TARGET_SIZE, TARGET_SIZE)
    )
    # Add a leading axis of size 1 so the single image forms a batch.
    single_batch = np.expand_dims(resized, axis=0)
    class_scores = model.predict(single_batch)
    # Index of the highest score = predicted class.
    preds.append(np.argmax(class_scores))

ss['label'] = preds
ss

In [None]:
# Write the predictions to submission.csv, omitting the DataFrame index column.
ss.to_csv('submission.csv', index = False)