[Inference link](https://www.kaggle.com/durbin164/efficientnetb4-starting-inferance-score-86)



In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import os
import cv2

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.losses import CategoricalCrossentropy
import tensorflow as tf

from tensorflow.keras.layers import Dense, Dropout,\
        Flatten,GlobalAveragePooling2D,BatchNormalization, Activation

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import layers

In [None]:
# Root directory of the competition data; other cells build the image directory path from it.
WORK_DIR = '../input/cassava-leaf-disease-classification'
# List the directory contents as a quick check of what is available.
os.listdir(WORK_DIR)

Set WANDB = False if you do not have a Wandb.ai account.

In [None]:
# Experiment settings shared by every cell below.
SEED = 42
DEBUG = False            # True switches to the short debug configuration below
WANDB = True             # False disables Weights & Biases logging
TARGET_SIZE = 300        # images are resized to TARGET_SIZE x TARGET_SIZE
VALIDATION_SIZE = 0.2    # fraction of the dataframe held out for validation
BATCH_SIZE = 24
EPOCHS = 40
MODEL_NAME = "EfficentNetB4"

# Debug configuration: only the epoch count actually differs from the defaults.
if DEBUG:
    EPOCHS, TARGET_SIZE, BATCH_SIZE = 3, 300, 24

In [None]:
# Load the training metadata; the columns used below are `image_id` and `label`.
df = pd.read_csv(os.path.join(WORK_DIR, 'train.csv'))

# Path of every training image, built from its file name.
df['image_path'] = df['image_id'].map(
    lambda x: os.path.join(WORK_DIR, 'train_images', x)
)

df.head()

# Since we are using the inbuilt generator, it takes the label as a string.
df['label'] = df['label'].astype('str')

# Keras expects integer step counts. Derive them from the subset sizes and
# round up so the final partial batch of each subset is included.
# NOTE(review): assumes the generators split the dataframe by row position,
# with the first int(len(df) * VALIDATION_SIZE) rows as validation and no
# rows dropped for missing files -- confirm against the generator output.
n_valid_samples = int(len(df) * VALIDATION_SIZE)
n_train_samples = len(df) - n_valid_samples

STEPS_PER_EPOCH = int(np.ceil(n_train_samples / BATCH_SIZE))
VALIDATION_STEPS = int(np.ceil(n_valid_samples / BATCH_SIZE))

In [None]:
# Training pipeline: random augmentation is applied on the fly to every batch.
train_datagen = ImageDataGenerator(validation_split = VALIDATION_SIZE,
                                     preprocessing_function = None,
                                     rotation_range = 20,
                                     zoom_range = [0.5,1.0],
                                     horizontal_flip = True,
                                     vertical_flip = False,
                                     fill_mode = 'nearest',
                                     shear_range = 0.1,
                                     brightness_range=[0.2,1.0],
                                     height_shift_range = 0.1,
                                     width_shift_range = 0.1)

train_generator = train_datagen.flow_from_dataframe(df,
                         directory = os.path.join(WORK_DIR, "train_images"),
                         subset = "training",
                         x_col = "image_id",
                         y_col = "label",
                         target_size = (TARGET_SIZE, TARGET_SIZE),
                         batch_size = BATCH_SIZE,
                         class_mode = "sparse")


# Validation pipeline: same split fraction, but no augmentation.
validation_datagen = ImageDataGenerator(validation_split = VALIDATION_SIZE)

# shuffle=False keeps the batches in dataframe order, so the output of
# model.predict(validation_generator) lines up with validation_generator.labels.
# With the default shuffle=True the two are in different orders and any
# report computed from them is wrong.
validation_generator = validation_datagen.flow_from_dataframe(df,
                         directory = os.path.join(WORK_DIR, "train_images"),
                         subset = "validation",
                         x_col = "image_id",
                         y_col = "label",
                         target_size = (TARGET_SIZE, TARGET_SIZE),
                         batch_size = BATCH_SIZE,
                         class_mode = "sparse",
                         shuffle = False)

In [None]:
if DEBUG:
    # Preview one augmented training batch on a 4x6 grid (up to 24 images).
    t_x, t_y = next(train_generator)
    fig, m_axs = plt.subplots(4, 6, figsize = (32, 16))
    for (c_x, c_y, c_ax) in zip(t_x, t_y, m_axs.flatten()):
        c_ax.imshow(c_x.astype(np.uint8))
        # class_mode="sparse" yields one scalar class index per image, so the
        # label is shown directly; np.argmax of a scalar is always 0.
        c_ax.set_title(int(c_y))
        c_ax.axis('off')

In [None]:
if DEBUG:
    # Preview one validation batch on a 4x6 grid (up to 24 images).
    t_x, t_y = next(validation_generator)
    fig, m_axs = plt.subplots(4, 6, figsize = (32, 16))
    # Iterate the labels together with the images so every title belongs to
    # its own image instead of reusing a stale value from an earlier cell.
    for (c_x, c_y, c_ax) in zip(t_x, t_y, m_axs.flatten()):
        c_ax.imshow(c_x.astype(np.uint8))
        # class_mode="sparse" yields one scalar class index per image, so the
        # label is shown directly; np.argmax of a scalar is always 0.
        c_ax.set_title(int(c_y))
        c_ax.axis('off')

In [None]:
def get_model():
    """Build the EfficientNetB4-based classifier.

    The model takes uint8 3-channel images with unspecified height and width,
    casts them to float32, applies EfficientNet's ``preprocess_input`` and
    passes them through an ImageNet-initialised EfficientNetB4 backbone
    (without its top). Global average pooling and a 5-unit softmax layer
    produce the output.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    image_input = tf.keras.layers.Input([None, None, 3], dtype=tf.uint8)
    features = tf.cast(image_input, tf.float32)
    features = tf.keras.applications.efficientnet.preprocess_input(features)

    backbone = tf.keras.applications.EfficientNetB4(
        include_top=False,
        input_tensor=None,
        weights='imagenet',
        input_shape=(TARGET_SIZE, TARGET_SIZE, 3),
    )

    # The backbone is called as a single layer on the preprocessed tensor.
    features = backbone(features)
    features = layers.GlobalAveragePooling2D()(features)
    probabilities = layers.Dense(5, activation="softmax")(features)

    return tf.keras.Model(inputs=[image_input], outputs=[probabilities])

model = get_model()

# `lr` is a deprecated alias; `learning_rate` is the supported keyword.
optimizer = tf.keras.optimizers.Adam(learning_rate = 0.003)

# Sparse categorical cross-entropy matches the integer labels produced by
# the generators (class_mode="sparse").
model.compile(
        optimizer=optimizer,
        loss = "sparse_categorical_crossentropy",
        metrics = ["acc"]
    )

model.summary()

In [None]:
# Notebook-only cell: the `!` lines are IPython shell escapes, not plain Python.
if WANDB:
    # Install or upgrade the wandb client.
    !pip install --upgrade wandb
    # Placeholder: replace the text after `login` with your own wandb access token.
    !wandb login enter your wandb access token

    # wandb itself is initialised in the next cell.



In [None]:
if WANDB:
    import wandb
    from wandb.keras import WandbCallback

    # Start a run in the "cassava-leaf-disease" project and name it after the model.
    wandb_run = wandb.init(project="cassava-leaf-disease")
    wandb_run.name = MODEL_NAME


In [None]:
# Output files for the best-scoring and the most recent model snapshots.
weight_path_save = 'best_model.hdf5'
last_weight_path = 'last_model.hdf5'

# Full-model checkpoint, written only when val_loss improves.
checkpoint = ModelCheckpoint(
    weight_path_save,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Full-model checkpoint, written regardless of val_loss (save_best_only=False).
checkpoint_last = ModelCheckpoint(
    last_weight_path,
    monitor='val_loss',
    mode='min',
    save_best_only=False,
    save_weights_only=False,
    verbose=1,
)

# Stop after 5 epochs without a val_loss improvement of at least 0.001 and
# restore the best weights seen.
early = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.001,
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after a single epoch without sufficient improvement.
reduceLROnPlat = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.5,
    patience=1,
    min_delta=0.001,
    verbose=1,
)

# The wandb callback, when enabled, goes first in the list.
callbacks_list = [checkpoint, checkpoint_last, early, reduceLROnPlat]
if WANDB:
    callbacks_list.insert(0, WandbCallback())

In [None]:
# Train the model; `history` keeps the per-epoch metrics for plotting.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_steps=VALIDATION_STEPS,
    callbacks=callbacks_list,
)

In [None]:
def plot_hist(hist):
    """Plot the accuracy and loss curves of a training run and save them to ``loss.png``.

    Accuracy is drawn on the left y-axis and loss on a twin right y-axis.

    Args:
        hist: History object returned by ``model.fit``; its ``history`` dict
            must contain the keys ``acc``, ``val_acc``, ``loss`` and ``val_loss``.
    """
    # Use the number of epochs actually run: early stopping can end training
    # before EPOCHS, and plotting against np.arange(EPOCHS) would then fail
    # with mismatched x/y lengths.
    epochs_run = np.arange(len(hist.history["loss"]))

    plt.figure(figsize=(15,5))
    plt.plot(epochs_run, hist.history["acc"], '-o', label='Train Accuracy',color='#ff7f0e')
    plt.plot(epochs_run, hist.history["val_acc"], '-o',label='Val Accuracy',color='#1f77b4')
    plt.xlabel('Epoch',size=14)
    plt.ylabel('Accuracy',size=14)
    plt.legend(loc=2)

    # Loss goes on a second y-axis; address it explicitly instead of relying
    # on pyplot's notion of the current axes.
    loss_ax = plt.gca().twinx()
    loss_ax.plot(epochs_run, hist.history['loss'],'-o',label='Train Loss',color='#2ca02c')
    loss_ax.plot(epochs_run, hist.history['val_loss'],'-o',label='Val Loss',color='#d62728')
    loss_ax.legend(loc=3)
    loss_ax.set_ylabel('Loss',size=14)
    plt.title("Model Accuracy and loss")

    plt.savefig('loss.png')
    plt.show()

plot_hist(history)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Load the best model, or all the metrics would be from the last epoch
# rather than the best one.
model.load_weights(weight_path_save)

# Collect predictions and labels batch by batch. Each batch carries its own
# labels, so the two stay aligned even if the generator shuffles its samples;
# comparing model.predict(generator) with generator.labels is only valid for
# an unshuffled generator.
pred_batches, label_batches = [], []
for batch_idx in range(len(validation_generator)):
    batch_x, batch_y = validation_generator[batch_idx]
    pred_batches.append(np.asarray(model.predict_on_batch(batch_x)))
    label_batches.append(batch_y)

pred_valid_y = np.concatenate(pred_batches)
pred_valid_y_labels = np.argmax(pred_valid_y, axis=-1)
# The batch labels are cast to int so they compare cleanly with the argmax output.
valid_labels = np.concatenate(label_batches).astype(int)

print(classification_report(valid_labels, pred_valid_y_labels ))

print("****************")
print(confusion_matrix(valid_labels, pred_valid_y_labels ))
