In [None]:
import os, shutil,math,re
import json
from keras import layers, models, optimizers
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from matplotlib import pyplot as plt
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, smart_resize
from keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau, ModelCheckpoint
import matplotlib.pyplot as plt
import seaborn as sns
import PIL
from kaggle_datasets import KaggleDatasets

In [None]:
import random
import warnings
def seed_everything(seed=0):
    """Seed the Python, NumPy and TensorFlow random number generators.

    Also writes ``PYTHONHASHSEED`` and ``TF_DETERMINISTIC_OPS`` into the
    process environment.

    Args:
        seed: Integer seed handed to every generator (default 0).
    """
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'TF_DETERMINISTIC_OPS': '1',
    })

# Seed every RNG through seed_everything before anything random is created.
seed = 21
seed_everything(seed)
# Suppress all Python warnings from here on.
warnings.filterwarnings('ignore')

In [None]:
# Pick the distribution strategy exactly once: TPU when a TPU runtime is
# reachable, otherwise MirroredStrategy over the local CPU/GPU devices.
try:
    # connect() resolves the TPU, attaches to the cluster and initialises the
    # TPU system in a single call, so no separate initialisation pass is needed.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
    print("Running on TPU ", tpu.cluster_spec().as_dict()["worker"])
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError:
    # The resolver raises ValueError when no TPU can be located.
    print("Not connected to a TPU runtime. Using CPU/GPU strategy")
    strategy = tf.distribute.MirroredStrategy()

print("Number of accelerators: ", strategy.num_replicas_in_sync)

In [None]:
# Spatial size of every image; decode_image reshapes decoded JPEGs to [*image_size, 3].
image_size = (512,512)
# NOTE(review): n_CLASS is not read anywhere in the visible notebook; the model
# head is sized with len(classes) instead.
n_CLASS = 5
# Global batch size: 16 samples for each replica in the strategy.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync
print('Batch size:', BATCH_SIZE)
# NOTE(review): the model.fit call further down passes epochs=25 literally and
# does not read this value.
epochs=12

In [None]:
# Resolve the GCS location of the competition dataset.
gcs_path = KaggleDatasets().get_gcs_path('cassava-leaf-disease-classification')
print(gcs_path)

# Every training TFRecord shard found under that location.
training_files = tf.io.gfile.glob(f'{gcs_path}/train_tfrecords/*.tfrec')

print(f'Training tfrecords: {len(training_files)}')


def count_data_items(filenames):
    """Sum the per-shard image counts encoded in TFRecord file names.

    Each name is expected to contain ``-<count>.`` (for example
    ``ld_train00-1338.tfrec`` contributes 1338).

    Args:
        filenames: Iterable of TFRecord paths or file names.

    Returns:
        int: Total of all counts; 0 for an empty iterable.

    Raises:
        ValueError: If a name carries no ``-<digits>.`` marker.
    """
    # Compiled once; "+" rather than "*" so a bare "-." can never produce an
    # empty count that would blow up inside int().
    count_pattern = re.compile(r"-([0-9]+)\.")
    total = 0
    for filename in filenames:
        match = count_pattern.search(filename)
        if match is None:
            raise ValueError(
                f"cannot read an image count from file name {filename!r}")
        total += int(match.group(1))
    return total

# Total image count across all shards, read from the shard file names.
NUM_TRAINING_IMAGES = count_data_items(training_files)

print('Dataset: {} training images '.format(NUM_TRAINING_IMAGES))


In [None]:
# Read the label table and the label-number -> disease-name map from the
# dataset directory.
work_dir = '../input/cassava-leaf-disease-classification/'
data = pd.read_csv(f'{work_dir}train.csv')
with open(f'{work_dir}label_num_to_disease_map.json') as js:
    classes = json.load(js)
print(classes)

In [None]:
def show_label_distribution(label_count):
    """Draw a horizontal count plot with one bar per distinct label value.

    Args:
        label_count: Sequence of labels, one entry per sample.
    """
    _, axis = plt.subplots(figsize=(20, 8))
    sns.countplot(y=label_count, palette='deep', ax=axis)
    axis.tick_params(labelsize=16)
    
# Plot how many rows of train.csv carry each label.
show_label_distribution(data['label'].values)

In [None]:
#decode_image - For converting bytestring images into arrays.
#read_labeled_tfrecord - Returns image & label from the tfrecords.
#read_labeled_tfrecord_with_imageid - Returns image, label & image id from the tfrecords.
#read_unlabeled_tfrecord - Returns image & image id.
def decode_image(image_data):
    """Decode a JPEG byte string into a 3-channel image tensor.

    Args:
        image_data: Encoded JPEG bytes.

    Returns:
        The decoded image reshaped to ``[*image_size, 3]``.
    """
    decoded = tf.image.decode_jpeg(image_data, channels=3)
    # reshape, not resize: the pixel data is left as-is and only the shape is fixed
    return tf.reshape(decoded, [*image_size, 3])


def read_labeled_tfrecord(example):
    """Parse one serialized example into an ``(image, label)`` pair.

    Args:
        example: Serialized record holding ``image`` and ``target`` features.

    Returns:
        Tuple of the decoded image and the target cast to ``tf.int32``.
    """
    feature_spec = {
        "target": tf.io.FixedLenFeature([], tf.int64),
        "image": tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image']), tf.cast(parsed['target'], tf.int32)


def read_labeled_tfrecord_with_imageid(example):
    """Parse one serialized example into ``(image, label, image_name)``.

    Args:
        example: Serialized record holding ``image``, ``target`` and
            ``image_name`` features.

    Returns:
        Tuple of the decoded image, the target cast to ``tf.int32`` and the
        raw ``image_name`` string tensor.
    """
    feature_spec = {
        "target": tf.io.FixedLenFeature([], tf.int64),
        "image_name": tf.io.FixedLenFeature([], tf.string),
        "image": tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)

    pixels = decode_image(parsed['image'])
    target = tf.cast(parsed['target'], tf.int32)
    return pixels, target, parsed['image_name']

def read_unlabeled_tfrecord(example):
    """Parse one serialized example without a target into ``(image, image_name)``.

    Args:
        example: Serialized record holding ``image`` and ``image_name`` features.

    Returns:
        Tuple of the decoded image and the raw ``image_name`` string tensor.
    """
    feature_spec = {
        'image_name' : tf.io.FixedLenFeature([], tf.string),
        'image' : tf.io.FixedLenFeature([], tf.string)
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image']), parsed['image_name']

In [None]:
def show_im(fig, row, col, index, path=None, image=None, title=None, title_color='white'):
    """Draw one image into cell ``index`` of a ``row`` x ``col`` grid on ``fig``.

    Args:
        fig: Figure that receives the subplot.
        row: Number of grid rows, forwarded to ``fig.add_subplot``.
        col: Number of grid columns, forwarded to ``fig.add_subplot``.
        index: Cell position, forwarded to ``fig.add_subplot``.
        path: Image file to open with PIL; only used when ``image`` is None.
        image: Already-loaded image; takes precedence over ``path``.
        title: Optional subplot title (skipped when falsy).
        title_color: Colour of the title text.

    Raises:
        ValueError: If neither ``image`` nor ``path`` is supplied.
    """
    if image is None:
        if path is None:
            raise ValueError("show_im needs either `image` or `path`")
        # Import the submodule explicitly: a bare `import PIL` does not
        # guarantee that PIL.Image has been loaded.
        from PIL import Image
        image = Image.open(path)

    ax = fig.add_subplot(row, col, index)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.imshow(image)

    if title:
        # Title the axes created above; plt.title would act on whichever axes
        # is globally current, which need not belong to `fig`.
        ax.set_title(title, color=title_color)

    fig.tight_layout(pad=0.02)

In [None]:
# Sentinel passed as num_parallel_reads / num_parallel_calls / prefetch size below.
AUTO = tf.data.experimental.AUTOTUNE

def load_dataset(filenames, labeled=True, ordered=False):
    """Build a parsed ``tf.data`` pipeline over the given TFRecord files.

    Args:
        filenames: TFRecord paths to read.
        labeled: Parse with ``read_labeled_tfrecord`` when True, otherwise
            with ``read_unlabeled_tfrecord``.
        ordered: When False, deterministic ordering is switched off in the
            dataset options.

    Returns:
        Dataset of parsed records.
    """
    options = tf.data.Options()
    if not ordered:
        options.experimental_deterministic = False

    parser = read_labeled_tfrecord if labeled else read_unlabeled_tfrecord
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
    return records.with_options(options).map(parser, num_parallel_calls=AUTO)

In [None]:
# Load all labelled records with ordering enabled; the train/test split further
# down selects records by their position in this dataset.
temp_training_ds = load_dataset(training_files, labeled=True, ordered=True)

print(temp_training_ds)

In [None]:
# One full pass over the ordered dataset to collect every label; the positions
# 0..N-1 then stand in for the samples themselves.
y_targets = np.array([target.numpy() for _image, target in temp_training_ds])
X_indices = np.arange(y_targets.shape[0])

In [None]:
from sklearn.model_selection import train_test_split

# Stratified 95/5 split of the sample positions (not the images themselves).
# The split uses its own fixed random_state rather than the notebook-wide seed.
X_train_indices, X_test_indices, y_train_targets, y_test_targets = train_test_split(
    X_indices, y_targets, test_size=0.05, stratify=y_targets, random_state=53)

print(len(y_train_targets))
print(len(y_test_targets))

In [None]:
def get_selected_dataset(ds, X_indices_np):
    """Keep only the elements of ``ds`` whose position is in ``X_indices_np``.

    Args:
        ds: Source dataset; elements are numbered in iteration order.
        X_indices_np: Positions to retain.

    Returns:
        Dataset of the retained elements, with the position counter removed.
    """
    wanted = tf.constant(X_indices_np, dtype=tf.int64)

    return (
        ds.enumerate()
        # every position is compared against the full list of wanted positions
        .filter(lambda position, element: tf.math.reduce_any(position == wanted))
        .map(lambda position, element: element)
    )

In [None]:
# Materialise the two halves of the split as filtered views of the same ordered dataset.
splitted_train_ds = get_selected_dataset(temp_training_ds, X_train_indices)
splitted_test_ds = get_selected_dataset(temp_training_ds, X_test_indices)

print(splitted_train_ds)
print(splitted_test_ds)

In [None]:
def data_augment(image, target):
    """Randomly flip ``image`` left-right, then up-down; ``target`` passes through.

    Returns:
        Tuple ``(flipped_image, target)``.
    """
    flipped = tf.image.random_flip_left_right(image)
    flipped = tf.image.random_flip_up_down(flipped)
    return flipped, target

In [None]:
def get_training_dataset():
    """Return the endless training pipeline built on ``splitted_train_ds``.

    Stages, in order: repeat forever, shuffle through a 2048-element buffer,
    batch to ``BATCH_SIZE``, run ``data_augment`` on each batch, prefetch.
    """
    return (
        splitted_train_ds
        .repeat()
        .shuffle(2048)
        .batch(BATCH_SIZE)
        .map(data_augment, num_parallel_calls=AUTO)
        .prefetch(AUTO)
    )

def get_test_dataset():
    """Return the held-out pipeline: batch ``splitted_test_ds``, cache, prefetch."""
    return (
        splitted_test_ds
        .batch(BATCH_SIZE)
        .cache()
        .prefetch(AUTO)
    )

In [None]:
# Re-derive the counts from the split; this overwrites the file-name-based
# NUM_TRAINING_IMAGES computed earlier.
NUM_TRAINING_IMAGES = len(y_train_targets)
NUM_VALIDATION_IMAGES = len(y_test_targets)
# Whole batches per epoch; needed by fit because the training dataset repeats forever.
STEPS_PER_EPOCH = NUM_TRAINING_IMAGES // BATCH_SIZE
print('Dataset: {} training images, {} validation images'.format(
    NUM_TRAINING_IMAGES, NUM_VALIDATION_IMAGES))

In [None]:
# Sanity check: print the batch shapes of the first three batches of each
# pipeline, followed by the labels of the last batch inspected.
print("Training data shape:")
for image, label in get_training_dataset().take(3):
    print(image.numpy().shape, label.numpy().shape)
# These labels come from the training loop above, so label them as such.
print("Training data label examples:", label.numpy())

print("Test data shape:")
for image, label in get_test_dataset().take(3):
    print(image.numpy().shape, label.numpy().shape)
print("Test data label examples:", label.numpy())

In [None]:
def dataset_to_numpy_util(dataset, N):
    """Return the first ``N`` samples of a batched dataset as NumPy values.

    The dataset is unbatched and re-batched to ``N`` so that one element holds
    exactly the samples wanted (fewer if the dataset is shorter than ``N``).

    Args:
        dataset: Batched dataset yielding ``(images, labels)`` pairs.
        N: Number of samples to pull.

    Returns:
        Tuple ``(numpy_images, numpy_labels)``.

    Raises:
        ValueError: If the dataset yields no elements.
    """
    rebatched = dataset.unbatch().batch(N)
    first_batch = next(iter(rebatched), None)
    if first_batch is None:
        raise ValueError("dataset_to_numpy_util received an empty dataset")
    images, labels = first_batch
    return images.numpy(), labels.numpy()

def display_single(image, subplot, red=False):
    """Show ``image`` in pyplot subplot slot ``subplot`` with the axes hidden.

    Args:
        image: Image data accepted by ``plt.imshow``.
        subplot: Subplot code (e.g. 331) passed to ``plt.subplot``.
        red: Not used by this function.

    Returns:
        ``subplot + 1``, the code of the following slot.
    """
    plt.subplot(subplot)
    plt.axis('off')
    plt.imshow(image)
    next_slot = subplot + 1
    return next_slot
  
def display_sample_images(dataset):
    """Plot up to nine images from ``dataset`` in a 3x3 grid and show the figure."""
    plt.figure(figsize=(13,13))
    images, _labels = dataset_to_numpy_util(dataset, 9)

    slot = 331  # 3 rows, 3 columns, first cell
    for image in images[:9]:
        slot = display_single(image, slot)

    plt.subplots_adjust(wspace=0.1, hspace=0.1)
    plt.show()

In [None]:
# Preview nine images from the held-out pipeline.
display_sample_images(get_test_dataset())

In [None]:
# Preview nine images from the training pipeline (after random flips).
display_sample_images(get_training_dataset())

In [None]:
import keras
from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import RMSprop, Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB4
import tensorflow_addons as tfa


# Build and compile the classifier inside the strategy scope so that the model
# and optimizer are created under the chosen distribution strategy.
with strategy.scope():
    # The head ends in softmax, so the loss consumes probabilities, not logits.
    loss_function = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=False,
        name='sparse_categorical_crossentropy'
    )

    # Alternative weights file that was tried here:
    # "../input/cassava-leaf-disease-efficientnetb4/efficientnetb4_noisystudent_notop.h5/efficientnetb4_noisystudent_notop.h5"
    pre_weights = "imagenet"

    # Backbone without its classification top.
    effnet_b4 = EfficientNetB4(
        input_shape=(*image_size, 3),
        weights=pre_weights,
        include_top=False,
        drop_connect_rate=0.4,
    )

    model = keras.Sequential([
        effnet_b4,
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Flatten(),
        # One softmax unit per entry in the label map.
        keras.layers.Dense(len(classes), activation='softmax'),
    ])

    model.compile(
        loss=loss_function,
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        metrics=['accuracy'],
    )

# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

In [None]:
import time
tic = time.time()

# Save the model each time the validation loss reaches a new minimum.
checkpoint_cb = ModelCheckpoint(
        "Cassava_best_model.h5",
        save_best_only=True,
        monitor='val_loss',
        mode='min',
    )

# Multiply the learning rate by 0.2 after 2 epochs without val_loss improvement,
# never going below 1e-6.
learning_rate=ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=2,
        min_lr=1e-6,
        mode='min',
        verbose=1,
    )

# Stop after 3 epochs without val_loss improvement and roll back to the best weights.
es = EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=3,
        restore_best_weights=True,
        verbose=1,
    )

# Both datasets are already batched to BATCH_SIZE, so no batch_size is passed to
# fit. steps_per_epoch is required because the training dataset repeats forever.
# NOTE(review): epochs is the literal 25 here; the `epochs` config variable is not read.
History=model.fit(
        get_training_dataset(),
        validation_data=get_test_dataset(),
        epochs=25,
        steps_per_epoch=STEPS_PER_EPOCH,
        callbacks=[es, checkpoint_cb, learning_rate],
    )

toc = time.time()

In [None]:
# Wall-clock duration of the fit call, truncated to whole minutes.
print(f"model training took {int((toc - tic) / 60)} minutes")

In [None]:
# Best per-epoch accuracy seen during training; the two maxima may come from
# different epochs. "Testing" here is the held-out validation split passed to fit.
print(f"Training Accuracy :{ max(History.history['accuracy'])}")
print(f"Testing Accuracy :{ max(History.history['val_accuracy'])}")

In [None]:
def trai_test_plot(acc, test_acc, loss, test_loss):
    """Plot training vs. validation accuracy and loss in two side-by-side panels.

    Args:
        acc: Per-epoch training accuracy values.
        test_acc: Per-epoch validation accuracy values.
        loss: Per-epoch training loss values.
        test_loss: Per-epoch validation loss values.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 10))
    fig.suptitle("Model's metrics comparison", fontsize=20)

    # One panel per metric; both panels share the same layout.
    panels = (
        ('Accuracy', acc, test_acc),
        ('Loss', loss, test_loss),
    )
    for ax, (metric, train_values, val_values) in zip(axes, panels):
        # Epochs are numbered from 1 on the x axis.
        ax.plot(range(1, len(train_values) + 1), train_values)
        ax.plot(range(1, len(val_values) + 1), val_values)
        ax.set_title(f'History of {metric}', fontsize=15)
        ax.set_xlabel('Epochs', fontsize=15)
        ax.set_ylabel(metric, fontsize=15)
        ax.legend(['training', 'validation'])
    plt.show()
    

# Plot the curves recorded by model.fit.
trai_test_plot(
    History.history['accuracy'],
    History.history['val_accuracy'],
    History.history['loss'],
    History.history['val_loss']
)

In [None]:
from keras.utils import  plot_model
# Only the layer graph is drawn, so build the reference architecture without
# fetching or loading pretrained weights.
model_plot = EfficientNetB4(weights=None)
plot_model(model_plot)