In [None]:
import numpy as np 
import pandas as pd 
import os
import seaborn as sns
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import train_test_split
import cv2
import re
import glob
import tensorflow as tf
import tensorflow.keras.backend as K

# TPU/GPU Configuration

In [None]:
# Hardware detection: choose the tf.distribute strategy that the rest of the
# notebook builds its models under.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print(f'Running on TPU {tpu.master()}')
except ValueError:
    # A ValueError from the resolver is treated as "no TPU available".
    tpu = None

if not tpu:
    # Without a TPU, use whatever strategy tf.distribute currently reports.
    strategy = tf.distribute.get_strategy()
else:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

REPLICAS = strategy.num_replicas_in_sync
AUTO = tf.data.experimental.AUTOTUNE
print(f'REPLICAS: {REPLICAS}')

# Load data

In [None]:
# Competition input: raw training JPEGs and the CSV with the `image_id` and
# `label` columns used further down.
basepath = "../input/cassava-leaf-disease-classification/"
train_images = basepath + 'train_images/'
train_label = pd.read_csv(basepath + 'train.csv')

# os.listdir() and glob.glob() return entries in arbitrary, filesystem-dependent
# order. Every file list is sorted so that the seeded train/validation split and
# the order of test predictions are the same on every run.
DATASET = '../input/cassava-leaf-disease-tfrecords-384x384/'
train_filenames_name = sorted(file for file in os.listdir(DATASET) if file.endswith(".tfrec"))
train_filenames = [DATASET + tfrec for tfrec in train_filenames_name]

test_filenames_name = sorted(
    file for file in os.listdir(basepath + 'test_tfrecords/') if file.endswith(".tfrec"))
test_filenames = [basepath + 'test_tfrecords/' + tfrec for tfrec in test_filenames_name]

# Checkpoints consumed by the inference loop.
model_path_list = sorted(glob.glob('../input/cassava-leaf-disease-classon-inceptionv3/*.hdf5'))

In [None]:
# Display the entries found directly under the competition input directory.
os.listdir(basepath)

# Hyperparameters and seeding

In [None]:
# Global settings shared by the input pipeline, the models and the inference loop.
SEED = 42            # passed to every seeding call and to train_test_split
N_CLASSES = 5        # width of the prediction matrix
SIZE = [384, 384]    # height and width the labeled images are reshaped to
TTA_STEPS = 5        # prediction passes per checkpoint in the inference loop
AUTO = tf.data.experimental.AUTOTUNE
BATCH_SIZE = strategy.num_replicas_in_sync * 32   # 32 examples per replica

def seed_everything(SEED):
    """Seed every random number generator this notebook draws from.

    Args:
        SEED: integer seed applied to Python's `random`, NumPy and TensorFlow.
    """
    # The sample-image cell uses random.sample(), so the stdlib generator has
    # to be seeded as well for the notebook to be reproducible.
    random.seed(SEED)
    np.random.seed(SEED)
    tf.random.set_seed(SEED)

seed_everything(SEED)

# Visualize image samples

In [None]:
sns.set(rc={'figure.figsize': (12, 5)})

# Number of training rows per class, as a two-column frame (label, label_count).
train_count_label = train_label.groupby('label').aggregate({'label': 'count'}).rename(
    columns={'label': 'label_count'}).reset_index()

# Replace the numeric class ids with readable names for the axis labels.
train_count_label['label'] = train_count_label['label'].map(
    {0: '0: Cassava Bacterial Blight (CBB)',
     1: '1: Cassava Brown Streak Disease (CBSD)',
     2: '2: Cassava Green Mottle (CGM)',
     3: '3: Cassava Mosaic Disease (CMD)',
     4: '4: Healthy'}).astype(str)

# x and y are passed by keyword: current seaborn releases no longer accept
# them positionally.
train_count_label_l = sns.barplot(x='label', y='label_count', data=train_count_label)
for item in train_count_label_l.get_xticklabels():
    item.set_rotation(65)

In [None]:
def show_img(img_list, cassava):
    """Plot the images in `img_list` on a three-column grid, each titled `cassava`.

    Args:
        img_list: iterable of image file paths to read with OpenCV.
        cassava: title text drawn above every panel.

    Raises:
        FileNotFoundError: if one of the paths cannot be read as an image.
    """
    img_list = list(img_list)
    n_cols = 3
    # Never fewer than two rows, so up to six images keep the original 2x3
    # layout; longer lists get extra rows instead of overflowing the grid.
    n_rows = max(2, (len(img_list) + n_cols - 1) // n_cols)
    plt.figure(figsize=(20, 3.5 * n_rows))
    for pos, path in enumerate(img_list, start=1):
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError(f'Could not read image: {path}')
        # OpenCV loads BGR; convert to RGB before handing the array to matplotlib.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        plt.subplot(n_rows, n_cols, pos)
        plt.title(cassava)
        plt.imshow(image)
        plt.axis("off")

In [None]:
# Show six randomly chosen training images for every class.
for num_label, cassava in zip(range(N_CLASSES), list(train_count_label.label)):
    random_label_number = random.sample(list(train_label[train_label['label'] == num_label].index), 6)
    # The sampled values are index labels, so they are looked up with .loc;
    # .iloc is positional and only agrees with them on a default RangeIndex.
    im = [train_images + jpg for jpg in train_label.loc[random_label_number].image_id]
    show_img(im, cassava)

# Dataset

In [None]:
def decode_image(image, labeled = True):
    """Decode a JPEG byte string into a float32 RGB image of shape [*SIZE, 3].

    Args:
        image: scalar string tensor holding the encoded JPEG.
        labeled: True for training records, which are reshaped straight to
            SIZE; False for test records, which are reshaped to 512x512 and
            then resized to SIZE.

    Returns:
        A float32 tensor of shape [*SIZE, 3]; pixel values are divided by 255.
    """
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.cast(image, tf.float32)/255.0
    if labeled:
        image = tf.reshape(image, [*SIZE, 3])
    else:
        # The reshape pins the static shape of the 512x512 test records. The
        # resize then brings them to the resolution the network is built for,
        # so both kinds of record reach the model with the same shape.
        image = tf.reshape(image, [512,512,3])
        image = tf.image.resize(image, SIZE)
    return image

def data_augment(image, label=None, seed=SEED):
    """Apply randomly gated flips, colour jitter and 90-degree rotations to `image`.

    Args:
        image: image tensor to augment.
        label: optional label; returned unchanged next to the image when given.
        seed: accepted but not used by the active code; it only appears in the
            commented-out lines below.

    Returns:
        The augmented image alone when `label` is None, otherwise the tuple
        `(image, label)`.
    """
    #image = tf.image.rot90(image,k=np.random.randint(4))
    #image = tf.image.random_flip_left_right(image, seed=seed) 
    #image = tf.image.random_flip_up_down(image, seed=seed) 
    
    # One scalar uniform draw between 0 and 1 per augmentation group; each draw
    # gates the matching step below.
    p_rotate = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    
    p_pixel_1 = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_pixel_2 = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_pixel_3 = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    
    p_flip = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    
    # Flip stage: p_flip >= .8 -> random left/right flip, .5 <= p_flip < .8 ->
    # random up/down flip, otherwise rot90 with k=4 (four quarter turns, which
    # brings the image back to its starting orientation).
    if p_flip >= .8:
        image = tf.image.random_flip_left_right(image) 
    elif p_flip >= .5:   
        image = tf.image.random_flip_up_down(image) 
    else:
        image = tf.image.rot90(image, k=4) 

    # Colour stage: saturation, contrast and brightness are jittered
    # independently, each only when its own draw is >= .4.
    # NOTE(review): no clipping is applied afterwards in this function.
    if p_pixel_1 >= .4: 
        image = tf.image.random_saturation(image, lower=.7, upper=1.3)
    if p_pixel_2 >= .4: 
        image = tf.image.random_contrast(image, lower=.8, upper=1.2)
    if p_pixel_3 >= .4: 
        image = tf.image.random_brightness(image, max_delta=.1)
    
    # Rotation stage: the p_rotate band selects 3, 2 or 1 quarter turns; the
    # (.15, .25] band uses k=4 (a full turn) and draws of .15 or less skip the
    # rotation call entirely.
    if p_rotate > .75:
        image = tf.image.rot90(image, k=3) 
    elif p_rotate > .5:
        image = tf.image.rot90(image, k=2) 
    elif p_rotate > .25:
        image = tf.image.rot90(image, k=1)
    elif p_rotate > .15:
        image = tf.image.rot90(image, k=4) 
    
    # Unlabeled callers pass only the image and get only the image back.
    if label is None:
        return image
    else:
        return image, label

def read_labeled_tfrecord(example):
    """Parse one serialized training record into an `(image, label)` pair.

    The record is read with two scalar features: `image` (string, passed to
    `decode_image` as a labeled image) and `target` (int64, cast to int32).
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "target": tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return (
        decode_image(parsed['image'], labeled = True),
        tf.cast(parsed['target'], tf.int32),
    )

def read_unlabeled_tfrecord(example):
    """Parse one serialized test record into an `(image, image_name)` pair.

    The record is read with two scalar string features: `image`, passed to
    `decode_image` as an unlabeled image, and `image_name`, returned as parsed.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "image_name": tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    decoded = decode_image(parsed['image'], labeled = False)
    return decoded, parsed['image_name']

def load_dataset(filenames, labeled=True, ordered=False):
    """Build a parsed `tf.data` pipeline over the given TFRecord files.

    Args:
        filenames: TFRecord paths handed to `tf.data.TFRecordDataset`.
        labeled: parse with `read_labeled_tfrecord` when True, otherwise with
            `read_unlabeled_tfrecord`.
        ordered: when False, the dataset options switch off deterministic
            ordering; when True the options are left at their defaults.

    Returns:
        The mapped dataset, neither batched nor shuffled.
    """
    options = tf.data.Options()
    if not ordered:
        options.experimental_deterministic = False

    parse_fn = read_labeled_tfrecord if labeled else read_unlabeled_tfrecord
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
    records = records.with_options(options)
    return records.map(parse_fn, num_parallel_calls=AUTO)

def count_data_items(filenames):
    """Sum the record counts encoded in TFRecord file names.

    Each file name must contain ``-<count>.``, e.g. ``train00-1338.tfrec``
    counts as 1338 records. Only the final path component is inspected, so a
    directory such as ``data-5.v2/`` cannot be mistaken for a count.

    Args:
        filenames: iterable of TFRecord paths.

    Returns:
        The total as a NumPy integer; 0 for an empty iterable.

    Raises:
        ValueError: if a file name carries no ``-<count>.`` part.
    """
    # Compiled once; "+" (not "*") so an empty count is never accepted.
    count_pattern = re.compile(r"-([0-9]+)\.")
    counts = []
    for filename in filenames:
        match = count_pattern.search(os.path.basename(filename))
        if match is None:
            raise ValueError(f"No '-<count>.' part in TFRecord file name: {filename!r}")
        counts.append(int(match.group(1)))
    # An explicit integer dtype keeps the empty case an integer 0 instead of 0.0.
    return np.sum(counts, dtype=np.int64)

In [None]:
# Hold out 20% of the TFRecord files for validation; the split is over files,
# not over individual images.
train_filenames, valid_filenames = train_test_split(
    train_filenames,
    test_size=0.2,
    random_state=SEED,
)

In [None]:
# Number of examples held in the shuffle buffer. The original call passed SEED
# as shuffle()'s first argument, which is the buffer size, so the seed was never
# applied and the buffer was an accidental 42 elements.
SHUFFLE_BUFFER = 512

# Training pipeline: augment, shuffle reproducibly, batch, repeat indefinitely.
train_dataset = (load_dataset(train_filenames, labeled=True)
    .map(data_augment, num_parallel_calls=AUTO)
    .shuffle(SHUFFLE_BUFFER, seed=SEED)
    .batch(BATCH_SIZE,drop_remainder=True)
    .repeat()
    .prefetch(AUTO))


# Validation pipeline: no augmentation or shuffling; batches are cached after
# the first pass.
valid_dataset = (load_dataset(valid_filenames, labeled=True)
    .batch(BATCH_SIZE)
    .cache()
    .prefetch(AUTO))

In [None]:
# Print both pipelines' reprs, one per line.
print(train_dataset, valid_dataset, sep='\n')

# Model

In [None]:
# Unlabeled test pipeline, built with ordered=True and batched.
test_dataset = load_dataset(test_filenames, labeled=False, ordered=True).batch(BATCH_SIZE)

# The same pipeline reduced to the file names, flattened to one name per element.
test_dataset_image_name = test_dataset.map(lambda _image, name: name).unbatch()

# Total number of test records, taken from the counts in the file names.
num_test_images = count_data_items(test_filenames)

# Fetch all names as one batch and convert the byte strings to unicode.
test_ids = (
    next(iter(test_dataset_image_name.batch(num_test_images)))
    .numpy()
    .astype('U')
)

# Accumulator for the ensemble's class scores, one row per test image.
predictions = np.zeros((num_test_images, N_CLASSES))

In [None]:
def Inception_model(unitsdense):
    """Build an InceptionV3-based classifier inside the active strategy scope.

    Args:
        unitsdense: sequence whose first two items are the activations of the
            150-unit and the 50-unit dense layers, in that order.

    Returns:
        An uncompiled `tf.keras.Sequential` model: InceptionV3 without its top
        and with average pooling (no pretrained weights), dropout of 0.15, the
        two dense layers, and a 5-unit softmax output.
    """
    first_activation, second_activation = unitsdense[0], unitsdense[1]

    with strategy.scope():
        backbone = tf.keras.applications.InceptionV3(
            input_shape=(*SIZE, 3), weights=None, pooling='avg', include_top=False)
        head = [
            tf.keras.layers.Dropout(0.15),
            tf.keras.layers.Dense(150, activation=first_activation),
            tf.keras.layers.Dense(50, activation=second_activation),
            tf.keras.layers.Dense(5, activation='softmax'),
        ]
        model = tf.keras.Sequential([backbone, *head])

    return model

In [None]:
# Four ensemble members that differ only in the activations of the two hidden
# dense layers.
model1, model2, model3, model4 = (
    Inception_model(activations)
    for activations in (['relu', 'relu'], ['elu', 'elu'], ['relu', 'elu'], ['elu', 'relu'])
)

In [None]:
def _tta_images(image, image_name):
    """Drop the file name and return a randomly augmented copy of the image batch."""
    return data_augment(image)


# Without checkpoints the loop below would leave an all-zero matrix and every
# image would silently be predicted as class 0.
if not model_path_list:
    raise FileNotFoundError('No .hdf5 checkpoints found in model_path_list.')

ensemble = (model1, model2, model3, model4)
# Every (checkpoint, TTA step, model) pass gets the same weight, so each row of
# the accumulated matrix is a true average of the softmax outputs.
n_passes = len(ensemble) * TTA_STEPS * len(model_path_list)

# Start from zero so that re-running this cell does not add onto an earlier run.
predictions = np.zeros((num_test_images, N_CLASSES))

for pat in range(len(model_path_list)):
    print(model_path_list[pat])
    K.clear_session()
    # NOTE(review): the same checkpoint is loaded into all four models although
    # their dense activations differ — confirm this is intended.
    for model in ensemble:
        model.load_weights(model_path_list[pat])

    for step in range(TTA_STEPS):
        # A map function traced under an unchanged global seed replays the same
        # random stream, so the seed is shifted per step to get different but
        # reproducible augmentations.
        tf.random.set_seed(SEED + step)
        # Test-time augmentation: without data_augment every step would score
        # the identical images and the loop would only repeat the same numbers.
        test_dataset_images = test_dataset.map(_tta_images)

        for model in ensemble:
            predictions += model.predict(test_dataset_images) / n_passes

    print(predictions)

# Put the global seed back to the notebook-wide value.
tf.random.set_seed(SEED)

In [None]:
# The winning class goes into its own name so that `predictions` keeps the
# averaged class scores and this cell gives the same result when re-run.
predicted_labels = np.argmax(predictions, axis=-1)
submission = pd.DataFrame({'image_id': test_ids, 'label': predicted_labels})

In [None]:
# Display the submission frame (columns: image_id, label).
submission

In [None]:
# Write the submission to submission.csv, leaving out the index column.
submission.to_csv('submission.csv', index = False)