In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.layers as layers
from tensorflow.keras.models import Model
from kaggle_datasets import KaggleDatasets
from tensorflow.keras.applications import DenseNet121
from sklearn.model_selection import train_test_split

In [None]:
# Resolve the dataset's GCS location once and derive every path from it,
# instead of repeating the KaggleDatasets lookup for each constant.
GCS_DS_PATH = KaggleDatasets().get_gcs_path()
IMAGE_PATH = GCS_DS_PATH + "/images/"
TEST_PATH = GCS_DS_PATH + "/test.csv"
TRAIN_PATH = GCS_DS_PATH + "/train.csv"
SUB_PATH = GCS_DS_PATH + "/sample_submission.csv"

# Target (height, width) every image is resized to, and the matching
# channels-last shape.
IMAGE_SIZE = (512, 512)
IMAGE_SIZE_WITH_CHANNEL = IMAGE_SIZE + (3,)
# Number of label columns / output units of the classifier.
NUM_CLASS = 4
EPOCHS = 20
# Baseline batch size; the accelerator-setup cell rescales this by the number
# of replicas.
BATCH_SIZE = 16

In [None]:
# Let tf.data pick parallelism / prefetch depth dynamically.
AUTO = tf.data.experimental.AUTOTUNE

# TPUClusterResolver() raises ValueError when no TPU can be found. Instead of
# crashing the notebook, fall back to TensorFlow's default strategy so the
# remaining cells still run on CPU/GPU.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
except ValueError:
    tpu = None

if tpu is not None:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # tf.distribute.experimental.TPUStrategy is deprecated; use the stable
    # class when this TensorFlow version provides it.
    if hasattr(tf.distribute, "TPUStrategy"):
        strategy = tf.distribute.TPUStrategy(tpu)
    else:
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    print("No TPU detected; using the default tf.distribute strategy.")
    strategy = tf.distribute.get_strategy()

STRATEGY_NUM_REPLICAS_IN_SYNC = strategy.num_replicas_in_sync
# Global batch size: 16 samples per replica. The literal 16 (rather than the
# current BATCH_SIZE) keeps this cell idempotent when it is re-run.
BATCH_SIZE = 16 * STRATEGY_NUM_REPLICAS_IN_SYNC

In [None]:
class Data():
    """Builds the train / validation / test ``tf.data`` pipelines.

    The constructor reads the CSV files at ``TRAIN_PATH`` and ``TEST_PATH``,
    converts each ``image_id`` into a JPEG path under ``IMAGE_PATH`` and holds
    out 15% of the training rows for validation (fixed ``random_state`` so the
    split is reproducible).

    Attributes:
        test_paths: array of image paths for the test set.
        train_paths / valid_paths: image paths after the train/valid split.
        train_labels / valid_labels: float32 label rows (columns ``healthy``
            through ``scab`` of the training CSV) matching the paths above.
    """

    def __init__(self):
        test_data = pd.read_csv(TEST_PATH)
        train_data = pd.read_csv(TRAIN_PATH)
        self.test_paths = test_data.image_id.apply(self.format_path).values

        all_paths = train_data.image_id.apply(self.format_path).values
        all_labels = train_data.loc[:, 'healthy':'scab'].values.astype(np.float32)
        (self.train_paths, self.valid_paths,
         self.train_labels, self.valid_labels) = train_test_split(
            all_paths, all_labels, test_size=0.15, random_state=2020)

    def format_path(self, image_name):
        """Return the full path of the JPEG for ``image_name``."""
        return IMAGE_PATH + image_name + '.jpg'

    def decode_image(self, filename, label=None):
        """Read a JPEG, scale it to [0, 1] floats and resize to ``IMAGE_SIZE``.

        Returns the image alone when ``label`` is None (test data), otherwise
        the ``(image, label)`` pair.
        """
        bits = tf.io.read_file(filename)
        image = tf.image.decode_jpeg(bits, channels=3)
        image = tf.cast(image, tf.float32) / 255.0
        image = tf.image.resize(image, IMAGE_SIZE)

        if label is None:
            return image
        return image, label

    def data_augment(self, image, label=None):
        """Apply random horizontal and vertical flips to ``image``.

        Returns the image alone when ``label`` is None, otherwise the
        ``(image, label)`` pair with the label untouched.
        """
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)

        if label is None:
            return image
        return image, label

    def process(self):
        """Build and return ``(train_dataset, valid_dataset, test_dataset)``.

        ``BATCH_SIZE`` and ``AUTO`` are read from the notebook globals at call
        time. The training dataset repeats indefinitely, so the caller must
        pass ``steps_per_epoch`` to ``fit``.
        """
        train_dataset = (
            tf.data.Dataset
            .from_tensor_slices((self.train_paths, self.train_labels))
            # Shuffle the cheap (path, label) pairs *before* decoding and
            # before repeat(): the buffer then holds strings rather than
            # decoded float32 images, covers the whole training set, and
            # every epoch is a complete, freshly shuffled pass.
            .shuffle(max(len(self.train_paths), 1))
            .repeat()
            .map(self.decode_image, num_parallel_calls=AUTO)
            .map(self.data_augment, num_parallel_calls=AUTO)
            .batch(BATCH_SIZE)
            .prefetch(AUTO)
        )

        valid_dataset = (
            tf.data.Dataset
            .from_tensor_slices((self.valid_paths, self.valid_labels))
            .map(self.decode_image, num_parallel_calls=AUTO)
            .batch(BATCH_SIZE)
            # Validation data is deterministic, so decoded batches are cached
            # after the first pass.
            .cache()
            .prefetch(AUTO)
        )

        test_dataset = (
            tf.data.Dataset
            .from_tensor_slices(self.test_paths)
            .map(self.decode_image, num_parallel_calls=AUTO)
            .batch(BATCH_SIZE)
            .prefetch(AUTO)
        )

        return (train_dataset, valid_dataset, test_dataset)

    def get_label_len(self):
        """Return the number of training samples (after the validation split)."""
        return len(self.train_labels)

In [None]:
# NOTE: this cell repeats the configuration cell near the top of the notebook.
# It runs *after* the TPU setup cell, so it must not reset BATCH_SIZE to the
# unscaled value: the data pipelines and steps_per_epoch are built later and
# read BATCH_SIZE at that point.
GCS_DS_PATH = KaggleDatasets().get_gcs_path()
IMAGE_PATH = GCS_DS_PATH + "/images/"
TEST_PATH = GCS_DS_PATH + "/test.csv"
TRAIN_PATH = GCS_DS_PATH + "/train.csv"
SUB_PATH = GCS_DS_PATH + "/sample_submission.csv"

IMAGE_SIZE = (512, 512)
IMAGE_SIZE_WITH_CHANNEL = IMAGE_SIZE + (3,)
NUM_CLASS = 4
EPOCHS = 20
# Keep the global batch size scaled by the replica count (16 per replica),
# consistent with the learning-rate scaling in LearningRateScheduler.
BATCH_SIZE = 16 * STRATEGY_NUM_REPLICAS_IN_SYNC

In [None]:
class LearningRateScheduler:
    """Epoch-based learning-rate schedule: ramp-up, sustain, exponential decay.

    ``lr_max`` is multiplied by ``STRATEGY_NUM_REPLICAS_IN_SYNC`` at
    construction time; the other parameters are stored unchanged.
    """

    def __init__(self, lr_start=0.00001, lr_max=0.00005,
                 lr_min=0.00001, lr_rampup_epochs=5,
                 lr_sustain_epochs=0, lr_exp_decay=.8):
        self.lr_start = lr_start
        self.lr_max = lr_max * STRATEGY_NUM_REPLICAS_IN_SYNC
        self.lr_min = lr_min
        self.lr_rampup_epochs = lr_rampup_epochs
        self.lr_sustain_epochs = lr_sustain_epochs
        self.lr_exp_decay = lr_exp_decay

    def scheduler(self, epoch):
        """Return the learning rate to use for the given (0-based) ``epoch``."""
        ramp_end = self.lr_rampup_epochs
        sustain_end = ramp_end + self.lr_sustain_epochs

        # Phase 1: linear ramp from lr_start towards lr_max.
        if epoch < ramp_end:
            slope = (self.lr_max - self.lr_start) / self.lr_rampup_epochs
            return slope * epoch + self.lr_start

        # Phase 2: hold at lr_max.
        if epoch < sustain_end:
            return self.lr_max

        # Phase 3: exponential decay from lr_max down towards lr_min.
        epochs_into_decay = epoch - self.lr_rampup_epochs - self.lr_sustain_epochs
        decay_factor = self.lr_exp_decay ** epochs_into_decay
        return (self.lr_max - self.lr_min) * decay_factor + self.lr_min

    def get_learning_rate_scheduler(self):
        """Wrap ``scheduler`` in a verbose Keras LearningRateScheduler callback."""
        return tf.keras.callbacks.LearningRateScheduler(self.scheduler, verbose=1)

In [None]:
# Instantiate the data helper (its constructor reads the CSVs and performs the
# train/validation split) and build the three tf.data pipelines.
data = Data()
train_dataset, valid_dataset, test_dataset = data.process()
# The training dataset repeats indefinitely, so fit() needs an explicit number
# of batches per epoch: training-set size floor-divided by the batch size.
steps_per_epoch = data.get_label_len() // BATCH_SIZE

In [None]:
# Learning-rate schedule with the class defaults: linear ramp-up, optional
# sustain phase, then exponential decay towards lr_min.
scheduler = LearningRateScheduler()

In [None]:
# Build and compile the model inside the distribution strategy's scope so its
# variables are created under that strategy.
with strategy.scope():
    model = tf.keras.Sequential([
        # ImageNet-pretrained DenseNet121 backbone without its classifier
        # head. The input shape comes from the shared constant so it always
        # matches the size decode_image resizes to.
        DenseNet121(input_shape=IMAGE_SIZE_WITH_CHANNEL,
                    weights='imagenet',
                    include_top=False),
        layers.GlobalAveragePooling2D(),
        layers.Dense(NUM_CLASS, activation='softmax'),
    ])

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['categorical_accuracy'])
    model.summary()

# The learning rate is set per epoch by the scheduler callback; the training
# dataset repeats, hence the explicit steps_per_epoch.
history = model.fit(train_dataset,
                    epochs=EPOCHS,
                    callbacks=[scheduler.get_learning_rate_scheduler()],
                    steps_per_epoch=steps_per_epoch,
                    validation_data=valid_dataset)

In [None]:
# Predict class probabilities for the test pipeline (softmax output, one
# column per class).
probs_dnn = model.predict(test_dataset, verbose=1)
# Load the sample submission and overwrite every column from 'healthy' onward
# with the predictions.
# NOTE(review): assumes the rows of sample_submission.csv are in the same
# order as test.csv and that its columns from 'healthy' onward match the
# model's class order; confirm.
sub = pd.read_csv(SUB_PATH)
sub.loc[:, 'healthy':] = probs_dnn
sub.to_csv('submission_dnn.csv', index=False)
# Show the first rows as a quick sanity check of the written file's contents.
sub.head()