In [None]:

#Importing Libraries
!pip install --quiet efficientnet

import seaborn as sns
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import cv2 as cv
import random,warnings,math
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import optimizers,applications,Sequential,losses
import efficientnet.keras as efn
from kaggle_datasets import KaggleDatasets


warnings.filterwarnings('ignore')

In [None]:
# Look for a TPU cluster. A ValueError from the resolver is treated as
# "no TPU available" and leaves `tpu` set to None.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # No TPU: use whatever distribution strategy TensorFlow currently has active.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the cluster and initialise it before building the TPU strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICA: ", strategy.num_replicas_in_sync)

In [None]:
# Human-readable flower names; the list position is the integer class id
# (the preview cell indexes this list with the decoded label).
# Names carry no leading/trailing whitespace so they display cleanly and can
# be looked up by exact name.
Classes = [
    # 00 - 09
    'pink primrose', 'hard-leaved pocket orchid', 'canterbury bells', 'sweet pea', 'wild geranium',
    'tiger lily', 'moon orchid', 'bird of paradise', 'monkshood', 'globe thistle',
    # 10 - 19
    'snapdragon', "colt's foot", 'king protea', 'spear thistle', 'yellow iris',
    'globe-flower', 'purple coneflower', 'peruvian lily', 'balloon flower', 'giant white arum lily',
    # 20 - 29
    'fire lily', 'pincushion flower', 'fritillary', 'red ginger', 'grape hyacinth',
    'corn poppy', 'prince of wales feathers', 'stemless gentian', 'artichoke', 'sweet william',
    # 30 - 39
    'carnation', 'garden phlox', 'love in the mist', 'cosmos', 'alpine sea holly',
    'ruby-lipped cattleya', 'cape flower', 'great masterwort', 'siam tulip', 'lenten rose',
    # 40 - 49
    'barberton daisy', 'daffodil', 'sword lily', 'poinsettia', 'bolero deep blue',
    'wallflower', 'marigold', 'buttercup', 'daisy', 'common dandelion',
    # 50 - 59
    'petunia', 'wild pansy', 'primula', 'sunflower', 'lilac hibiscus',
    'bishop of llandaff', 'gaura', 'geranium', 'orange dahlia', 'pink-yellow dahlia',
    # 60 - 69
    'cautleya spicata', 'japanese anemone', 'black-eyed susan', 'silverbush', 'californian poppy',
    'osteospermum', 'spring crocus', 'iris', 'windflower', 'tree poppy',
    # 70 - 79
    'gazania', 'azalea', 'water lily', 'rose', 'thorn apple',
    'morning glory', 'passion flower', 'lotus', 'toad lily', 'anthurium',
    # 80 - 89
    'frangipani', 'clematis', 'hibiscus', 'columbine', 'desert-rose',
    'tree mallow', 'magnolia', 'cyclamen', 'watercress', 'canna lily',
    # 90 - 99
    'hippeastrum', 'bee balm', 'pink quill', 'foxglove', 'bougainvillea',
    'camellia', 'mallow', 'mexican petunia', 'bromelia', 'blanket flower',
    # 100 - 103
    'trumpet creeper', 'blackberry lily', 'common tulip', 'wild rose',
]
len(Classes)

In [None]:

# Ask KaggleDatasets for a GCS path (no dataset name is passed), then list
# that location with gsutil.
data_path = KaggleDatasets().get_gcs_path()
!gsutil ls $data_path


In [None]:
# All three splits live under the 512x512 JPEG TFRecord folder of the dataset.
tfrec_root = data_path + '/tfrecords-jpeg-512x512'
train_path = tfrec_root + '/train/'
test_path = tfrec_root + '/test/'
val_path = tfrec_root + '/val/'



In [None]:
# Collect the .tfrec shards of each split (train, test, val — in that order).
train_files, test_files, val_files = (
    tf.io.gfile.glob(split_dir + '*.tfrec')
    for split_dir in (train_path, test_path, val_path)
)

In [None]:
# Spatial size the decoded images are reshaped to (height, width).
image_size = [512, 512]
img_height, img_width = image_size

Epochs = 20

# Hard-coded; presumably the number of images in the test split — verify
# against the dataset before relying on it.
NUM_TEST_IMAGES = 7382

# Let tf.data choose buffer sizes / parallelism.
AUTO = tf.data.experimental.AUTOTUNE

# Global batch size: 16 examples per replica.
batch_size = 16 * strategy.num_replicas_in_sync

In [None]:
def _parse_image_function(example_proto):
    """Decode one serialized labeled example into an ``(image, label)`` pair.

    Args:
        example_proto: A serialized example holding a JPEG-encoded ``image``
            string and an int64 ``class`` feature.

    Returns:
        image: float32 tensor reshaped to ``[*image_size, 3]``, scaled by 1/255.
        label: the ``class`` feature cast to int32 (not one-hot encoded).
    """
    feature_spec = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'class': tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example_proto, feature_spec)

    # Decode to 3 channels, scale pixel values by 1/255, then pin the shape.
    pixels = tf.image.decode_jpeg(parsed['image'], 3)
    pixels = tf.cast(pixels, tf.float32) / 255.0
    image = tf.reshape(pixels, [*image_size, 3])

    label = tf.cast(parsed['class'], tf.int32)
    return image, label

In [None]:
def _parse_unlabled_fun(example):
    """Decode one serialized unlabeled example into an ``(image, id)`` pair.

    Args:
        example: A serialized example holding a JPEG-encoded ``image`` string
            and a string ``id`` feature.

    Returns:
        image: float32 tensor reshaped to ``[*image_size, 3]``, scaled by 1/255.
        idnum: the raw string ``id`` feature, passed through unchanged.
    """
    feature_spec = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'id': tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)

    # Same decoding steps as the labeled parser: decode, scale, pin the shape.
    pixels = tf.image.decode_jpeg(parsed['image'], 3)
    pixels = tf.cast(pixels, tf.float32) / 255.0
    image = tf.reshape(pixels, [*image_size, 3])

    return image, parsed['id']
    
    

In [None]:
def read_test_data(channel, channel_name):
    """Build the batched, unlabeled dataset used for inference.

    Args:
        channel: TFRecord file name(s) handed to ``tf.data.TFRecordDataset``.
        channel_name: Unused; kept so existing calls keep working.

    Returns:
        A dataset of ``(image, id)`` batches of up to ``batch_size`` elements.
        The final partial batch is kept so every test example is predicted.
    """
    dataset = tf.data.TFRecordDataset(channel)
    # Parallel map keeps element order by default, so ids and images read in
    # separate passes over this dataset still line up.
    dataset = dataset.map(_parse_unlabled_fun, num_parallel_calls=AUTO)
    dataset = dataset.batch(batch_size)
    # Prefetch last so whole batches are prepared ahead of the consumer.
    dataset = dataset.prefetch(AUTO)

    return dataset

In [None]:
import os

In [None]:
def read_dataset(channel, channel_name):
    """Build a shuffled, batched dataset of labeled ``(image, label)`` pairs.

    Args:
        channel: TFRecord file name(s) handed to ``tf.data.TFRecordDataset``.
        channel_name: Unused; kept so existing calls keep working.

    Returns:
        A dataset of ``(image, label)`` batches of exactly ``batch_size``
        elements; a trailing partial batch is dropped.
    """
    dataset = tf.data.TFRecordDataset(channel)
    dataset = dataset.map(_parse_image_function, num_parallel_calls=AUTO)
    # Shuffle individual examples (not batches) before grouping them.
    dataset = dataset.shuffle(buffer_size=10 * batch_size)
    dataset = dataset.batch(batch_size, drop_remainder=True)
    # Prefetch last so whole batches are prepared ahead of the training step.
    dataset = dataset.prefetch(AUTO)

    return dataset

In [None]:

# Both splits go through read_dataset, so validation batches are also shuffled
# and any trailing partial batch is dropped. The second argument is not used
# by read_dataset.
train_dataset = read_dataset(train_files, 'train')
validation_dataset = read_dataset(val_files, 'validation')

# def show_batch(image_batch):
#     plt.figure(figsize=(5,5))
#     for n in range(1):
#         ax = plt.subplot(1, 1, n+1)
#         plt.imshow(image_batch[n] / 255.0)
#         plt.axis("off")

        
# image_batch, label_batch = next(train_dataset.unbatch().as_numpy_iterator())

# show_batch(image_batch)

In [None]:
# Pull a single training batch and keep its first 16 images/labels as NumPy
# arrays for the preview grid.
for img, label in train_dataset.take(1):
    preview_images = img[:16].numpy()
    preview_labels = label[:16].numpy()
    data = [preview_images, preview_labels]

In [None]:
# Shapes of the sampled image array and label array.
data[0].shape,data[1].shape

In [None]:
# Draw the sampled images in a rows x cols grid, each titled with its class name.
rows, cols = 4, 4
fig = plt.figure(figsize=(10, 10))
for position in range(rows * cols):
    img, label = data[0][position], data[1][position]
    ax = fig.add_subplot(rows, cols, position + 1)  # subplot indices are 1-based
    ax.axis('off')
    ax.imshow(img)
    ax.set_title(Classes[label])
fig.tight_layout()
plt.show()

In [None]:
# Number of target classes, taken from the class-name list.
num_classes = len(Classes)

In [None]:

# Learning-rate schedule parameters read by lrfun.
LR_START = 0.00001  # rate at epoch 0
LR_MAX = 0.00005 * strategy.num_replicas_in_sync  # peak rate, scaled by the replica count
LR_MIN = 0.00001  # value the decay phase approaches
LR_RAMPUP_EPOCHS = 5  # epochs of linear ramp from LR_START towards LR_MAX
LR_SUSTAIN_EPOCHS = 0  # epochs held at LR_MAX after the ramp
LR_EXP_DECAY = 0.8  # per-epoch multiplier applied to (LR_MAX - LR_MIN) during decay


def lrfun(epoch):
    """Return the learning rate for ``epoch`` (ramp-up, sustain, then decay).

    Args:
        epoch: Zero-based epoch index.

    Returns:
        A linear interpolation from ``LR_START`` towards ``LR_MAX`` during the
        first ``LR_RAMPUP_EPOCHS`` epochs, ``LR_MAX`` for the following
        ``LR_SUSTAIN_EPOCHS`` epochs, and afterwards an exponential decay
        from ``LR_MAX`` towards ``LR_MIN`` with factor ``LR_EXP_DECAY``.
    """
    # Phase 1: linear ramp-up.
    if epoch < LR_RAMPUP_EPOCHS:
        return (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START

    # Phase 2: hold at the peak.
    if epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
        return LR_MAX

    # Phase 3: exponential decay, counted from the end of the sustain phase.
    epochs_into_decay = epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS
    return (LR_MAX - LR_MIN) * LR_EXP_DECAY ** epochs_into_decay + LR_MIN

In [None]:
EPOCHS = 10

# Keras callback that sets the learning rate from lrfun at each epoch.
lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfun, verbose = True)

# Preview the schedule over at least 25 epochs.
rng = list(range(max(25, EPOCHS)))
y = list(map(lrfun, rng))
plt.plot(rng, y)
print("Learning rate: {:.3g} to {:.3g} to {:.3g}".format(y[0], max(y), y[-1]))

In [None]:
import tqdm

In [None]:
%%time
with strategy.scope():
    # ImageNet-pretrained DenseNet201 without its classification head; the
    # backbone is frozen so only the layers added below are trained.
    pretrained_model = tf.keras.applications.DenseNet201(
        weights = 'imagenet',
        include_top = False,
        input_shape = [*image_size, 3],
    )
    pretrained_model.trainable = False

    # Classifier: backbone -> global average pooling -> softmax over all classes.
    DN201 = tf.keras.Sequential()
    DN201.add(pretrained_model)
    DN201.add(tf.keras.layers.GlobalAveragePooling2D())
    DN201.add(tf.keras.layers.Dense(len(Classes), activation = 'softmax', dtype = 'float32'))

    # Labels are integer class ids, hence the sparse loss and metric.
    DN201.compile(
        optimizer = 'adam',
        loss = 'sparse_categorical_crossentropy',
        metrics = ['sparse_categorical_accuracy'],
    )

In [None]:
%%time
# Train the DenseNet201 classifier. The epoch count comes from EPOCHS so the
# run length and the history plot's x axis cannot drift apart.
historicalDN201 = DN201.fit(
    train_dataset,
    epochs = EPOCHS,
    callbacks = [lr_callback],
    validation_data = validation_dataset,
)


In [None]:
# Per-epoch metrics recorded by Keras during the DenseNet201 run.
training_loss = historicalDN201.history['loss']
training_sparse_categorical_accuracy = historicalDN201.history['sparse_categorical_accuracy']
validation_loss = historicalDN201.history['val_loss']
validation_sparse_categorical_accuracy = historicalDN201.history['val_sparse_categorical_accuracy']

# Take the x axis from the recorded history so the plot still works when the
# run length differs from EPOCHS (e.g. an interrupted or re-configured run).
epochs = np.arange(len(training_loss))

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize = (12, 4))

loss_ax.plot(epochs, training_loss, label = 'Training Loss')
loss_ax.plot(epochs, validation_loss, label = 'Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('DenseNet201 loss')
loss_ax.legend()

acc_ax.plot(epochs, training_sparse_categorical_accuracy, label = 'Training Accuracy')
acc_ax.plot(epochs, validation_sparse_categorical_accuracy, label = 'Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('DenseNet201 accuracy')
acc_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
import tensorflow.keras.layers as layers
import tensorflow.keras.layers.experimental.preprocessing as preprocessing
from tensorflow.keras.applications import InceptionV3
channels = 3

# Reuse the strategy created when the notebook detected its hardware.
# Connecting to the TPU a second time would re-initialise the TPU system and
# would fail outright on a machine without a TPU; the datasets and batch size
# were also sized from `strategy`, so the model must be built under it.
tpu_strategy = strategy

with tpu_strategy.scope():
    # ImageNet-pretrained InceptionV3 without its classification head; unlike
    # the DenseNet201 run, the backbone is fine-tuned end to end.
    pretrained_model = InceptionV3(weights = 'imagenet', include_top = False, input_shape = [*image_size, 3])
    pretrained_model.trainable = True

    # Classifier: backbone -> global average pooling -> softmax over all classes.
    incv3 = tf.keras.Sequential([
        pretrained_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(len(Classes), activation = 'softmax', dtype = 'float32')
    ])

    # Labels are integer class ids, hence the sparse loss and metric.
    incv3.compile(
        optimizer = 'adam',
        loss = 'sparse_categorical_crossentropy',
        metrics = ['sparse_categorical_accuracy'])


# The learning rate is driven per epoch by lr_callback.
historicalincv3 = incv3.fit(
        train_dataset,
        epochs = 20,
        callbacks = [lr_callback],
        validation_data = validation_dataset)

In [None]:
# Print the layer/parameter summary of the InceptionV3 classifier.
incv3.summary()

In [None]:
# Despite the name, test_df is a batched dataset of (image, id) pairs.
test_df = read_test_data(test_files, 'test')

print('Predictions...')


def _images_only(image, idnum):
    # Drop the id so the model receives images alone.
    return image


test_images_df = test_df.map(_images_only)
probabilities = incv3.predict(test_images_df)
# The most probable class index for every test image.
predictions = probabilities.argmax(axis=-1)
print(predictions)

In [None]:
# Second pass over the test dataset, this time keeping only the ids.
new_test_id = test_df.map(lambda image, idnum: idnum).unbatch()

# Gather every id in a single batch. The batch is sized from the predictions
# rather than a hard-coded image count, so ids cannot be silently truncated
# when the test split has a different size.
test_ids = next(iter(new_test_id.batch(len(predictions)))).numpy().astype('U') # all in one batch
if len(test_ids) != len(predictions):
    raise ValueError(
        'Collected {} test ids for {} predictions'.format(len(test_ids), len(predictions))
    )

# Write the two-column CSV (id,label) expected for submission.
np.savetxt('submission.csv', np.rec.fromarrays([test_ids, predictions]), fmt=['%s', '%d'], delimiter=',', header='id,label', comments='')