In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf

# Report the TensorFlow version in use. Python f-strings interpolate with plain
# braces; a leading "$" would be printed literally.
print(f"Tensorflow version {tf.__version__}")

In [None]:
# Pick the distribution strategy: a TPUStrategy when a TPU can be resolved,
# otherwise whatever strategy is currently active (tf.distribute.get_strategy()).
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    # Plain {} interpolation; a "$" prefix would be printed literally by Python.
    print(f"Running on TPU: {tpu.master()}")
except ValueError:
    # The resolver is expected to raise ValueError when no TPU is available.
    print("failed")
    tpu = None

if tpu:
    # The cluster must be connected and the TPU system initialised before the
    # strategy object is created.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy()

# Later cells scale the batch size and the peak learning rate by this number.
print(f"REPLICAS: {strategy.num_replicas_in_sync}")

In [None]:
from kaggle_datasets import KaggleDatasets

# Resolve the Google Cloud Storage location of the "tpu-getting-started" dataset;
# the TFRecord paths built in the next cell are derived from it.
GCS_DS_PATH = KaggleDatasets().get_gcs_path("tpu-getting-started")
print(GCS_DS_PATH)
# IPython shell escape: list the bucket contents as a sanity check.
!gsutil ls $GCS_DS_PATH

In [None]:
# Image resolution [height, width]. It selects the matching "tfrecords-jpeg-HxW"
# folder and is also the shape every decoded image is reshaped to.
IMAGE_SIZE = [512, 512] # alternatives noted by the author: 224, 331, 512
GCS_PATH = f"{GCS_DS_PATH}/tfrecords-jpeg-{IMAGE_SIZE[0]}x{IMAGE_SIZE[1]}"

# TFRecord shards for each split, discovered by globbing the GCS folders.
TRAINING_FILENAMES = tf.io.gfile.glob(f"{GCS_PATH}/train/*.tfrec")
VALIDATION_FILENAMES = tf.io.gfile.glob(f"{GCS_PATH}/val/*.tfrec")
TEST_FILENAMES = tf.io.gfile.glob(f"{GCS_PATH}/test/*.tfrec")

In [None]:
from tensorflow.data.experimental import AUTOTUNE

# Human-readable flower names, indexed by the integer "class" label stored in the
# TFRecords. Used for plot titles and, via len(CLASSES), for the size of the
# model's output layer. Names carry no leading or trailing whitespace.
CLASSES = ['pink primrose',    'hard-leaved pocket orchid',
           'canterbury bells', 'sweet pea',
           'wild geranium',    'tiger lily',
           'moon orchid',      'bird of paradise',
           'monkshood',        'globe thistle',         # 00 - 09

           'snapdragon',       "colt's foot",
           'king protea',      'spear thistle',
           'yellow iris',      'globe-flower',
           'purple coneflower','peruvian lily',
           'balloon flower',   'giant white arum lily', # 10 - 19

           'fire lily',        'pincushion flower',
           'fritillary',       'red ginger',
           'grape hyacinth',   'corn poppy',
           'prince of wales feathers', 'stemless gentian',
           'artichoke',        'sweet william',         # 20 - 29

           'carnation',        'garden phlox',
           'love in the mist', 'cosmos',
           'alpine sea holly', 'ruby-lipped cattleya',
           'cape flower',      'great masterwort',
           'siam tulip',       'lenten rose',           # 30 - 39

           'barberton daisy',  'daffodil',
           'sword lily',       'poinsettia',
           'bolero deep blue', 'wallflower',
           'marigold',         'buttercup',
           'daisy',            'common dandelion',      # 40 - 49

           'petunia',          'wild pansy',
           'primula',          'sunflower',
           'lilac hibiscus',   'bishop of llandaff',
           'gaura',            'geranium',
           'orange dahlia',    'pink-yellow dahlia',    # 50 - 59

           'cautleya spicata', 'japanese anemone',
           'black-eyed susan', 'silverbush',
           'californian poppy','osteospermum',
           'spring crocus',    'iris',
           'windflower',       'tree poppy',            # 60 - 69

           'gazania',          'azalea',
           'water lily',       'rose',
           'thorn apple',      'morning glory',
           'passion flower',   'lotus',
           'toad lily',        'anthurium',             # 70 - 79

           'frangipani',       'clematis',
           'hibiscus',         'columbine',
           'desert-rose',      'tree mallow',
           'magnolia',         'cyclamen',
           'watercress',       'canna lily',            # 80 - 89

           'hippeastrum',      'bee balm',
           'pink quill',       'foxglove',
           'bougainvillea',    'camellia',
           'mallow',           'mexican petunia',
           'bromelia',         'blanket flower',        # 90 - 99

           'trumpet creeper',  'blackberry lily',
           'common tulip',     'wild rose']             # 100 - 103

def decode_image(image_data):
    """Turn encoded JPEG bytes into a float32 RGB image tensor.

    The JPEG is decoded with 3 channels, cast to float32, divided by 255 and
    reshaped to [*IMAGE_SIZE, 3] so downstream ops see a fully static shape.
    """
    pixels = tf.image.decode_jpeg(image_data, channels=3)
    scaled = tf.cast(pixels, tf.float32) / 255.0
    return tf.reshape(scaled, [*IMAGE_SIZE, 3])

def read_labeled_tfrecord(example):
    """Parse one serialized labeled example into an (image, label) pair.

    The record must hold a scalar string feature "image" (JPEG bytes) and a
    scalar int64 feature "class"; the label is returned as int32.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "class": tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed["image"]), tf.cast(parsed["class"], tf.int32)

def read_unlabeled_tfrecord(example):
    """Parse one serialized unlabeled example into an (image, id) pair.

    The record must hold scalar string features "image" (JPEG bytes) and "id";
    the id is passed through unchanged as a string tensor.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "id": tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed["image"]), parsed["id"]

def load_dataset(filenames, labeled=True, ordered=False):
    """Build a dataset of decoded examples from TFRecord files.

    Args:
        filenames: TFRecord file paths to read.
        labeled: when True each element is (image, label); otherwise (image, id).
        ordered: when False, deterministic ordering is switched off so records
            may be delivered in any order (faster reads).

    Returns:
        An unbatched tf.data.Dataset of parsed examples.
    """
    read_options = tf.data.Options()
    if not ordered:
        # Trade record order for read throughput.
        read_options.experimental_deterministic = False

    parse_fn = read_labeled_tfrecord if labeled else read_unlabeled_tfrecord
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTOTUNE)
    records = records.with_options(read_options)
    return records.map(parse_fn, num_parallel_calls=AUTOTUNE)

In [None]:
import re
import random
# Global batch size: 16 examples per replica times the number of replicas.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync

# Seeded generator that data_augment draws its per-example seeds from.
rng = tf.random.Generator.from_seed(4711, alg='philox')

def data_augment(image, label):
    """Randomly augment one training image; the label is passed through.

    Applies, in order: brightness jitter (max delta 0.25), contrast jitter
    (factor in [0.9, 1.1]), independent left/right and up/down flips, and a
    random crop covering 70-90% of each spatial side that is resized back to
    IMAGE_SIZE.

    All randomness is expressed as TensorFlow ops. `Dataset.map` traces this
    function into a graph once, so a Python-side `random.uniform` call would be
    evaluated a single time and frozen into the graph as a constant.

    Args:
        image: float image tensor of shape [*IMAGE_SIZE, 3].
        label: label tensor, returned unchanged.

    Returns:
        (augmented_image, label), with the image shaped [*IMAGE_SIZE, 3].
    """
    # One independent shape-[2] seed per random op. Reusing a single seed would
    # make the ops draw identical numbers (e.g. both flips always agreeing).
    (brightness_seed, contrast_seed, flip_lr_seed, flip_ud_seed,
     crop_size_seed, crop_offset_seed) = tf.unstack(rng.make_seeds(6), axis=1)

    image = tf.image.stateless_random_brightness(image, 0.25, brightness_seed)
    image = tf.image.stateless_random_contrast(image, 0.9, 1.1, contrast_seed)
    image = tf.image.stateless_random_flip_left_right(image, flip_lr_seed)
    image = tf.image.stateless_random_flip_up_down(image, flip_ud_seed)

    # Per-example crop size: an independent fraction in [0.7, 0.9) for height
    # and width, truncated to whole pixels.
    side_fractions = tf.random.stateless_uniform(
        [2], seed=crop_size_seed, minval=0.7, maxval=0.9)
    crop_hw = tf.cast(tf.cast(IMAGE_SIZE, tf.float32) * side_fractions, tf.int32)
    crop_size = tf.concat([crop_hw, [3]], axis=0)
    image = tf.image.stateless_random_crop(image, crop_size, crop_offset_seed)

    image = tf.image.resize(image, [IMAGE_SIZE[0], IMAGE_SIZE[1]])
    # The crop size is dynamic, so pin the static shape again for batching.
    image = tf.reshape(image, [*IMAGE_SIZE, 3])
    return image, label

def get_training_dataset():
    """Return the endless, augmented, shuffled and batched training pipeline.

    Stages, in order: load labeled records (unordered), augment, repeat
    forever, shuffle with a 2048-element buffer, batch by BATCH_SIZE, prefetch.
    """
    return (
        load_dataset(TRAINING_FILENAMES, labeled=True)
        .map(data_augment, num_parallel_calls=AUTOTUNE)
        .repeat()
        .shuffle(2048)
        .batch(BATCH_SIZE)
        .prefetch(AUTOTUNE)
    )

def get_validation_dataset(ordered=False):
    """Return the batched, cached validation pipeline.

    Args:
        ordered: forwarded to load_dataset; True keeps record order.
    """
    return (
        load_dataset(VALIDATION_FILENAMES, labeled=True, ordered=ordered)
        .batch(BATCH_SIZE)
        .cache()
        .prefetch(AUTOTUNE)
    )
    
def get_test_dataset(ordered=False):
    """Return the batched test pipeline of (image, id) pairs.

    Args:
        ordered: forwarded to load_dataset; True keeps record order.
    """
    return (
        load_dataset(TEST_FILENAMES, labeled=False, ordered=ordered)
        .batch(BATCH_SIZE)
        .prefetch(AUTOTUNE)
    )

def count_data_items(filenames):
    """Sum the item counts encoded in the given file names.

    Each name is searched for the first "-<digits>." group (e.g. the 798 in
    "...-798.tfrec"); the digits are read as that file's item count.

    Args:
        filenames: iterable of file name strings.

    Returns:
        The total of all per-file counts, as computed by np.sum.
    """
    count_pattern = re.compile(r"-([0-9]*)\.")
    per_file_counts = []
    for path in filenames:
        per_file_counts.append(int(count_pattern.search(path).group(1)))
    return np.sum(per_file_counts)

# Dataset sizes are derived from the counts embedded in the shard file names,
# not by reading the records themselves.
NUM_TRAINING_IMAGES = count_data_items(TRAINING_FILENAMES)
NUM_VALIDATION_IMAGES = count_data_items(VALIDATION_FILENAMES)
NUM_TEST_IMAGES = count_data_items(TEST_FILENAMES)

# Summary of the three splits.
print(f"""
{NUM_TRAINING_IMAGES} training images
{NUM_VALIDATION_IMAGES} validation images
{NUM_TEST_IMAGES} unlabeled testing images""")

In [None]:
# Build the three input pipelines with their default (unordered) settings.
ds_train = get_training_dataset()
ds_val = get_validation_dataset()
ds_test = get_test_dataset()

# Printing a dataset shows its element spec (shapes and dtypes), not its contents.
print(f"""
Training data: {ds_train}
Validation data: {ds_val}
Test data: {ds_test}
""")

In [None]:
# Sanity check: pull three training batches and print image/label batch shapes.
print("Training data shapes:")
for image, label in ds_train.take(3):
    print(image.numpy().shape, label.numpy().shape)
# `label` still refers to the last batch taken by the loop above.
print(f"Training data labels: \n{label.numpy()}")

In [None]:
# Sanity check: pull three test batches and print image/id batch shapes.
print("Test data shapes:")
for image, idnum in ds_test.take(3):
    print(image.numpy().shape, idnum.numpy().shape)
# The test split carries ids rather than labels, so the heading says so.
# `idnum` still refers to the last batch taken by the loop above; the byte
# strings are converted to unicode for display.
print(f"Test data ids: \n{idnum.numpy().astype('U')}")

In [None]:
from matplotlib import pyplot

# Preview one augmented training batch on a 10x10 grid, titled by class name.
image, label = next(ds_train.take(1).as_numpy_iterator())
# A batch holds BATCH_SIZE images, which is below 100 when few replicas are in
# use, so cap the number of panels at what the batch actually contains.
num_preview = min(100, len(image))
pyplot.figure(figsize=(20,20))
for i in range(num_preview):
    pyplot.subplot(10,10,i+1)
    # Brightness/contrast jitter can push values outside [0, 1]; clip for display.
    pyplot.imshow(np.clip(image[i,:,:,:], 0.0, 1.0))
    pyplot.axis("off")
    pyplot.title(CLASSES[label[i]])

In [None]:
# IPython shell escape: quietly install the `efficientnet` package, which the
# model cell imports EfficientNetB7 from. No version is pinned here.
!pip install -q efficientnet

In [None]:
from tensorflow import keras
#from tensorflow.keras.applications import DenseNet201
from efficientnet.tfkeras import EfficientNetB7
def build_baseline_cnn():
    """Build the small from-scratch CNN kept as an alternative to EfficientNet.

    Four Conv2D(kernel 4, relu) + MaxPooling2D(2x2) stages with 256, 128, 64
    and 64 filters, then Flatten, Dense(128, relu) and a softmax over
    len(CLASSES) classes. The first convolution is applied to the `inputs`
    tensor; a layer cannot be called on a model object.

    This function is not called by default, so it never replaces `model`.
    Call it inside `strategy.scope()` if the baseline should be trained.

    Returns:
        An uncompiled keras.Model taking [*IMAGE_SIZE, 3] images.
    """
    inputs = keras.Input(shape=[*IMAGE_SIZE, 3])
    features = inputs
    for filters in (256, 128, 64, 64):
        features = keras.layers.Conv2D(filters, kernel_size=4, activation='relu')(features)
        features = keras.layers.MaxPooling2D(pool_size=(2,2))(features)
    flat = keras.layers.Flatten()(features)
    hidden = keras.layers.Dense(128, activation='relu')(flat)
    outputs = keras.layers.Dense(len(CLASSES), activation="softmax")(hidden)
    return keras.Model(inputs=inputs, outputs=outputs)


# Transfer-learning model: a frozen EfficientNetB7 backbone ('noisy-student'
# weights) followed by a trainable classification head. Variables are created
# under the distribution strategy so they are placed on its replicas.
with strategy.scope():
    base_model = EfficientNetB7(
        include_top=False,
        weights='noisy-student',
        input_shape=[*IMAGE_SIZE, 3]
    )
    base_model.trainable = False  # only the head below is trained
    model = keras.Sequential([
        base_model,
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dense(2048, activation='relu'),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(1024, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(len(CLASSES), activation='softmax')
    ])

# summary() prints by itself and returns None, so it is not wrapped in print().
model.summary()
# Last expression of the cell, so the architecture diagram is displayed.
keras.utils.plot_model(model)

In [None]:
# Create the optimizer and compile under the same distribution strategy the model
# was built in, so that optimizer and metric variables are created in that scope.
# The initial learning_rate is only a starting value: the LearningRateScheduler
# callback sets the rate at the beginning of every epoch.
with strategy.scope():
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001,beta_1=0.9,beta_2=0.999,amsgrad=False),
        # Labels are integer class ids, hence the sparse loss and metric.
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy']
    )

In [None]:
# Learning-rate schedule parameters consumed by lrfn().
LR_START = 0.0001
# Peak rate scales linearly with the number of replicas.
LR_MAX = 0.00005 * strategy.num_replicas_in_sync
# Floor that the exponential decay approaches.
LR_MIN = 0.00001
# Number of initial epochs treated as the ramp-up phase.
LR_RAMPUP_EPOCHS = 4
# Number of epochs held at LR_MAX after the ramp-up phase.
LR_SUSTAIN_EPOCHS = 6
# Per-epoch multiplicative decay applied to (LR_MAX - LR_MIN) afterwards.
LR_EXP_DECAY = 0.8

def lrfn(epoch):
    """Return the learning rate to use for the given (0-based) epoch.

    Three phases:
      * ramp-up (epoch < LR_RAMPUP_EPOCHS): linear interpolation from LR_START
        at epoch 0 towards LR_MAX, reached at epoch LR_RAMPUP_EPOCHS;
      * sustain (next LR_SUSTAIN_EPOCHS epochs): constant LR_MAX;
      * decay: (LR_MAX - LR_MIN) shrinks by LR_EXP_DECAY per epoch on top of
        the LR_MIN floor.

    The schedule is a pure function of `epoch`, so runs are reproducible.

    Args:
        epoch: epoch index as passed by keras.callbacks.LearningRateScheduler.

    Returns:
        The learning rate as a float.
    """
    if epoch < LR_RAMPUP_EPOCHS:
        # Deterministic linear ramp (a random draw here would make the rate
        # unpredictable and possibly close to zero).
        lr = (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START
    elif epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
        lr = LR_MAX
    else:
        epochs_into_decay = epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS
        lr = (LR_MAX - LR_MIN) * LR_EXP_DECAY ** epochs_into_decay + LR_MIN
    return lr

# Save options that route checkpoint I/O through the '/job:localhost' device.
local_save_options = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
# Sets the learning rate from lrfn() at the start of every epoch (verbose logging on).
lr_callback = keras.callbacks.LearningRateScheduler(lrfn, verbose=True)
# Writes "best_model.hdf5" only when the monitored validation accuracy improves.
cp_callback = keras.callbacks.ModelCheckpoint(
        "best_model.hdf5",
        monitor="val_sparse_categorical_accuracy",
        save_best_only=True,
        verbose=1,
        options=local_save_options
    )

In [None]:
# The training dataset repeats forever, so an "epoch" is defined as this many batches.
STEPS_PER_EPOCH = NUM_TRAINING_IMAGES // BATCH_SIZE
# History objects from successive fit() calls; re-running this cell clears them.
histories = []

In [None]:
# Upper bound on training epochs; no early-stopping callback is configured.
EPOCHS = 500
#STEPS_PER_EPOCH = NUM_TRAINING_IMAGES // BATCH_SIZE

# Train and keep the History so the plotting cells can show every fit() call.
histories.append(model.fit(
    ds_train,
    validation_data=ds_val,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    callbacks=[lr_callback, cp_callback]
))
# Restore the weights from the checkpoint file written by cp_callback.
model.load_weights("best_model.hdf5")
# Author's note: 16/16: 0.87123

In [None]:
# Training vs. validation loss per epoch, concatenated over all fit() calls.
pyplot.figure(figsize=(16,8))
for metric_name in ("loss", "val_loss"):
    pyplot.plot(np.hstack([run.history[metric_name] for run in histories]))
pyplot.title("Loss vs. Validation Loss")
pyplot.xlabel("Epoch")
pyplot.legend(["train", "valid."])

In [None]:
# Training vs. validation accuracy per epoch, concatenated over all fit() calls.
pyplot.figure(figsize=(16,8))
for metric_name in ("sparse_categorical_accuracy", "val_sparse_categorical_accuracy"):
    pyplot.plot(np.hstack([run.history[metric_name] for run in histories]))
pyplot.title("Accuracy vs. Validation Accuracy")
pyplot.xlabel("Epoch")
pyplot.legend(["train", "valid."])

In [None]:
# Rebuild the test pipeline in deterministic order: images and ids are read in
# two separate passes and must line up element by element.
ds_test = get_test_dataset(ordered=True)
ds_test_images = ds_test.map(lambda image,idnum: image)
ds_test_idnums = ds_test.map(lambda image,idnum: idnum)

print("Computing predictions...")
# Per-class probabilities for every test image; argmax picks the predicted class id.
probabilities = model.predict(ds_test_images)
predictions = np.argmax(probabilities, axis=-1)
print(predictions)

In [None]:
# Assemble the submission table: one row per test image, indexed by id.
submission = pd.DataFrame({
    # Re-batch all ids into a single batch of NUM_TEST_IMAGES, take it, and
    # convert the byte strings to unicode.
    'id': next(iter(ds_test_idnums.unbatch().batch(NUM_TEST_IMAGES))).numpy().astype("U"),
    'label': predictions
}).set_index('id')
# Bare expression so the notebook displays the frame.
submission

In [None]:
# Write the predictions to "submission.csv" in the current working directory.
submission.to_csv("submission.csv")