In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

In [None]:
# Dataset locations, all expressed relative to the notebook's input directory.
basedir = "../input/tpu-getting-started"
tfrecordsdir = os.path.join(basedir, "tfrecords-jpeg-224x224")

# One sub-directory of TFRecord files per split.
traindir, testdir, valdir = (
    os.path.join(tfrecordsdir, split) for split in ("train", "test", "val")
)

# CSV that is read back later as the template for the submission file.
submission_file = os.path.join(basedir, "sample_submission.csv")

In [None]:
# Spatial size of the input images, and the full per-image shape with the
# three colour channels appended.
IMAGE_SIZE = (224, 224)
IMAGE_SHAPE = (*IMAGE_SIZE, 3)

# Number of examples per batch in every tf.data pipeline below.
BATCH_SIZE = 32

# Epoch budgets: first with the base model frozen, then for fine-tuning.
EPOCHS_INIT, EPOCHS_FINE = 10, 10

In [None]:
def get_tfrecord_ds(dir):
    """Build a ``TFRecordDataset`` over every file directly inside ``dir``.

    Args:
        dir: Directory path. It is globbed with ``<dir>/*`` and every match
            is handed to ``TFRecordDataset`` as an input file.

    Returns:
        A ``tf.data.TFRecordDataset`` of serialized records, with the number
        of parallel file reads left to ``tf.data.AUTOTUNE``.

    Raises:
        FileNotFoundError: If the glob matches nothing. Without this check an
            empty dataset would be returned silently and the failure would
            only surface later, far from the wrong path that caused it.
    """
    pattern = os.path.join(dir, "*")
    filenames = tf.io.gfile.glob(pattern)
    if not filenames:
        raise FileNotFoundError(f"No TFRecord files match {pattern!r}")
    return tf.data.TFRecordDataset(filenames, num_parallel_reads=tf.data.AUTOTUNE)

In [None]:
# Parsing specs for tf.io.parse_single_example. Every feature is a scalar
# (shape []): the encoded image bytes under 'image', plus an int64 'class'
# for the labelled spec or a string 'id' for the unlabelled one.
image_feature_description_train = {
    'image': tf.io.FixedLenFeature(shape=[], dtype=tf.string),
    'class': tf.io.FixedLenFeature(shape=[], dtype=tf.int64),
}

image_feature_description_test = {
    'image': tf.io.FixedLenFeature(shape=[], dtype=tf.string),
    'id': tf.io.FixedLenFeature(shape=[], dtype=tf.string),
}

def parse_image_train(proto):
    """Turn one serialized labelled example into an ``(image, label)`` pair.

    Args:
        proto: Serialized example, parsed against
            ``image_feature_description_train``.

    Returns:
        Tuple of the JPEG-decoded 3-channel image tensor and the value stored
        under the ``'class'`` feature.
    """
    features = tf.io.parse_single_example(proto, image_feature_description_train)
    decoded = tf.image.decode_jpeg(features["image"], channels=3)
    return decoded, features["class"]

def parse_image_test(proto):
    """Turn one serialized unlabelled example into an ``(image, id)`` pair.

    Args:
        proto: Serialized example, parsed against
            ``image_feature_description_test``.

    Returns:
        Tuple of the JPEG-decoded 3-channel image tensor and the value stored
        under the ``'id'`` feature.
    """
    features = tf.io.parse_single_example(proto, image_feature_description_test)
    decoded = tf.image.decode_jpeg(features["image"], channels=3)
    return decoded, features["id"]

In [None]:
# Parse and JPEG-decode the records in parallel instead of one at a time.
# tf.data keeps the output order deterministic by default, so the element
# order is the same as with a sequential map.
ds_train = get_tfrecord_ds(traindir).map(parse_image_train, num_parallel_calls=tf.data.AUTOTUNE)
ds_val = get_tfrecord_ds(valdir).map(parse_image_train, num_parallel_calls=tf.data.AUTOTUNE)
ds_test = get_tfrecord_ds(testdir).map(parse_image_test, num_parallel_calls=tf.data.AUTOTUNE)

# Training: cache the decoded examples first so that shuffling (1000-element
# buffer) and batching are redone on every pass over the cached data.
ds_train = ds_train.cache().shuffle(1000).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
# Validation / test: no shuffling, so whole batches are cached.
ds_val = ds_val.batch(BATCH_SIZE).cache().prefetch(tf.data.AUTOTUNE)
ds_test = ds_test.batch(BATCH_SIZE).cache().prefetch(tf.data.AUTOTUNE)

In [None]:
# Show the first nine images of one training batch in a 3x3 grid, each titled
# with its label value.
plt.figure(figsize=(10, 10))
for images, labels in ds_train.take(1):
    for idx in range(9):
        plt.subplot(3, 3, idx + 1)
        plt.imshow(images[idx])
        plt.title(labels[idx].numpy())
        plt.axis("off")
plt.show()

In [None]:
# Random augmentation stack: a horizontal flip followed by a rotation and a
# zoom, the latter two constructed with a factor of 0.2.
_preprocessing = tf.keras.layers.experimental.preprocessing
data_augmentation = tf.keras.Sequential([
    _preprocessing.RandomFlip(mode='horizontal'),
    _preprocessing.RandomRotation(factor=0.2),
    _preprocessing.RandomZoom(height_factor=0.2),
])

# Preview the augmentation: the original image in the first panel, followed by
# eight independently augmented versions of that same image.
plt.figure(figsize=(10, 10))
for ds in ds_train.take(1):
    plt.subplot(3, 3, 1)
    plt.axis("off")
    plt.imshow(ds[0][0])
    # The augmentation layers expect a batch dimension; build it once.
    img = tf.expand_dims(ds[0][0], 0)
    for i in range(2, 10):
        plt.subplot(3, 3, i)
        plt.axis("off")
        # training=True makes the random transforms explicit rather than
        # relying on the layers' default call behaviour.
        img_aug = data_augmentation(img, training=True)
        # The augmentation layers may return float pixels in [0, 255], which
        # imshow would clip to [0, 1]; cast back to uint8 so the preview is
        # rendered with the same value range as the original image.
        plt.imshow(tf.cast(img_aug[0], tf.uint8))
plt.show()

In [None]:
# Xception backbone with ImageNet weights and without its classification top,
# plus the input preprocessing function that belongs to it.
xception = tf.keras.applications.xception
preprocess_input = xception.preprocess_input
base_model = xception.Xception(
    weights='imagenet',
    include_top=False,
    input_shape=IMAGE_SHAPE,
)
# Freeze the backbone for the first training phase.
base_model.trainable = False

In [None]:
# Number of logits produced by the classification head.
NUM_CLASSES = 104

# Functional model: augmentation -> Xception preprocessing -> backbone ->
# global average pooling -> dropout -> dense logits.
inputs = tf.keras.Input(shape=IMAGE_SHAPE)
augmented = data_augmentation(inputs)
scaled = preprocess_input(augmented)
# The backbone is always called with training=False, independent of its
# `trainable` flag.
features = base_model(scaled, training=False)
pooled = tf.keras.layers.GlobalAveragePooling2D()(features)
regularized = tf.keras.layers.Dropout(rate=0.2)(pooled)
outputs = tf.keras.layers.Dense(units=NUM_CLASSES)(regularized)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

base_learning_rate = 3e-4
initial_optimizer = tf.keras.optimizers.Adam(learning_rate=base_learning_rate)
# The head has no softmax, so the loss is told to expect raw logits.
logits_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=initial_optimizer, loss=logits_loss, metrics=['accuracy'])

In [None]:
# First training phase: EPOCHS_INIT epochs, validated on ds_val after each.
history = model.fit(
    x=ds_train,
    validation_data=ds_val,
    epochs=EPOCHS_INIT,
)

In [None]:
# Per-epoch curves recorded by the first call to model.fit.
acc, val_acc = history.history['accuracy'], history.history['val_accuracy']
loss, val_loss = history.history['loss'], history.history['val_loss']

epochs_range = range(EPOCHS_INIT)

# (subplot index, training curve, training label,
#  validation curve, validation label, legend location, title)
panels = [
    (1, acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (2, loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
]

plt.figure(figsize=(8, 8))
for position, train_curve, train_label, val_curve, val_label, legend_loc, title in panels:
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()

In [None]:
# Mark the whole backbone trainable, then re-freeze every layer before the
# last 30 so that only those final layers remain trainable.
fine_tune_at = -30
base_model.trainable = True
frozen_layers = base_model.layers[:fine_tune_at]
for frozen_layer in frozen_layers:
    frozen_layer.trainable = False

In [None]:
# Compile again for the fine-tuning phase: RMSprop at a tenth of the base
# learning rate, with the same loss and metric as before.
fine_tune_optimizer = tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate/10)
fine_tune_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(
    optimizer=fine_tune_optimizer,
    loss=fine_tune_loss,
    metrics=['accuracy'],
)

In [None]:
# Second training phase: starts counting at epoch EPOCHS_INIT and stops at
# epoch EPOCHS_INIT + EPOCHS_FINE.
history_fine = model.fit(
    x=ds_train,
    validation_data=ds_val,
    initial_epoch=EPOCHS_INIT,
    epochs=EPOCHS_INIT+EPOCHS_FINE,
)

In [None]:
# Concatenate the curves of both training phases into NEW lists.
# Using `+=` here would extend the lists stored inside `history.history` in
# place, so `history` would be silently modified and every re-run of this
# cell would append the fine-tuning curves one more time. Rebuilding from the
# two History objects keeps the cell idempotent.
acc = history.history['accuracy'] + history_fine.history['accuracy']
val_acc = history.history['val_accuracy'] + history_fine.history['val_accuracy']

loss = history.history['loss'] + history_fine.history['loss']
val_loss = history.history['val_loss'] + history_fine.history['val_loss']

In [None]:
# Accuracy (top) and loss (bottom) over both training phases, with a vertical
# marker at the index of the last epoch of the first phase.
fine_tune_start = EPOCHS_INIT-1

# (subplot index, training curve, training label,
#  validation curve, validation label, legend location, title)
combined_panels = [
    (1, acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (2, loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
]

plt.figure(figsize=(8, 8))
for position, train_curve, train_label, val_curve, val_label, legend_loc, title in combined_panels:
    plt.subplot(2, 1, position)
    plt.plot(train_curve, label=train_label)
    plt.plot(val_curve, label=val_label)
    # The marker spans the y-limits the axes have after the curves are drawn.
    plt.plot([fine_tune_start, fine_tune_start], plt.ylim(), label='Start Fine Tuning')
    plt.legend(loc=legend_loc)
    plt.title(title)
# Only the bottom panel (the current axes after the loop) gets an x label.
plt.xlabel('epoch')
plt.show()

In [None]:
# Persist the trained model under the relative path "model".
model.save(filepath="model")

In [None]:
# The model outputs one row of logits per test image; the predicted label is
# the index of the largest logit in each row.
pred = model.predict(x=ds_test)
pred_label = tf.math.argmax(input=pred, axis=1)

In [None]:
# Drop the images, flatten the batches, and decode each id from bytes to str.
ds_test_id = ds_test.map(lambda _image, image_id: image_id).unbatch()
ids = [raw_id.decode("utf-8") for raw_id in ds_test_id.as_numpy_iterator()]

In [None]:
# Start from the sample submission file and overwrite its 'label' and 'id'
# columns with the predictions and the ids collected from ds_test, then write
# the result without the index column.
df = pd.read_csv(submission_file).assign(label=pred_label, id=ids)
df.to_csv("submission.csv", index=False)