In [None]:
import os
import glob
# Show what is inside the local 192x192 training folder of the attached dataset.
LOCAL_TRAIN_DIR = "../input/tpu-getting-started/tfrecords-jpeg-192x192/train"
print(os.listdir(LOCAL_TRAIN_DIR))

In [None]:
!pip install pdpipe
!pip install kaggledatasets
!pip install tensorflow_datasets

import seaborn as sns
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import pdpipe as pdp
import numpy as np
from tensorflow.keras.layers.experimental import preprocessing
from kaggle_datasets import KaggleDatasets

In [None]:
# Choose the distribution strategy: TPUStrategy when a TPU can be resolved,
# otherwise TensorFlow's default strategy.
strategy = tf.distribute.get_strategy()

# Probe for a TPU. A ValueError from the resolver is treated as "no TPU here".
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # No TPU resolved: stay on the default strategy.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# GCS location of the dataset attached to this notebook.
GCS_DS_PATH = KaggleDatasets().get_gcs_path()

# Map each available image size to the folder holding TFRecords of that size.
GCS_PATH_SELECT = {
    size: f"{GCS_DS_PATH}/tfrecords-jpeg-{size}x{size}"
    for size in (192, 224, 331, 512)
}
# Size used by the rest of the notebook.
GCS_PATH = GCS_PATH_SELECT[192]

# Shard lists for each split.
TRAINING_FILENAMES = tf.io.gfile.glob(GCS_PATH + '/train/*.tfrec')
VALIDATION_FILENAMES = tf.io.gfile.glob(GCS_PATH + '/val/*.tfrec')
# Predictions on this split are what gets submitted to the competition.
TEST_FILENAMES = tf.io.gfile.glob(GCS_PATH + '/test/*.tfrec')

In [None]:
!gsutil ls $GCS_PATH

In [None]:
# Shard lists for the train / validation / test splits.
train_filenames, val_filenames, test_filenames = (
    TRAINING_FILENAMES,
    VALIDATION_FILENAMES,
    TEST_FILENAMES,
)

# One TFRecordDataset per split; each yields raw serialized records that
# still have to be parsed.
train_dataset, val_dataset, test_dataset = (
    tf.data.TFRecordDataset(shards)
    for shards in (train_filenames, val_filenames, test_filenames)
)

In [None]:
# Peek at the first training record: decode it as a tf.train.Example proto and
# print it to see which features it carries.
for raw_record in train_dataset.take(1):
    serialized = raw_record.numpy()
    example = tf.train.Example()
    example.ParseFromString(serialized)
    print(example)

In [None]:
# Schema used to parse each serialized example: every feature is a scalar
# (shape []) with a default value that is used when the feature is absent.
feature_description = {
    name: tf.io.FixedLenFeature([], dtype, default_value=default)
    for name, dtype, default in (
        ("class", tf.int64, 0),
        ("id", tf.string, ""),
        ("image", tf.string, ""),
    )
}

def parse_labeled_data(example_proto):
    """Turn one serialized example into an ``(image, class)`` pair.

    Args:
        example_proto: A serialized ``tf.train.Example``, parsed with the
            module-level ``feature_description``.

    Returns:
        A tuple ``(image, label)`` where ``image`` is the decoded 3-channel
        JPEG reshaped to ``[192, 192, 3]`` and ``label`` is the parsed
        ``"class"`` feature.
    """
    features = tf.io.parse_single_example(example_proto, feature_description)
    # decode_jpeg yields uint8 pixels in [0, 255]; the reshape pins the static
    # shape, which is needed for TPU.
    pixels = tf.reshape(
        tf.image.decode_jpeg(features["image"], channels=3),
        [192, 192, 3],
    )
    label = features["class"]
    return pixels, label

def parse_unlabeled_data(example_proto):
    """Turn one serialized example into an ``(image, id)`` pair.

    Args:
        example_proto: A serialized ``tf.train.Example``, parsed with the
            module-level ``feature_description``.

    Returns:
        A tuple ``(image, idnum)`` where ``image`` is the decoded 3-channel
        JPEG reshaped to ``[192, 192, 3]`` and ``idnum`` is the parsed
        ``"id"`` string feature.
    """
    features = tf.io.parse_single_example(example_proto, feature_description)
    # decode_jpeg yields uint8 pixels in [0, 255]; the reshape pins the static
    # shape, which is needed for TPU.
    pixels = tf.reshape(
        tf.image.decode_jpeg(features["image"], channels=3),
        [192, 192, 3],
    )
    idnum = features["id"]
    return pixels, idnum

# Training pipeline: shuffle individual examples *before* batching. Shuffling
# after .batch() only reorders whole batches, so the same examples would be
# grouped together in every epoch.
train_dataset_2 = train_dataset.map(parse_labeled_data).shuffle(128*100).batch(128)

# Validation pipeline: no shuffle. Evaluation metrics do not depend on the
# order of the examples, so a shuffle buffer would only cost memory and time.
val_dataset_2 = val_dataset.map(parse_labeled_data).batch(128)

# Test pipeline: must stay in a stable order, because the prediction cell
# iterates this dataset twice (once for images, once for ids) and pairs the
# results positionally.
test_dataset_2 = test_dataset.map(parse_unlabeled_data).batch(128)

In [None]:
with strategy.scope():
    # Pretrained MobileNetV2 feature extractor, frozen so that only the new
    # classification head is trained. Passing input_shape lets Keras load the
    # ImageNet weights that match 192x192 inputs instead of falling back to
    # its default input size.
    base_model = tf.keras.applications.MobileNetV2(
        input_shape=(192, 192, 3),
        include_top=False,
        weights='imagenet',
    )
    base_model.trainable = False

    model = tf.keras.Sequential([
        # MobileNetV2's ImageNet weights expect pixels in [-1, 1]; the parsed
        # images are uint8 in [0, 255], so scale by 1/127.5 and shift by -1.
        tf.keras.layers.experimental.preprocessing.Rescaling(
            1. / 127.5, offset=-1, input_shape=(192, 192, 3)),
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(256, activation='relu'),
        # 104 output classes; softmax makes the outputs probabilities.
        tf.keras.layers.Dense(104, activation="softmax")
    ])
    # The final layer already applies softmax, so the loss must be told it
    # receives probabilities (from_logits=False), not raw logits.
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])

model.fit(train_dataset_2, epochs=10, validation_data=val_dataset_2)

In [None]:
# Drop the ids so that predict() only receives the image batches.
test_images_ds = test_dataset_2.map(lambda img, _idnum: img)
class_scores = model.predict(test_images_ds)
# Predicted class = index of the highest score along the last axis.
predictions = np.argmax(class_scores, axis=-1)
print(predictions)

print('Generating submission.csv file...')
# Second pass over the same dataset, this time keeping only the ids.
test_ids_ds = test_dataset_2.map(lambda _img, idnum: idnum).unbatch()
# Re-batch all ids into one batch sized to the number of predictions, then
# convert the byte strings to unicode.
all_ids_batch = test_ids_ds.batch(np.size(predictions))
test_ids = next(iter(all_ids_batch)).numpy().astype('U')

data = {"id": test_ids, "label": predictions}
submission = pd.DataFrame(data)
submission.to_csv("submission.csv", index=False)