In [None]:
!pip install --upgrade tensorflow openimages gdown

In [None]:
import tensorflow as tf
print(tf.__version__)

2.11.0


In [None]:
import gdown
import tarfile
import os
ds_id = "1DqQkuNgWj7K8VnwFEuxs-_sO_LLC5ezp"
ds_fname = "gic_dataset.tar.xz"
gdown.download(id=ds_id, output=ds_fname, quiet=True)
with tarfile.open(ds_fname) as f:
        f.extractall()
os.remove(ds_fname)

In [None]:
!ls gic_dataset/train/ | wc -l
!du -sh gic_dataset

50
3.6G	gic_dataset


# Load the dataset

In [None]:
IMG_SIZE = (224, 224)

In [None]:
ds_path = "gic_dataset"
train_dir = os.path.join(ds_path, "train")
batch_size = 32
with open("gic_labels.txt") as f:
        gic_labels = f.readlines()
gic_labels = list(map(lambda x: x.strip(), gic_labels))
train_ds, val_ds = tf.keras.utils.image_dataset_from_directory(
        directory=train_dir,
        class_names=gic_labels,
        batch_size=batch_size,
        image_size=IMG_SIZE,
        shuffle=True,
        seed=1,
        validation_split=0.2,
        subset="both")
train_ds = train_ds.prefetch(buffer_size=tf.data.AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=tf.data.AUTOTUNE)

Found 72733 files belonging to 50 classes.
Using 58187 files for training.
Using 14546 files for validation.


In [None]:
test_dir = os.path.join(ds_path, "test")
for cat in gic_labels:
  clp=os.path.join(test_dir,cat)
  if not os.path.exists(clp):
    os.mkdir(clp)

In [None]:
test_ds = tf.keras.utils.image_dataset_from_directory(
        directory=test_dir,
        class_names=gic_labels,
        batch_size=batch_size,
        image_size=IMG_SIZE,
        shuffle=True,
        seed=1)

Found 1486 files belonging to 50 classes.


# Define the Model 

In [None]:
data_augmentation = tf.keras.Sequential(layers=[
        tf.keras.layers.RandomFlip(),
        tf.keras.layers.RandomRotation(factor=0.2),
        tf.keras.layers.RandomWidth(factor=0.2),
        tf.keras.layers.RandomHeight(factor=0.2),
        tf.keras.layers.RandomZoom(height_factor=0.2, width_factor=0.2)
    ])
input_shape = IMG_SIZE + (3, )
base_model = tf.keras.applications.EfficientNetV2B0(
        include_top=False, input_shape=input_shape, pooling="avg")
base_model.trainable = False
prediction_layer = tf.keras.layers.Dense(
        units=50, activation=tf.keras.activations.softmax)
inputs = tf.keras.Input(shape=input_shape)
x = data_augmentation(inputs)
x = base_model(x, training=False)
x = tf.keras.layers.Dropout(rate=0.2)(x)
ouputs = prediction_layer(x)
model = tf.keras.Model(inputs, ouputs)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-b0_notop.h5




# Define the callbacks

In [None]:
class OverFitMonCB(tf.keras.callbacks.Callback):
    """Monitor Overfitting."""

    def on_epoch_end(self, epoch, logs):
        """Print the loss ratio."""
        print(f"\tval_loss/loss: {logs['val_loss']/logs['loss']}")


TBOARD_ROOT_LOGDIR = "artifacts/tboard/"

import time

def get_tboard_logdir():
    """Get unique logdir name for each run."""
    run_id = time.strftime("run_%Y_%m_%d_%H_%M_%S")

    return os.path.join(TBOARD_ROOT_LOGDIR, run_id)

In [None]:
cb_checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath="artifacts/models/", save_best_only=True)
cb_earlystop = tf.keras.callbacks.EarlyStopping(patience=5,
                                                    restore_best_weights=True)
cb_tboard = tf.keras.callbacks.TensorBoard(log_dir=get_tboard_logdir())
callbacks = (cb_checkpoint, cb_earlystop, cb_tboard, OverFitMonCB())

# Feature extraction

In [None]:
model.compile(optimizer=tf.keras.optimizers.Nadam(learning_rate=0.2),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=("acc", ))

In [None]:
initial_epochs = 10
history = model.fit(train_ds,
                        epochs=initial_epochs,
                        callbacks=callbacks,
                        validation_data=val_ds)

Epoch 1/10








val_loss/loss: 0.8527725686851783
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10


In [None]:
model.evaluate(test_ds)



[13.961403846740723, 0.8681023120880127]

# Fine tune the model

In [None]:
base_model.trainable = True
fine_tune_from = 252

for layer in base_model.layers[:fine_tune_from]:
    layer.trainable = False

for layer in base_model.layers[fine_tune_from:]:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = False
            
model.compile(optimizer=tf.keras.optimizers.Nadam(learning_rate=0.01),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=("acc", ))

In [None]:
total_epochs = initial_epochs + 30
history_ft = model.fit(train_ds,
                           epochs=total_epochs,
                           callbacks=callbacks,
                           validation_data=val_ds,
                           initial_epoch=history.epoch[-1])

Epoch 6/40








val_loss/loss: 0.3614343983583407
Epoch 7/40



val_loss/loss: 0.604620724214018
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40


# Convert model to quantized tflite model with optimizations

In [None]:
def gen_representative_data():
        for item, _ in train_ds.take(100):
            yield [item]

In [None]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = gen_representative_data
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
tflite_model = converter.convert()
import pathlib
pathlib.Path("artifacts/models/gic_uint8_v1.tflite").write_bytes(tflite_model)

In [None]:
[x[1] for x in train_ds.take(1)]

[<tf.Tensor: shape=(32,), dtype=int32, numpy=
 array([33, 19, 46, 33, 46, 46, 13, 33, 46, 46,  9, 46, 46, 46,  4, 46, 10,
        46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 33, 10, 33, 46],
       dtype=int32)>]

In [None]:
!XZ_DEFAULTS="-T 6" tar -caf gic_artifacts.tar.xz artifacts/