In [None]:
# Import Libraries
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import time

from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import Callback
from tensorflow.keras import layers, models, Input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import AdamW

In [None]:
# Select a distribution strategy: use the TPU when one can be reached and
# initialised, otherwise fall back to TensorFlow's default strategy.
try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='local')
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    print("Running on TPU")
except Exception as e:
    # The catch stays deliberately broad (any failure in the TPU path should
    # trigger the fallback), but the reason is now reported instead of being
    # silently dropped, so a misconfigured TPU is distinguishable from "no TPU".
    strategy = tf.distribute.get_strategy()  # default strategy for CPU and single GPU
    print("Running on CPU or single GPU")
    print(f"TPU not used ({type(e).__name__}: {e})")

print(f"Replicas in sync: {strategy.num_replicas_in_sync}")

In [None]:
# Dimension Selection
# Picks which pre-sized TFRecord folder is read. Note that parse_example
# resizes every image to 224x224 regardless of this value.
dimension_selection = "224x224"

# Load train & val TFRecords
tfrecord_root = f"/kaggle/input/flower-classification-with-tpus/tfrecords-jpeg-{dimension_selection}"
train_files = tf.io.gfile.glob(f"{tfrecord_root}/train/*.tfrec")
val_files = tf.io.gfile.glob(f"{tfrecord_root}/val/*.tfrec")

# Fail fast with a readable message: an empty glob result would otherwise only
# surface later as an obscure error while building or iterating the datasets.
for split_name, split_files in (("train", train_files), ("val", val_files)):
    if not split_files:
        raise FileNotFoundError(
            f"No {split_name} TFRecord files found under {tfrecord_root}/{split_name}"
        )

In [None]:
# Schema of one serialized record: a scalar byte string holding the encoded
# image and a scalar int64 holding the class id.
feature_description = {
    'image': tf.io.FixedLenFeature(shape=[], dtype=tf.string),
    'class': tf.io.FixedLenFeature(shape=[], dtype=tf.int64),
}


def parse_example(example_proto):
    """Turn one serialized record into an ``(image, label)`` training pair.

    Args:
        example_proto: a single serialized example, parsed against
            ``feature_description``.

    Returns:
        A tuple ``(image, label)`` where ``image`` is the JPEG decoded with
        3 channels, resized to 224x224 and divided by 255.0, and ``label`` is
        the record's ``'class'`` feature, returned unchanged.
    """
    parsed = tf.io.parse_single_example(example_proto, feature_description)
    pixels = tf.io.decode_jpeg(parsed['image'], channels=3)
    resized = tf.image.resize(pixels, [224, 224])
    image = resized / 255.0  # Normalize
    return image, parsed['class']


In [None]:
# Grab Data
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 10000  # fixed shuffle buffer size for train

# Training pipeline (no filtering).
# The shuffle runs on the *serialized* records, before parse_example. Shuffling
# after the map would keep SHUFFLE_BUFFER_SIZE decoded 224x224x3 float images
# in memory (~0.6 MB each, i.e. roughly 6 GB for a 10000-element buffer);
# shuffling the compressed records gives the same randomisation far cheaper.
train_dataset = tf.data.TFRecordDataset(train_files)
train_dataset = train_dataset.shuffle(buffer_size=SHUFFLE_BUFFER_SIZE)
train_dataset = train_dataset.repeat()  # Repeat for multiple epochs
train_dataset = train_dataset.map(parse_example, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Validation pipeline: parsed and batched only.
# No shuffle or repeat, so each evaluation pass reads the records once, in file order.
validation_dataset = tf.data.TFRecordDataset(val_files)
validation_dataset = validation_dataset.map(parse_example, num_parallel_calls=tf.data.AUTOTUNE)
validation_dataset = validation_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [None]:
# Baseline CNN Model

def baseline_model(num_classes):
    """Assemble the uncompiled baseline CNN classifier.

    Layer order: input (224x224x3) -> data augmentation -> three
    Conv2D/BatchNormalization/MaxPooling2D blocks with 32, 64 and 128 filters
    -> Flatten -> Dense(128) with BatchNormalization and Dropout(0.4)
    -> softmax output.

    Args:
        num_classes: number of units in the final softmax layer.

    Returns:
        A ``Sequential`` Keras model; compiling it is left to the caller.
    """
    # Random augmentations, grouped in their own named sub-model
    augmentation = tf.keras.Sequential(
        [
            layers.RandomFlip("horizontal_and_vertical"),
            layers.RandomRotation(0.2),
            layers.RandomZoom(height_factor=(-0.1, 0.1), width_factor=(-0.1, 0.1)),
            layers.RandomContrast(0.2),
        ],
        name="data_augmentation",
    )

    # Layers are collected in order and handed to Sequential in one go
    stack = [
        layers.Input(shape=(224, 224, 3)),  # 224x224 RGB images
        augmentation,
    ]

    # Convolutional feature extractor: same block shape, growing filter count
    for filters in (32, 64, 128):
        stack.append(layers.Conv2D(filters, (3, 3), padding='same', activation='relu'))
        stack.append(layers.BatchNormalization())
        stack.append(layers.MaxPooling2D())

    # Classifier head
    stack.append(layers.Flatten())
    stack.append(layers.Dense(128, activation='relu'))
    stack.append(layers.BatchNormalization())
    stack.append(layers.Dropout(0.4))  # 40% dropout against overfitting
    stack.append(layers.Dense(num_classes, activation='softmax'))

    return models.Sequential(stack)

In [None]:
num_classes = 104  # width of the softmax output, i.e. number of target classes

# The model and its optimizer are both created inside the strategy scope.
with strategy.scope():
    model = baseline_model(num_classes)

    model.compile(
        # Integer class ids are used as labels, hence the sparse loss variant
        loss='sparse_categorical_crossentropy',
        # AdamW: learning rate 0.001, weight decay 1e-5 for regularization
        optimizer=AdamW(learning_rate=1e-3, weight_decay=1e-5),
        metrics=['accuracy'],
    )

In [None]:
# ModelCheckpoint, os and time are already imported in the notebook's first
# cell; the duplicate imports that used to sit here were removed.

# Define checkpoint path
checkpoint_dir = "/kaggle/working/checkpoints"
os.makedirs(checkpoint_dir, exist_ok=True)

# Writes a full-model .h5 file whenever validation accuracy improves. The
# file name embeds the epoch number and the validation accuracy reached.
checkpoint_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, "epoch_{epoch:02d}_valacc_{val_accuracy:.2f}.h5"),
    monitor='val_accuracy',          # quantity that decides what counts as "best"
    mode='max',                      # higher accuracy is better (made explicit)
    save_best_only=True,             # Only saves best model by val_accuracy
    save_weights_only=False,         # Saves the full model, not just weights
    verbose=1
)

In [None]:
# Early stopping: stop training if val_loss doesn't improve for 7 epochs
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=7,
    restore_best_weights=True,
    verbose=1
)

# Reduce learning rate when val_loss plateaus
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,        # reduce LR by half
    patience=5,
    min_lr=1e-6,
    verbose=1
)

# BATCH_SIZE is reused from the data-pipeline cell rather than redefined here,
# so the step count cannot drift from the batch size the datasets were built with.
# NOTE(review): 12000 is a hard-coded approximation of the number of training
# images; confirm it against the actual record count of the train split.
steps_per_epoch = 12000 // BATCH_SIZE  # = 375 when BATCH_SIZE is 32

# Time the training run (start_train was previously never set, which is why
# the training-time report below had been commented out).
start_train = time.time()
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=100,
    steps_per_epoch=steps_per_epoch,  # required: train_dataset repeats indefinitely
    callbacks=[checkpoint_callback, reduce_lr, early_stopping]
)
end_train = time.time()

print(f"Training time: {(end_train - start_train)/60:.2f} minutes")

# Evaluate on validation set (has labels)
start_eval = time.time()
val_loss, val_acc = model.evaluate(validation_dataset)
end_eval = time.time()

print(f"Validation evaluation time: {(end_eval - start_eval):.2f} seconds")
print(f"Validation accuracy: {val_acc:.3f}")


In [None]:
import matplotlib.pyplot as plt

# One panel per tracked quantity, left to right:
# (train key, validation key, train legend, validation legend, title, y label)
panels = [
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Model Loss', 'Loss'),
]

# Side-by-side training curves read from the History object returned by fit()
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

for ax, (train_key, val_key, train_label, val_label, title, y_label) in zip(axes, panels):
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()
    ax.grid(True)

plt.show()

In [None]:
# Persist the trained model to the Kaggle working directory as a single .h5 file
final_model_path = '/kaggle/working/my_final_model.h5'
model.save(final_model_path)