# Data Modelling - Tiger Subspecies Classification
This section covers the responsibilities of the **Data Scientist**:
- Build, train and evaluate 3 CNN models (ResNet50, DenseNet121, MobileNetV3Large)
- Perform transfer learning with pre-trained ImageNet weights
- Tune hyperparameters and observe model performance
- Evaluate using accuracy, mean Average Precision (mAP), and training time

In [1]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import shutil

# Define source and destination paths
src_path = '/content/drive/MyDrive/Colab Notebooks/Project AI/tiger_datasets'
dst_path = '/content/tiger_datasets'

# Copy the dataset only if it hasn't already been copied
if not os.path.exists(dst_path):
    print("🚀 Copying dataset from Google Drive to Colab local storage...")
    shutil.copytree(src_path, dst_path)
else:
    print("✅ Dataset already copied to local storage.")

🚀 Copying dataset from Google Drive to Colab local storage...


## Step 1: ⚙️ Install and import all required libraries

In [3]:
!pip install tensorflow matplotlib scikit-learn

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50, DenseNet121, MobileNetV3Large
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np
import time
from sklearn.metrics import average_precision_score



## Step 2: 🖼️ Data Preprocessing

Create data generators for training, validation, and testing datasets using `ImageDataGenerator`. This resizes images and scales pixel values for improved model performance.

In [4]:
import os
import shutil
import random
from tqdm import tqdm

# Define base directory of dataset and image parameters
base_dir = '/content/tiger_datasets'
image_size = (224, 224)  # Input image size for pretrained models
batch_size = 32          # Number of images per batch during training/testing

## Image Count by Subspecies and Data Split

In [5]:
import os

# Define base directory
base_dir = '/content/tiger_datasets'

# Define splits
splits = ['train', 'val', 'test']

# Get list of subspecies (class names)
subspecies_list = sorted(os.listdir(os.path.join(base_dir, 'train')))

print("\n📊 Image count by subspecies (across train, val, test):\n")

# Loop through each subspecies
for subspecies in subspecies_list:
    total = 0
    print(f"🐅 Subspecies: {subspecies}")
    for split in splits:
        split_path = os.path.join(base_dir, split, subspecies)
        if os.path.exists(split_path):
            count = len([
                f for f in os.listdir(split_path)
                if os.path.isfile(os.path.join(split_path, f))
            ])
        else:
            count = 0
        total += count
        print(f"  - {split.capitalize():<5}: {count}")
    print(f"➡️  Total: {total}\n")


📊 Image count by subspecies (across train, val, test):

🐅 Subspecies: bengal_tiger
  - Train: 416
  - Val  : 87
  - Test : 87
➡️  Total: 590

🐅 Subspecies: indochinese_tiger
  - Train: 434
  - Val  : 92
  - Test : 93
➡️  Total: 619

🐅 Subspecies: malayan_tiger
  - Train: 442
  - Val  : 90
  - Test : 92
➡️  Total: 624

🐅 Subspecies: siberian_tiger
  - Train: 309
  - Val  : 66
  - Test : 67
➡️  Total: 442

🐅 Subspecies: south_china_tiger
  - Train: 318
  - Val  : 68
  - Test : 69
➡️  Total: 455

🐅 Subspecies: sumatran_tiger
  - Train: 557
  - Val  : 119
  - Test : 120
➡️  Total: 796



## Data Generators

In [6]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

# Set your dataset base directory and constants
base_dir = '/content/tiger_datasets'
image_size = (224, 224)
batch_size = 32

# Define generators
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    shear_range=0.2,
    brightness_range=[0.7, 1.3],
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Create the data flows
train_gen = train_datagen.flow_from_directory(
    os.path.join(base_dir, 'train'),
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)

val_gen = val_datagen.flow_from_directory(
    os.path.join(base_dir, 'val'),
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

test_gen = test_datagen.flow_from_directory(
    os.path.join(base_dir, 'test'),
    target_size=image_size,
    batch_size=1,
    class_mode='categorical',
    shuffle=False
)

Found 2476 images belonging to 6 classes.
Found 522 images belonging to 6 classes.
Found 528 images belonging to 6 classes.


## Step 3: 🧠 Model Training Function
This function:
- Adds classification layers on top of pre-trained base model
- Trains for 50 epochs
- Tracks training time

In [7]:
from sklearn.metrics import average_precision_score
import numpy as np

def calculate_map(model, data_gen):
    predictions = model.predict(data_gen, verbose=0)
    true_labels = data_gen.classes
    predicted_probs = predictions
    one_hot_labels = np.zeros(predicted_probs.shape)
    one_hot_labels[np.arange(len(true_labels)), true_labels] = 1
    return average_precision_score(one_hot_labels, predicted_probs, average='macro')

In [8]:
def train_and_finetune_model(base_model, model_name, train_gen, val_gen, fine_tune_at=-100):
    from tensorflow.keras.models import Model
    from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
    from sklearn.utils.class_weight import compute_class_weight
    import time

    # Step 1: Build model
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(train_gen.num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    # Freeze base model
    base_model.trainable = False

    # Compile model
    model.compile(optimizer=Adam(1e-4),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Initial training
    print(f"🔧 Initial training for {model_name}...")
    start_time = time.time()
    history_initial = model.fit(train_gen,
                                epochs=10,
                                validation_data=val_gen,
                                verbose=1)
    init_time = time.time() - start_time

    # Fine-tuning
    print(f"🔁 Fine-tuning {model_name}...")
    for layer in base_model.layers[:fine_tune_at]:
        layer.trainable = False
    for layer in base_model.layers[fine_tune_at:]:
        layer.trainable = True

    model.compile(optimizer=Adam(1e-5),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Class weights
    class_weights = compute_class_weight(class_weight='balanced',
                                         classes=np.unique(train_gen.classes),
                                         y=train_gen.classes)
    class_weight_dict = dict(enumerate(class_weights))


    # Fine-tune training
    start_finetune = time.time()
    history_finetune = model.fit(train_gen,
                                 epochs=40,
                                 validation_data=val_gen,
                                 class_weight=class_weight_dict,
                                 verbose=1)
    finetune_time = time.time() - start_finetune
    total_time = init_time + finetune_time

    # Evaluate
    val_loss, val_acc = model.evaluate(val_gen, verbose=0)
    val_map = calculate_map(model, val_gen)

    print(f"\n✅ {model_name} completed.")
    print(f"⏱ Total training time: {total_time/60:.2f} mins")
    print(f"📈 Accuracy: {val_acc:.4f}")
    print(f"📊 mAP: {val_map:.4f}")

    return {
        'model': model,
        'initial_history': history_initial,
        'finetune_history': history_finetune,
        'val_accuracy': val_acc,
        'val_mAP': val_map,
        'total_time': total_time
    }

In [9]:
#resnet50 model train and tune
resnet_result = train_and_finetune_model(ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3)), "ResNet50", train_gen, val_gen)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
🔧 Initial training for ResNet50...


  self._warn_if_super_not_called()


Epoch 1/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 744ms/step - accuracy: 0.1718 - loss: 2.0546 - val_accuracy: 0.2318 - val_loss: 1.7691
Epoch 2/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 544ms/step - accuracy: 0.1790 - loss: 1.8947 - val_accuracy: 0.2433 - val_loss: 1.7817
Epoch 3/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 715ms/step - accuracy: 0.2139 - loss: 1.8428 - val_accuracy: 0.2395 - val_loss: 1.7588
Epoch 4/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 677ms/step - accuracy: 0.2244 - loss: 1.7873 - val_accuracy: 0.2375 - val_loss: 1.7514
Epoch 5/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 512ms/step - accuracy: 0.2353 - loss: 1.7694 - val_accuracy: 0.2356 - val_loss: 1.7492
Epoch 6/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 487ms/step - accuracy: 0.2299 - loss: 1.7811 - val_accuracy: 0.2356 - val_loss: 1.7482
Epoch 7/10
[1m78/78[

In [10]:
#densenet121 model train and tune
densenet_result = train_and_finetune_model(DenseNet121(include_top=False, weights='imagenet', input_shape=(224, 224, 3)), "DenseNet121", train_gen, val_gen)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
🔧 Initial training for DenseNet121...
Epoch 1/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 905ms/step - accuracy: 0.1744 - loss: 2.2015 - val_accuracy: 0.2931 - val_loss: 1.6904
Epoch 2/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 494ms/step - accuracy: 0.2719 - loss: 1.8021 - val_accuracy: 0.3755 - val_loss: 1.5977
Epoch 3/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 483ms/step - accuracy: 0.3155 - loss: 1.6616 - val_accuracy: 0.4119 - val_loss: 1.5433
Epoch 4/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 467ms/step - accuracy: 0.3463 - loss: 1.5983 - val_accuracy: 0.4215 - val_loss: 1.5054
Epoch 5/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 

In [18]:
#mobilenetv3 model train and tune
mobilenet_result = train_and_finetune_model(MobileNetV3Large(include_top=False, weights='imagenet', input_shape=(224, 224, 3)), "MobileNetV3Large", train_gen, val_gen)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v3/weights_mobilenet_v3_large_224_1.0_float_no_top_v2.h5
[1m12683000/12683000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
🔧 Initial training for MobileNetV3Large...
Epoch 1/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 665ms/step - accuracy: 0.1839 - loss: 1.9187 - val_accuracy: 0.2280 - val_loss: 1.7719
Epoch 2/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 465ms/step - accuracy: 0.2102 - loss: 1.8178 - val_accuracy: 0.2299 - val_loss: 1.7690
Epoch 3/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 469ms/step - accuracy: 0.1968 - loss: 1.8364 - val_accuracy: 0.2337 - val_loss: 1.7699
Epoch 4/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 448ms/step - accuracy: 0.1996 - loss: 1.8260 - val_accuracy: 0.2318 - val_loss: 1.7644
Epoch 5/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37

In [12]:
resnet_result

{'model': <Functional name=functional, built=True>,
 'initial_history': <keras.src.callbacks.history.History at 0x7a4801a232d0>,
 'finetune_history': <keras.src.callbacks.history.History at 0x7a47f83c6810>,
 'val_accuracy': 0.3927203118801117,
 'val_mAP': np.float64(0.43386669149304047),
 'total_time': 2228.184358358383}

In [11]:
densenet_result

{'model': <Functional name=functional_1, built=True>,
 'initial_history': <keras.src.callbacks.history.History at 0x7a4720d55090>,
 'finetune_history': <keras.src.callbacks.history.History at 0x7a46fc786810>,
 'val_accuracy': 0.5766283273696899,
 'val_mAP': np.float64(0.6282657582870591),
 'total_time': 2094.281471967697}

In [19]:
mobilenet_result

{'model': <Functional name=functional_2, built=True>,
 'initial_history': <keras.src.callbacks.history.History at 0x7a4521ae9e90>,
 'finetune_history': <keras.src.callbacks.history.History at 0x7a45219d9a10>,
 'val_accuracy': 0.24904213845729828,
 'val_mAP': np.float64(0.29772638720494365),
 'total_time': 2003.2274901866913}

## Step 4: Save Pkl and .h5 for future visualization

In [13]:
# Unpack from dictionary
resnet_model = resnet_result['model']
resnet_init_hist = resnet_result['initial_history']
resnet_ft_hist = resnet_result['finetune_history']

# Save model
resnet_model.save("resnet50_tiger.h5")

# Save training and fine-tuning histories
import pickle

with open("resnet50_initial_history.pkl", "wb") as f:
    pickle.dump(resnet_init_hist.history, f)

with open("resnet50_finetune_history.pkl", "wb") as f:
    pickle.dump(resnet_ft_hist.history, f)



In [15]:
performance_metrics = {
    'val_accuracy': resnet_result['val_accuracy'],
    'val_mAP': resnet_result['val_mAP'],
    'total_time': resnet_result['total_time']
}

with open("resnet50_performance.pkl", "wb") as f:
    pickle.dump(performance_metrics, f)

In [16]:
# Unpack from dictionary
densenet_model = densenet_result['model']
densenet_init_hist = densenet_result['initial_history']
densenet_ft_hist = densenet_result['finetune_history']

# Save model
densenet_model.save("densenet121_tiger.h5")

# Save training and fine-tuning histories
import pickle

with open("densenet121_initial_history.pkl", "wb") as f:
    pickle.dump(densenet_init_hist.history, f)

with open("densenet121_finetune_history.pkl", "wb") as f:
    pickle.dump(densenet_ft_hist.history, f)



In [17]:
performance_metrics = {
    'val_accuracy': densenet_result['val_accuracy'],
    'val_mAP': densenet_result['val_mAP'],
    'total_time': densenet_result['total_time']
}

with open("densenet121_performance.pkl", "wb") as f:
    pickle.dump(performance_metrics, f)

In [20]:
# Unpack from dictionary
mobilenet_model = mobilenet_result['model']
mobilenet_init_hist = mobilenet_result['initial_history']
mobilenet_ft_hist = mobilenet_result['finetune_history']

# Save model
mobilenet_model.save("mobilenetv3_tiger.h5")

# Save training and fine-tuning histories
import pickle

with open("mobilenetv3_initial_history.pkl", "wb") as f:
    pickle.dump(mobilenet_init_hist.history, f)

with open("mobilenetv3_finetune_history.pkl", "wb") as f:
    pickle.dump(mobilenet_ft_hist.history, f)



In [21]:
performance_metrics = {
    'val_accuracy': mobilenet_result['val_accuracy'],
    'val_mAP': mobilenet_result['val_mAP'],
    'total_time': mobilenet_result['total_time']
}

with open("mobilenetv3_performance.pkl", "wb") as f:
    pickle.dump(performance_metrics, f)

In [22]:
results = {
    "ResNet50": resnet_result,
    "DenseNet121": densenet_result,
    "MobileNetV3Large": mobilenet_result
}

print(f"{'Model':<18} {'Val Accuracy (%)':<18} {'mAP (%)':<12} {'Time (min)':<12}")
print("-" * 60)

for model_name, result in results.items():
    val_acc_percent = result['val_accuracy'] * 100
    val_map_percent = result['val_mAP'] * 100
    time_min = result['total_time'] / 60
    print(f"{model_name:<18} {val_acc_percent:<18.2f} {val_map_percent:<12.2f} {time_min:<12.2f}")

Model              Val Accuracy (%)   mAP (%)      Time (min)  
------------------------------------------------------------
ResNet50           39.27              43.39        37.14       
DenseNet121        57.66              62.83        34.90       
MobileNetV3Large   24.90              29.77        33.39       
