In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras import callbacks
import keras
import keras_tuner
import keras.utils as image
from keras import layers
from keras import ops
from keras import callbacks
from keras import regularizers
from tensorflow.keras.callbacks import LearningRateScheduler
from PIL import Image

## Importing data from the CUB-200-2011 metadata files


In [2]:
# Root folder of the extracted CUB_200_2011 dataset. This is the single place to
# edit when the dataset lives somewhere else.
CUB_DIR = "/Users/sofie/Desktop/Projects/Classification of Birds/CUB_200_2011"


def read_cub_table(file_name, column_names):
    """Read one whitespace-delimited, header-less metadata file from CUB_DIR.

    Args:
        file_name: Name of the file inside CUB_DIR (for example "images.txt").
        column_names: Column names to assign, in file order.

    Returns:
        A pandas DataFrame with one row per line of the file.
    """
    return pd.read_csv(os.path.join(CUB_DIR, file_name), sep=r'\s+', names=column_names, engine='python')


# image_id -> relative image path
images = read_cub_table("images.txt", ['image_id', 'image_name'])
# image_id -> train/test flag (the flag is compared against 1 and 0 when the data is split)
train_test_split = read_cub_table("train_test_split.txt", ['image_id', 'is_training_image'])
# class_id -> class name
classes = read_cub_table("classes.txt", ['class_id', 'class_name'])
# image_id -> class_id
image_class_labels = read_cub_table("image_class_labels.txt", ['image_id', 'class_id'])

## Preprocessing

In [3]:
# Join the four metadata tables into a single frame with one row per image:
# image_id ties together the file name, the train/test flag and the label,
# and class_id then attaches the class name.
image_data = (
    images
    .merge(train_test_split, on='image_id')
    .merge(image_class_labels, on='image_id')
    .merge(classes, on='class_id')
)

In [4]:
# Folder that the relative paths in the image_name column are resolved against
IMAGES_DIR = "/Users/sofie/Desktop/Projects/Classification of Birds/CUB_200_2011/images"
# (height, width) every image is resized to when it is loaded
TARGET_SIZE = (299, 299)
# Fixed seed so the shuffled row order is identical on every run
SHUFFLE_SEED = 42


def load_resized_images(image_names, target_size=TARGET_SIZE):
    """Load every image in `image_names` from IMAGES_DIR, resized to `target_size`.

    Args:
        image_names: Iterable of image paths relative to IMAGES_DIR.
        target_size: Size passed through to `load_img`.

    Returns:
        A list with one loaded image per entry of `image_names`, in the same order.
    """
    return [
        image.load_img(os.path.join(IMAGES_DIR, name), target_size=target_size)
        for name in image_names
    ]


# Split training and testing image data using the is_training_image flag
training_image_data = image_data[image_data['is_training_image']==1]
testing_image_data = image_data[image_data['is_training_image']==0]

# Shuffle training data. The seed matters because the train/validation split is
# later taken as a positional slice of this order; without it the split would
# differ on every run.
training_image_data = training_image_data.sample(frac=1, random_state=SHUFFLE_SEED)

# Check image resolutions before resizing.
# Each file is opened in a `with` block so its handle is released immediately
# instead of staying open for every image in the training set.
image_resolutions = []
for name in training_image_data['image_name'].values:
    with Image.open(os.path.join(IMAGES_DIR, name)) as img:
        image_resolutions.append(img.size)  # (width, height)

# Print unique image resolutions
print(set(image_resolutions))  # Show all unique resolutions in dataset

# Load the resized training and testing images (lists of images, in row order)
training_images = load_resized_images(training_image_data['image_name'].values)
testing_images = load_resized_images(testing_image_data['image_name'].values)

# Extract class labels for training and testing images.
# The class_id values are kept exactly as stored in the metadata (no re-basing).
training_class_label = np.array(training_image_data['class_id'].values)
testing_class_label = np.array(testing_image_data['class_id'].values)

{(500, 435), (360, 284), (500, 444), (298, 309), (500, 453), (500, 462), (312, 500), (500, 471), (457, 500), (446, 500), (200, 179), (500, 300), (500, 309), (400, 231), (500, 318), (500, 327), (300, 400), (500, 336), (208, 137), (419, 500), (350, 239), (225, 217), (261, 310), (359, 315), (300, 238), (200, 300), (400, 325), (456, 387), (500, 421), (265, 289), (476, 500), (487, 500), (500, 430), (500, 439), (360, 288), (500, 448), (500, 457), (361, 500), (423, 344), (425, 283), (500, 286), (500, 295), (500, 304), (485, 441), (250, 120), (311, 288), (500, 313), (418, 500), (230, 300), (500, 322), (360, 400), (323, 500), (361, 383), (230, 327), (380, 253), (400, 320), (500, 407), (402, 500), (391, 500), (500, 416), (380, 500), (400, 329), (400, 338), (328, 462), (500, 425), (500, 434), (230, 183), (330, 270), (480, 465), (500, 443), (459, 500), (495, 355), (500, 272), (364, 500), (353, 500), (500, 281), (498, 500), (342, 500), (500, 290), (334, 276), (255, 221), (500, 299), (140, 145), (50

In [5]:
# Turn each list of loaded images into a single NumPy array (training first, then testing)
training_images, testing_images = (
    np.array(image_list) for image_list in (training_images, testing_images)
)

# Run the Xception input preprocessing over both arrays
preprocessed_training_images, preprocessed_testing_images = (
    preprocess_input(image_array) for image_array in (training_images, testing_images)
)

We begin training by keeping the pretrained Xception layers frozen, allowing only the newly added classification head to learn. This ensures that the output layer is properly trained before any of the pretrained layers are fine-tuned. Once the classifier stabilizes, we gradually unfreeze and fine-tune the pretrained layers, optimizing them in a controlled manner. This approach prevents instability in feature extraction and allows each layer to adjust effectively, improving overall model performance.

In [6]:
# Settings for this training stage (pretrained base frozen, only the new head learns)
SEED = 42                     # seeds weight initialisation and batch shuffling
NUM_OUTPUT_UNITS = 201        # labels are fed as raw class_id values, so the softmax needs max(class_id) + 1 units
                              # NOTE(review): presumably class_id runs 1..200, leaving index 0 unused - confirm
DENSE_UNITS = 512
L2_WEIGHT = 1e-4
TRAIN_FRACTION = 0.7          # the remaining 30% of the training images is used for validation
EPOCHS = 10
BATCH_SIZE = 32
EARLY_STOPPING_PATIENCE = 3

# Make the run repeatable: without this the new layers start from different
# random weights and batches are shuffled differently on every execution.
keras.utils.set_random_seed(SEED)

# Load pre-trained Xception model (without top layers)
base_model = keras.applications.xception.Xception(weights="imagenet", include_top=False)

# Add new layers on top
avg = keras.layers.GlobalAveragePooling2D()(base_model.output)
dense = keras.layers.Dense(DENSE_UNITS, activation="relu", kernel_regularizer=regularizers.l2(l2=L2_WEIGHT))(avg)  # L2 regularization
output = keras.layers.Dense(NUM_OUTPUT_UNITS, activation="softmax")(dense)

# Create model
model = keras.Model(inputs=base_model.input, outputs=output)

# Freeze base model layers so only the layers added above are trained
for layer in base_model.layers:
    layer.trainable = False

# Define optimizer
optimizer = keras.optimizers.Adam()  # Adam with its default learning rate

# Compile model (sparse loss: labels are integer class ids, not one-hot vectors)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# Set early stopping: stop once val_loss has not improved for
# EARLY_STOPPING_PATIENCE epochs and roll back to the best weights seen
earlystopping = callbacks.EarlyStopping(monitor="val_loss", mode="min", patience=EARLY_STOPPING_PATIENCE, restore_best_weights=True)

# Hold out the last 30% of the training arrays for validation.
# This is a positional slice, so it relies on the training rows having been shuffled earlier.
split_idx = int(len(preprocessed_training_images) * TRAIN_FRACTION)
X_train, X_val = preprocessed_training_images[:split_idx], preprocessed_training_images[split_idx:]
y_train = training_class_label[:split_idx].astype(np.int32)
y_val = training_class_label[split_idx:].astype(np.int32)

# Train model
history = model.fit(X_train, y_train, epochs=EPOCHS, validation_data=(X_val, y_val), batch_size=BATCH_SIZE, callbacks=[earlystopping])

Epoch 1/50
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 1s/step - accuracy: 0.0097 - loss: 5.4586 - val_accuracy: 0.0767 - val_loss: 5.1009 - learning_rate: 1.0000e-04
Epoch 2/50
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 1s/step - accuracy: 0.0500 - loss: 5.0546 - val_accuracy: 0.1523 - val_loss: 4.6752 - learning_rate: 1.0000e-04
Epoch 3/50
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 1s/step - accuracy: 0.1038 - loss: 4.6068 - val_accuracy: 0.2046 - val_loss: 4.1398 - learning_rate: 1.0000e-04
Epoch 4/50
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 1s/step - accuracy: 0.1456 - loss: 4.1412 - val_accuracy: 0.2546 - val_loss: 3.6894 - learning_rate: 1.0000e-04
Epoch 5/50
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 1s/step - accuracy: 0.1986 - loss: 3.7553 - val_accuracy: 0.2974 - val_loss: 3.3721 - learning_rate: 1.0000e-04
Epoch 6/50
[1m132/132[0m [32m━━━━━━━━━━━━━

KeyboardInterrupt: 

In [7]:
# Score the model on the test images; the value returned by evaluate() is shown as the cell output
model.evaluate(x=preprocessed_testing_images, y=testing_class_label)

[1m 10/182[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2:04[0m 723ms/step - accuracy: 0.6645 - loss: 1.4523

KeyboardInterrupt: 