In [1]:
# Import the necessary packages to run the code
# !pip install keras_tuner
# import keras_tuner
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tempfile
import keras
from keras.utils import to_categorical
import keras.utils as image
from keras import layers
from keras import ops
from keras import callbacks
import tensorflow as tf
from tensorflow.keras import metrics



In [2]:
# List the entries of the CUB_200_2011 dataset directory (path is relative to the working directory).
os.listdir('CUB_200_2011')

['classes.txt',
 '.DS_Store',
 'images',
 'train_test_split.txt',
 'README',
 'attributes',
 'bounding_boxes.txt',
 'parts',
 'images.txt',
 'image_class_labels.txt']

## **README for dataset description**

In [3]:
# Read the README that ships with the dataset and print it in full.
with open('CUB_200_2011/README', 'r') as readme_file:
    readme_text = readme_file.read()
print(readme_text)

The Caltech-UCSD Birds-200-2011 Dataset

For more information about the dataset, visit the project website:

  http://www.vision.caltech.edu/visipedia

If you use the dataset in a publication, please cite the dataset in
the style described on the dataset website (see url above).

Directory Information
---------------------

- images/
    The images organized in subdirectories based on species. See 
    IMAGES AND CLASS LABELS section below for more info.
- parts/
    15 part locations per image. See PART LOCATIONS section below 
    for more info.
- attributes/
    322 binary attribute labels from MTurk workers. See ATTRIBUTE LABELS 
    section below for more info.



IMAGES AND CLASS LABELS:
Images are contained in the directory images/, with 200 subdirectories (one for each bird species)

------- List of image files (images.txt) ------
The list of image file names is contained in the file images.txt, with each line corresponding to one image:

<image_id> <image_name>
---------------

## **Importing data and cleaning**


In [4]:
# Images and class labels.
# Column names are supplied explicitly for every table; sep=None lets the
# python engine detect the delimiter.
# NOTE: the `train_test_split` frame below reuses the name of the scikit-learn
# function imported at the top of the notebook and hides it from later cells.

images = pd.read_csv(
    "CUB_200_2011/images.txt",
    names=['image_id', 'image_name'],
    sep=None,
    engine='python',
)
print(images)

train_test_split = pd.read_csv(
    "CUB_200_2011/train_test_split.txt",
    names=['image_id', 'is_training_image'],
    sep=None,
    engine='python',
)
print(train_test_split)

classes = pd.read_csv(
    "CUB_200_2011/classes.txt",
    names=['class_id', 'class_name'],
    sep=None,
    engine='python',
)
print(classes)

image_class_labels = pd.read_csv(
    "CUB_200_2011/image_class_labels.txt",
    names=['image_id', 'class_id'],
    sep=None,
    engine='python',
)
print(image_class_labels)


       image_id                                         image_name
0             1  001.Black_footed_Albatross/Black_Footed_Albatr...
1             2  001.Black_footed_Albatross/Black_Footed_Albatr...
2             3  001.Black_footed_Albatross/Black_Footed_Albatr...
3             4  001.Black_footed_Albatross/Black_Footed_Albatr...
4             5  001.Black_footed_Albatross/Black_Footed_Albatr...
...         ...                                                ...
11783     11784  200.Common_Yellowthroat/Common_Yellowthroat_00...
11784     11785  200.Common_Yellowthroat/Common_Yellowthroat_00...
11785     11786  200.Common_Yellowthroat/Common_Yellowthroat_00...
11786     11787  200.Common_Yellowthroat/Common_Yellowthroat_00...
11787     11788  200.Common_Yellowthroat/Common_Yellowthroat_00...

[11788 rows x 2 columns]
       image_id  is_training_image
0             1                  0
1             2                  1
2             3                  0
3             4               

In [5]:
# Bounding boxes: one row per image with the columns image_id, x, y, width, height.

bounding_boxes = pd.read_csv(
    "CUB_200_2011/bounding_boxes.txt",
    names=['image_id', 'x', 'y', 'width', 'height'],
    sep=None,
    engine='python',
)
print(bounding_boxes)


       image_id      x      y  width  height
0             1   60.0   27.0  325.0   304.0
1             2  139.0   30.0  153.0   264.0
2             3   14.0  112.0  388.0   186.0
3             4  112.0   90.0  255.0   242.0
4             5   70.0   50.0  134.0   303.0
...         ...    ...    ...    ...     ...
11783     11784   89.0   95.0  354.0   250.0
11784     11785  157.0   62.0  184.0   219.0
11785     11786  190.0  102.0  198.0   202.0
11786     11787    3.0   20.0  408.0   307.0
11787     11788   20.0  113.0  177.0   263.0

[11788 rows x 5 columns]


In [6]:
# Part locations.
# parts.txt is read as fixed-width text (a 2-character id followed by an
# 11-character name); the two location tables are read as delimited text.

parts = pd.read_fwf(
    "CUB_200_2011/parts/parts.txt",
    names=['part_id', 'part_name'],
    widths=[2, 11],
    engine='python',
)
print(parts)

part_locs = pd.read_csv(
    "CUB_200_2011/parts/part_locs.txt",
    names=['image_id', 'part_id', 'x', 'y', 'visible'],
    sep=None,
    engine='python',
)
print(part_locs)

part_click_locs = pd.read_csv(
    "CUB_200_2011/parts/part_click_locs.txt",
    names=['image_id', 'part_id', 'x', 'y', 'visible', 'time'],
    sep=None,
    engine='python',
)
print(part_click_locs)

    part_id   part_name
0         1        back
1         2        beak
2         3       belly
3         4      breast
4         5       crown
5         6    forehead
6         7    left eye
7         8    left leg
8         9   left wing
9        10        nape
10       11   right eye
11       12   right leg
12       13  right wing
13       14        tail
14       15      throat
        image_id  part_id      x      y  visible
0              1        1    0.0    0.0        0
1              1        2  312.0  182.0        1
2              1        3    0.0    0.0        0
3              1        4    0.0    0.0        0
4              1        5  186.0   45.0        1
...          ...      ...    ...    ...      ...
176815     11788       11  136.0  133.0        1
176816     11788       12  102.0  316.0        1
176817     11788       13   94.0  224.0        1
176818     11788       14   36.0  329.0        1
176819     11788       15  166.0  161.0        1

[176820 rows x 5 columns]
 

In [7]:
# Attribute labels.

# NOTE(review): attributes.txt is read from the working directory, not from
# CUB_200_2011/attributes/ like the other attribute files - confirm its location.
attributes = pd.read_csv("attributes.txt",sep = None,names=['attribute_id','attribute_name'],engine='python')
print(attributes)

# certainties.txt is fixed-width: a 2-character id followed by a 15-character name.
certainties = pd.read_fwf("CUB_200_2011/attributes/certainties.txt",widths = [2,15],names=['certainty_id','certainty_name'],engine='python')
print(certainties)

# The separator is a regular expression, so it is written as a raw string:
# '\s' inside a normal string literal is an invalid escape sequence.
# The sixth column ('temp') catches rows that carry one extra field; it is
# NaN for every other row.
image_attribute_labels = pd.read_csv("CUB_200_2011/attributes/image_attribute_labels.txt",sep = r'\s+', names=['image_id','attribute_id','is_present','certainty_id','time','temp'],engine='python')
print(image_attribute_labels)

# One column per attribute, labelled 1..312.
class_attribute_labels_continuous = pd.read_csv("CUB_200_2011/attributes/class_attribute_labels_continuous.txt", sep = None, names =range(1,313), engine='python')
print(class_attribute_labels_continuous)


     attribute_id                       attribute_name
0               1  has_bill_shape::curved_(up_or_down)
1               2               has_bill_shape::dagger
2               3               has_bill_shape::hooked
3               4               has_bill_shape::needle
4               5       has_bill_shape::hooked_seabird
..            ...                                  ...
307           308                has_crown_color::buff
308           309              has_wing_pattern::solid
309           310            has_wing_pattern::spotted
310           311            has_wing_pattern::striped
311           312      has_wing_pattern::multi-colored

[312 rows x 2 columns]
   certainty_id certainty_name
0             1    not visible
1             2       guessing
2             3       probably
3             4     definitely
         image_id  attribute_id  is_present  certainty_id    time  temp
0               1             1           0             3  27.708   NaN
1               1

In [8]:
#CLEANING IMAGE ATTRIBUTE LABELS - has extra column for 606 rows
# In the affected rows the value that belongs in 'time' was parsed into the
# overflow column 'temp'. Select them with a boolean mask and copy the value
# across in one label-based assignment, so the fix does not depend on the
# index labels matching row positions.
has_extra_column = image_attribute_labels['temp'].notna()

# Rows before the repair.
to_fix = image_attribute_labels.loc[has_extra_column]
print(to_fix)

image_attribute_labels.loc[has_extra_column, 'time'] = image_attribute_labels.loc[has_extra_column, 'temp']

# Same rows after the repair: 'time' now holds the value from 'temp'.
to_fix = image_attribute_labels.loc[has_extra_column]
print(to_fix)

# The overflow column is no longer needed.
image_attribute_labels = image_attribute_labels.drop(columns='temp')
print(image_attribute_labels)

         image_id  attribute_id  is_present  certainty_id  time    temp
709497       2275            10           0             1   0.0   1.509
709498       2275            11           0             1   0.0   1.509
709499       2275            12           0             1   0.0   1.509
709500       2275            13           0             1   0.0   1.509
709501       2275            14           0             1   0.0   1.509
...           ...           ...         ...           ...   ...     ...
2921563      9364           308           0             3   0.0  13.288
2921564      9364           309           0             3   0.0   6.240
2921565      9364           310           0             3   0.0   6.240
2921566      9364           311           0             3   0.0   6.240
2921567      9364           312           1             3   0.0   6.240

[606 rows x 6 columns]
         image_id  attribute_id  is_present  certainty_id    time    temp
709497       2275            10       

## **Preprocessing**

In [9]:
# Row counts of the tables that are merged into image_data in the next cell.
for table in (images, train_test_split, image_class_labels, classes, bounding_boxes):
    print(len(table))

11788
11788
11788
200
11788


In [10]:
# Combine the per-image tables into one frame keyed by image_id.
# `validate` makes pandas raise a MergeError if a key that should be unique is
# duplicated, instead of silently multiplying rows.
image_data = pd.merge(images, train_test_split, on='image_id', validate='one_to_one')
image_data = pd.merge(image_data, image_class_labels, on='image_id', validate='one_to_one')
# Many images share one class, so this lookup is many-to-one.
image_data = pd.merge(image_data, classes, on='class_id', validate='many_to_one')
image_data = pd.merge(image_data, bounding_boxes, on='image_id', validate='one_to_one')

# The merges are inner joins, so check that no image was dropped.
assert len(image_data) == len(images), "merging dropped or duplicated images"

# Show only the first rows (all columns, untruncated) rather than dumping
# the entire frame into the notebook output.
print(image_data.head().to_string())
print(len(image_data))

       image_id                                                                         image_name  is_training_image  class_id                          class_name      x      y  width  height
0             1                      001.Black_footed_Albatross/Black_Footed_Albatross_0046_18.jpg                  0         1          001.Black_footed_Albatross   60.0   27.0  325.0   304.0
1             2                      001.Black_footed_Albatross/Black_Footed_Albatross_0009_34.jpg                  1         1          001.Black_footed_Albatross  139.0   30.0  153.0   264.0
2             3                      001.Black_footed_Albatross/Black_Footed_Albatross_0002_55.jpg                  0         1          001.Black_footed_Albatross   14.0  112.0  388.0   186.0
3             4                      001.Black_footed_Albatross/Black_Footed_Albatross_0074_59.jpg                  1         1          001.Black_footed_Albatross  112.0   90.0  255.0   242.0
4             5                    

In [11]:
# Row counts of the part tables before they are merged.
for table in (parts, part_locs, part_click_locs):
    print(len(table))

15
176820
794258


In [12]:
# Attach the part_name column to both part-location tables via part_id.
# Each frame is rebound to its merged version, so re-running this cell
# merges the names a second time.
part_locs = part_locs.merge(parts, on='part_id')
print(part_locs)

part_click_locs = part_click_locs.merge(parts, on='part_id')
print(part_click_locs)

        image_id  part_id      x      y  visible   part_name
0              1        1    0.0    0.0        0        back
1              1        2  312.0  182.0        1        beak
2              1        3    0.0    0.0        0       belly
3              1        4    0.0    0.0        0      breast
4              1        5  186.0   45.0        1       crown
...          ...      ...    ...    ...      ...         ...
176815     11788       11  136.0  133.0        1   right eye
176816     11788       12  102.0  316.0        1   right leg
176817     11788       13   94.0  224.0        1  right wing
176818     11788       14   36.0  329.0        1        tail
176819     11788       15  166.0  161.0        1      throat

[176820 rows x 6 columns]
        image_id  part_id      x      y  visible   time part_name
0              1        1    0.0    0.0        0  2.329      back
1              1        1    0.0    0.0        0  2.294      back
2              1        1    0.0    0.0    

In [13]:
# Row counts of the attribute tables before they are merged.
for table in (attributes, image_attribute_labels, certainties):
    print(len(table))

312
3677856
4


In [14]:
# Attach attribute_name (via attribute_id) and certainty_name (via
# certainty_id) to every image/attribute row.
image_attribute_labels = (
    image_attribute_labels
    .merge(attributes, on='attribute_id')
    .merge(certainties, on='certainty_id')
)
print(image_attribute_labels)

         image_id  attribute_id  is_present  certainty_id    time  \
0               1             1           0             3  27.708   
1               1             2           0             3  27.708   
2               1             3           0             3  27.708   
3               1             4           0             3  27.708   
4               1             5           1             3  27.708   
...           ...           ...         ...           ...     ...   
3677851     11788           308           1             4   4.989   
3677852     11788           309           0             4   8.309   
3677853     11788           310           0             4   8.309   
3677854     11788           311           0             4   8.309   
3677855     11788           312           1             4   8.309   

                              attribute_name certainty_name  
0        has_bill_shape::curved_(up_or_down)       probably  
1                     has_bill_shape::dagger   

## **Description of cleaned dataframes:**

1.  image_data :
*   contains data from images, train_test_split, classes, image_class_labels, bounding_boxes.
*   columns: 'image_id', 'image_name', 'is_training_image', 'class_id', 'class_name', 'x', 'y', 'width', 'height'
*   dimensions: 11788 rows x 9 columns

2.  part_locs :
*   contains data from part_locs, parts.
*   columns: 'image_id', 'part_id', 'x', 'y', 'visible', 'part_name'
*   dimensions: 176820 rows x 6 columns

3.  part_click_locs :
*   contains data from part_click_locs, parts.
*   columns: 'image_id', 'part_id', 'x', 'y', 'visible', 'time', 'part_name'
*   dimensions: 794258 rows x 7 columns

4.  image_attribute_labels :
*   contains data from image_attribute_labels, attributes, certainties.
*   columns: 'image_id', 'attribute_id', 'is_present', 'certainty_id', 'time',
      'attribute_name', 'certainty_name'
*   dimensions: 3677856 rows x 7 columns

5.  class_attribute_labels_continuous :
*   rows: each row represents a bird class (same order as classes.txt)
*   columns: each column represents an attribute (same order as attributes.txt)
*   dimensions: 200 rows x 312 columns

In [15]:
##seed for the shuffle so the training row order is the same on every run
SHUFFLE_SEED = 42


def load_resized_images(image_names, target_size=(224, 224)):
    """Load the named images from CUB_200_2011/images/ at a fixed size.

    Args:
        image_names: iterable of paths relative to CUB_200_2011/images/
            (the values of the 'image_name' column).
        target_size: size passed to ``load_img`` for every image.

    Returns:
        A list with one loaded image per entry of ``image_names``, in the
        same order.
    """
    return [
        image.load_img('CUB_200_2011/images/{}'.format(name), target_size=target_size)
        for name in image_names
    ]


##split training and testing image data
training_image_data = image_data[image_data['is_training_image']==1]
testing_image_data = image_data[image_data['is_training_image']==0]

##shuffle training data (seeded, so the later validation split is reproducible)
training_image_data = training_image_data.sample(frac=1, random_state=SHUFFLE_SEED)

##lists of loaded training and testing images, in the row order of their frames
training_images = load_resized_images(training_image_data['image_name'].values)
testing_images = load_resized_images(testing_image_data['image_name'].values)

##class labels - the class_id of each image, in the same order as the image lists
training_class_label = np.array(training_image_data['class_id'].values)
testing_class_label = np.array(testing_image_data['class_id'].values)

In [16]:
# Data preprocessing function for xception
def preprocess(image, label):
  """Apply the Xception input preprocessing to ``image``.

  Returns a ``(preprocessed_image, label)`` tuple; ``label`` is passed
  through unchanged.
  """
  return keras.applications.xception.preprocess_input(image), label

##preprocessing testing and training data using function
##each result is an (images, labels) tuple: index 0 holds the preprocessed image array, index 1 the class labels
preprocessed_training_images = preprocess(
    image=np.array(training_images),
    label=training_class_label,
)
preprocessed_testing_images = preprocess(
    image=np.array(testing_images),
    label=testing_class_label,
)

In [17]:
# Split the training data into training and validation sets, the validation set is 30% of the training data
# The scikit-learn splitter is imported here under an alias because the name
# `train_test_split` was rebound to the DataFrame read from
# train_test_split.txt, so calling that name directly raises
# "TypeError: 'DataFrame' object is not callable" on a fresh kernel.
from sklearn.model_selection import train_test_split as split_train_validation

x_train, x_val, y_train, y_val = split_train_validation(preprocessed_training_images[0],
                                                        preprocessed_training_images[1],
                                                        test_size=0.3,
                                                        random_state=42)


In [None]:
# One hot encode the labels so the outputs are discrete and they work with the keras metrics function.
# 201 slots are used so that a class_id can serve directly as the one-hot index
# (presumably class_ids run 1..200, leaving index 0 unused - TODO confirm).
n_label_slots = 201

y_train_encoded = to_categorical(y_train, num_classes=n_label_slots)
y_val_encoded = to_categorical(y_val, num_classes=n_label_slots)
y_test_encoded = to_categorical(preprocessed_testing_images[1], num_classes=n_label_slots)

In [None]:
# Optional data augmentation. The best performing model (this one) does not
# augment the data, so the generator is configured here but never used for training.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

gen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=False,
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.2,
    zoom_range=0.3,
    fill_mode='nearest',
)

# # Generate augmented data
# augmented_train_generator = gen.flow(x_train, y_train_encoded, batch_size=32)

In [18]:
# Fine-tuning xception model: ImageNet weights, without the original top layers.
base_model = keras.applications.Xception(include_top=False, weights="imagenet")

# Global average pooling turns the base model's output into a vector.
avg = layers.GlobalAveragePooling2D()(base_model.output)

# Add the softmax output layer (201 units, matching the one-hot label width) and assemble the model.
output = layers.Dense(201, activation="softmax")(avg)
model = keras.Model(inputs=base_model.input, outputs=output)

In [None]:
# Alternative model: a frozen pre-trained Xception base with extra convolutional layers on top.
# Uncomment the code below and comment out the cell above to switch to this model.

# # Load pre-trained Xception model without the top layers
# base_model = keras.applications.Xception(weights="imagenet", include_top=False)

# # Freeze the base model layers
# base_model.trainable = False

# # Add your custom layers on top of the base model
# model = keras.Sequential()
# model.add(base_model)
# model.add(layers.Conv2D(512, 3, activation="relu", padding="same"))  # Add padding to preserve spatial dimensions
# model.add(layers.Conv2D(512, 3, activation="relu", padding="same"))
# model.add(layers.MaxPooling2D(3))
# model.add(layers.GlobalAveragePooling2D())
# model.add(layers.Dense(201, activation="softmax"))

In [20]:
# Code to add regularization to the model if desired.
'''
  silva2019kerasregularization,
  title={How to Add Regularization to Keras Pre-trained Models the Right Way},
  author={Silva, Thalles Santos},
  journal={https://sthalles.github.io},
  year={2019}
  url={https://sthalles.github.io/keras-regularizer/}
'''
def add_regularization(model, regularizer=tf.keras.regularizers.l2(0.00001)):
    """Rebuild ``model`` with ``regularizer`` set as every layer's kernel regularizer.

    Args:
        model: the Keras model to regularize.
        regularizer: a ``tf.keras.regularizers.Regularizer`` instance.

    Returns:
        A model recreated from the updated config with the original weights
        loaded back in. If ``regularizer`` is not a ``Regularizer``, a message
        is printed and ``model`` is returned unchanged.
    """
    if not isinstance(regularizer, tf.keras.regularizers.Regularizer):
        print("Regularizer must be a subclass of tf.keras.regularizers.Regularizer")
        return model

    for layer in model.layers:
        for attr in ['kernel_regularizer']:
            if hasattr(layer, attr):
                setattr(layer, attr, regularizer)

    # When we change the layers attributes, the change only happens in the model config file
    model_json = model.to_json()

    # Round-trip the weights through a private temporary directory that is
    # removed on exit, so no weights file is left behind in the shared temp
    # directory and no fixed file name is reused between calls.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_weights_path = os.path.join(tmp_dir, 'tmp.weights.h5')

        # Save the weights before reloading the model.
        model.save_weights(tmp_weights_path)

        # load the model from the config
        model = tf.keras.models.model_from_json(model_json)

        # Reload the model weights
        model.load_weights(tmp_weights_path)
    return model

In [21]:
# Uncomment and adjust the regularization parameter as needed if desired.
# model = add_regularization(model, regularizer=tf.keras.regularizers.l2(0.00001))

In [23]:
# Number of training images loaded (before the train/validation split).
dataset_size = len(training_images)

# Learning rate schedule: starts at 0.02 and decays by a factor of 0.9 per
# 10000 optimizer steps.
# Tune the hyperparameters to document how they affect model performance
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.02,
    decay_rate=0.9,
    decay_steps=10000,
)

# Early stopping to prevent overfitting: stop once val_loss has not improved
# for 3 epochs and restore the best weights seen.
earlystopping = callbacks.EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=3,
    restore_best_weights=True,
)

# Stage 1: keep the existing base layers fixed to make sure the added output layer works
for base_layer in base_model.layers:
    base_layer.trainable = False

# Set up the optimizer and compile the model using categorical crossentropy loss
optimizer = keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
stage_metrics = [
    'accuracy',
    keras.metrics.Precision(),
    keras.metrics.Recall(),
    keras.metrics.F1Score(),
]
model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=stage_metrics,
)

# Fit the model to the training and validation data to begin network training
history = model.fit(
    x_train,
    y_train_encoded,
    validation_data=(x_val, y_val_encoded),
    batch_size=32,
    epochs=5,
    callbacks=[earlystopping],
)

print()

Epoch 1/5
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m286s[0m 2s/step - accuracy: 0.0861 - f1_score: 0.0712 - loss: 4.7269 - precision: 0.4494 - recall: 0.0041 - val_accuracy: 0.2668 - val_f1_score: 0.2338 - val_loss: 3.0994 - val_precision: 0.8354 - val_recall: 0.0367
Epoch 2/5
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m290s[0m 2s/step - accuracy: 0.4141 - f1_score: 0.3743 - loss: 2.5504 - precision: 0.8014 - recall: 0.0940 - val_accuracy: 0.4069 - val_f1_score: 0.3740 - val_loss: 2.4509 - val_precision: 0.8398 - val_recall: 0.1195
Epoch 3/5
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m288s[0m 2s/step - accuracy: 0.5773 - f1_score: 0.5260 - loss: 1.8093 - precision: 0.9144 - recall: 0.2230 - val_accuracy: 0.4419 - val_f1_score: 0.4170 - val_loss: 2.2307 - val_precision: 0.8320 - val_recall: 0.1707
Epoch 4/5
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 2s/step - accuracy: 0.6691 - f1_score: 0.6226 - loss: 1.4419 - p

In [24]:
# Stage 2: train the model again with the base layers unfrozen
for base_layer in base_model.layers:
    base_layer.trainable = True

# The model is compiled again with a new optimizer after the trainable flags
# change; the same learning-rate schedule object is reused.
optimizer = keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
stage_metrics = [
    'accuracy',
    keras.metrics.Precision(),
    keras.metrics.Recall(),
    keras.metrics.F1Score(),
]
model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=stage_metrics,
)

# `history` is rebound here, so the stage-1 training history is no longer reachable by that name.
history = model.fit(
    x_train,
    y_train_encoded,
    validation_data=(x_val, y_val_encoded),
    batch_size=32,
    epochs=5,
    callbacks=[earlystopping],
)

Epoch 1/5
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m757s[0m 6s/step - accuracy: 0.5980 - f1_score: 0.5501 - loss: 1.7261 - precision_1: 0.9112 - recall_1: 0.2917 - val_accuracy: 0.4091 - val_f1_score: 0.4008 - val_loss: 2.4883 - val_precision_1: 0.5399 - val_recall_1: 0.3085
Epoch 2/5
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1112s[0m 8s/step - accuracy: 0.8887 - f1_score: 0.8401 - loss: 0.4879 - precision_1: 0.9541 - recall_1: 0.7752 - val_accuracy: 0.5970 - val_f1_score: 0.5891 - val_loss: 1.5058 - val_precision_1: 0.7785 - val_recall_1: 0.4747
Epoch 3/5
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1059s[0m 8s/step - accuracy: 0.9722 - f1_score: 0.9245 - loss: 0.1641 - precision_1: 0.9814 - recall_1: 0.9472 - val_accuracy: 0.6681 - val_f1_score: 0.6610 - val_loss: 1.2511 - val_precision_1: 0.8016 - val_recall_1: 0.5570
Epoch 4/5
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1059s[0m 8s/step - accuracy: 0.9904 - f1_score

In [25]:
# Evaluate the model on the testing data to obtain the model performance metrics.
# Inputs are the preprocessed test images; targets are their one-hot labels.
results = model.evaluate(
    x=preprocessed_testing_images[0],
    y=y_test_encoded,
)

[1m182/182[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m281s[0m 2s/step - accuracy: 0.6948 - f1_score: 0.3619 - loss: 1.1103 - precision_1: 0.7977 - recall_1: 0.6185
