# **Task:Bird classification with CNN**


Data set of 525 bird species. 84635 training images, 2625 test images(5 images per species) and 2625 validation images(5 images per species)

*Accuracy of a random classifier: 0.4%

**1. CNN without data augmentation**

1 Download the data

In [1]:
! pip install -q kaggle

In [2]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"vigilantius","key":"32eb8a75f3acb58b3446037836717e7e"}'}

In [3]:
!mkdir ~/.kaggle/
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [4]:
!kaggle datasets download -d gpiosenka/100-bird-species

Dataset URL: https://www.kaggle.com/datasets/gpiosenka/100-bird-species
License(s): CC0-1.0
Downloading 100-bird-species.zip to /content
100% 1.96G/1.96G [01:30<00:00, 23.1MB/s]
100% 1.96G/1.96G [01:30<00:00, 23.1MB/s]


In [5]:
!unzip -qq 100-bird-species.zip

2 Construct a model

1. Convert the jpg data into numpy arrays. THe file paths are shown in birds.csv

train_data:(samples, height, width, rgb)

In [6]:
import csv

with open("birds.csv", "r") as f:
    dataReader = csv.reader(f)
    i = 0
    for row in dataReader:
        print(row)
        i += 1
        if i == 5:
            break

['class id', 'filepaths', 'labels', 'data set', 'scientific name']
['0.0', 'train/ABBOTTS BABBLER/001.jpg', 'ABBOTTS BABBLER', 'train', 'MALACOCINCLA ABBOTTI']
['0.0', 'train/ABBOTTS BABBLER/007.jpg', 'ABBOTTS BABBLER', 'train', 'MALACOCINCLA ABBOTTI']
['0.0', 'train/ABBOTTS BABBLER/008.jpg', 'ABBOTTS BABBLER', 'train', 'MALACOCINCLA ABBOTTI']
['0.0', 'train/ABBOTTS BABBLER/009.jpg', 'ABBOTTS BABBLER', 'train', 'MALACOCINCLA ABBOTTI']


In [18]:
import numpy as np
valid_label = []
with open("birds.csv", "r") as f:
    dataReader = csv.reader(f)
    for row in dataReader:
        if row[3] == "valid":
            valid_label.append(row[2])
valid_label = np.array(valid_label)
valid_label_shuffled = np.random.permutation(valid_label)
random_classifier_accuracy = []

for _ in range(5):
    T = 0
    for i in range(len(valid_label)):
        if valid_label[i] == valid_label_shuffled[i]:
            T += 1
    random_classifier_accuracy.append(T/len(valid_label))
print(random_classifier_accuracy)
print(sum(random_classifier_accuracy)/5)




[0.004190476190476191, 0.004190476190476191, 0.004190476190476191, 0.004190476190476191, 0.004190476190476191]
0.004190476190476191


In [7]:
from tensorflow import keras
from tensorflow.keras import layers
inputs = keras.Input(shape=(224,224,3))
x = layers.Rescaling(1./255)(inputs)
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.Flatten()(x)
outputs = layers.Dense(units=525, activation="softmax")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

3 Preprocess the data

In [8]:
!pip install Pillow



In [9]:
from tensorflow.keras.utils import image_dataset_from_directory

train_dataset = image_dataset_from_directory(
    "train",
    image_size=(224,224),
    batch_size=512
)
valid_dataset = image_dataset_from_directory(
    "valid",
    image_size=(224,224),
    batch_size=512
)
test_dataset = image_dataset_from_directory(
    "test",
    image_size=(224,224),
    batch_size=512
)

Found 84635 files belonging to 525 classes.
Found 2625 files belonging to 525 classes.
Found 2625 files belonging to 525 classes.


4 Train

In [10]:
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="convnet_bird.keras",
        save_best_only=True,
        monitor="val_loss"
    ),
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=2
    )
]

model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)
history = model.fit(
    train_dataset,
    epochs=30,
    validation_data=valid_dataset,
    callbacks=callbacks
)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30


Validation accuracy of 56 %.



**2. Data augmentation**

In [21]:
#Model construction
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.2)
])

inputs = keras.Input(shape=(224,224,3))
x = data_augmentation(inputs)
x = layers.Rescaling(1./255)(x)
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.Flatten()(x)
outputs = layers.Dense(units=525, activation="softmax")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

In [24]:
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 sequential (Sequential)     (None, 224, 224, 3)       0         
                                                                 
 rescaling_1 (Rescaling)     (None, 224, 224, 3)       0         
                                                                 
 conv2d_5 (Conv2D)           (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 111, 111, 32)      0         
 g2D)                                                            
                                                                 
 conv2d_6 (Conv2D)           (None, 109, 109, 64)      18496     
                                                           

In [23]:
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="convnet_bird.keras",
        save_best_only=True,
        monitor="val_loss"
    ),
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=2
    )
]

model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)
history = model.fit(
    train_dataset,
    epochs=30,
    validation_data=valid_dataset,
    callbacks=callbacks
)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30


Validation accuracy of 59% ;slight improvement.


**3 Deepen the layers**

In [25]:
#Model construction
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.2)
])

inputs = keras.Input(shape=(224,224,3))
x = data_augmentation(inputs)
x = layers.Rescaling(1./255)(x)
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=512, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=512, kernel_size=3, activation="relu")(x)
x = layers.Flatten()(x)
outputs = layers.Dense(units=525, activation="softmax")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

In [27]:
model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 sequential_1 (Sequential)   (None, 224, 224, 3)       0         
                                                                 
 rescaling_2 (Rescaling)     (None, 224, 224, 3)       0         
                                                                 
 conv2d_10 (Conv2D)          (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d_8 (MaxPoolin  (None, 111, 111, 32)      0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 109, 109, 64)      18496     
                                                           

In [26]:
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="convnet_bird.keras",
        save_best_only=True,
        monitor="val_loss"
    ),
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=2
    )
]

model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)
history = model.fit(
    train_dataset,
    epochs=30,
    validation_data=valid_dataset,
    callbacks=callbacks
)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30


Validation accuracy of 61 %

Takeaway: Keep stacking convolution layers until the width and height of the images are 3.