In [9]:
# from google.colab import drive

# drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# import zipfile
# with zipfile.ZipFile('/content/train.zip', 'r') as zip_ref:
#   # Extract all the contents into the directory
#   zip_ref.extractall('/content')

In [5]:
import os
print(os.cpu_count())
TRAIN_PATH = os.path.join("/content/", "train")
TEST_PATH = os.path.join("/content/",  "test")
VAL_PATH = os.path.join("/content/",  "val")

12


In [6]:
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)
else:
    print("No GPU was detected. TensorFlow will run on CPU.")


In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the path to your spectrogram images
train_directory = TRAIN_PATH
validation_directory = VAL_PATH

SPECTROGRAM_WIDTH = 884
SPECTROGRAM_HEIGHT = 322
BATCH_SIZE=32
# Image dimensions
img_width, img_height = SPECTROGRAM_WIDTH, SPECTROGRAM_HEIGHT

# Rescale the images by dividing every pixel in every image by 255
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)

# Load images from the directory and prepare them for training
train_generator = train_datagen.flow_from_directory(
    train_directory,
    target_size=(img_height, img_width),
    batch_size=BATCH_SIZE,
    class_mode='categorical')

validation_generator = validation_datagen.flow_from_directory(
    validation_directory,
    target_size=(img_height, img_width),
    batch_size=BATCH_SIZE,
    class_mode='categorical')


Found 17781 images belonging to 10 classes.
Found 6170 images belonging to 10 classes.


In [8]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

CHANNELS = 3
NUM_CLASSES = 10

# Define the CNN model
model = Sequential()

# Convolutional layer with 32 filters, a kernel size of 3x3, and ReLU activation
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(SPECTROGRAM_HEIGHT, SPECTROGRAM_WIDTH, CHANNELS)))
model.add(MaxPooling2D((2, 2)))

# Adding another convolutional layer
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

# Adding another convolutional layer
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

# Flatten the output of the convolutional layers
model.add(Flatten())

# Fully connected layer with 128 units and ReLU activation
model.add(Dense(128, activation='relu'))

# Dropout for regularization
model.add(Dropout(0.5))

# Output layer with a unit for each class and softmax activation
model.add(Dense(NUM_CLASSES, activation='softmax'))

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Model summary
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 320, 882, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 160, 441, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 158, 439, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 79, 219, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 77, 217, 128)      73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 38, 108, 128)      0

In [9]:
EPOCHS = 10

history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // BATCH_SIZE)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:

test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    TEST_PATH,
    target_size=(img_height, img_width),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False)  # It's important not to shuffle the test data


Found 5576 images belonging to 10 classes.


In [11]:
test_loss, test_accuracy = model.evaluate(test_generator, steps=test_generator.samples // BATCH_SIZE)
print("Test accuracy:", test_accuracy)
print("Test loss:", test_loss)


Test accuracy: 0.8040589094161987
Test loss: 0.9894700050354004


In [17]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Predict classes
predictions = model.predict(test_generator)
predicted_classes = np.argmax(predictions, axis=1)

# Get true labels
true_classes = test_generator.classes

# Calculate classification report and confusion matrix
report = classification_report(true_classes, predicted_classes, target_names=test_generator.class_indices.keys())
conf_matrix = confusion_matrix(true_classes, predicted_classes)

print(report)
print(conf_matrix)


              precision    recall  f1-score   support

      barswa       0.65      0.66      0.66       428
     cohmar1       0.79      0.70      0.74       552
     combuz1       0.77      0.70      0.73       504
      comsan       0.82      0.84      0.83       392
     eaywag1       0.77      0.65      0.70       344
     eubeat1       0.93      0.90      0.91       492
      litegr       0.80      0.81      0.80       246
     thrnig1       0.86      0.93      0.89      1654
      wlwwar       0.72      0.80      0.76       684
      woosan       0.83      0.63      0.72       280

    accuracy                           0.80      5576
   macro avg       0.79      0.76      0.78      5576
weighted avg       0.80      0.80      0.80      5576

[[ 283   36    8   15   11    0    6   22   31   16]
 [  88  387    5    5   15    0    3   35   11    3]
 [  13   23  355    9    4    4    2   54   40    0]
 [   2    1   18  330   15    2    6    8    8    2]
 [   5   19    7   14  223   

In [15]:
preds = model.predict(test_generator)



In [16]:
len(preds)

5576

In [19]:
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [20]:
model.save('/content/drive/MyDrive/P2_DeepLearning/birdclef-2023/prep_data/bird_pred.h5')
