In [5]:
%load_ext autoreload
%autoreload 2

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa
from config import *
from preprocessing import SpectrogramSequence
from util import Dataset

annotations_df: pd.DataFrame = pd.read_csv(ANNOTATIONS / 'initial_dataset_7depl_metadata.csv')
has_annotations = "1_20230316_063000.wav"
has_annotations_path = ds.get_data_path(1, 1) / has_annotations

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:

# NOTE paper has only 40 freq bins for chunk (i have >500 - too high sr?)

def CRNN(input_shape, num_classes, n_filters):
    freq_len, time_len, _ = input_shape
    
    model = keras.Sequential()
    model.add(layers.Input(shape=input_shape))
    
    # CNN layers
    for filters in n_filters:
        model.add(layers.Conv2D(filters, (3, 3), activation='relu', padding='same', strides=(2, 1)))
        model.add(layers.MaxPooling2D((2, 1))) # frequency pooling only

    model.add(layers.Flatten())
    model.add(layers.Reshape((-1, time_len))) # input matrix for RNN shape=(new_features, time_frames)

    # RNN layers
    model.add(layers.LSTM(128, return_sequences=True))
    model.add(layers.Dense(num_classes, activation='sigmoid'))  
    
    return model


model = CRNN(input_shape=(512, 428, 1), num_classes=4, n_filters=[32, 64, 128, 256])
model.compile(optimizer='adam', 
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

2024-08-16 21:18:06.327612: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 256, 428, 32)      320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 128, 428, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 64, 428, 64)       18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 32, 428, 64)      0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 16, 428, 128)      73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 8, 428, 128)      0

shuffle annotations but keep recordings clumped
in order to maintain class balance between train/test/valid (deployments are inbalanced)


In [7]:
grouped = annotations_df.groupby('recording')

shuffled_groups = list(grouped)  
np.random.shuffle(shuffled_groups)  

shuffled_df = pd.concat([df for _, df in shuffled_groups], ignore_index=True)

shuffled_df.reset_index(drop=True, inplace=True)

In [8]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(shuffled_df, test_size=0.2, shuffle=False)
train_df, validation_df = train_test_split(train_df, test_size=0.2, shuffle=False)

for i in (validation_df, test_df, train_df):
    print(i.shape)

(351, 13)
(438, 13)
(1401, 13)


In [11]:
sr = 22_000
batch = 32

train_sequence = SpectrogramSequence(annotations_df=train_df, sr=sr, batch_size=batch)
test_sequence = SpectrogramSequence(annotations_df=test_df, sr=sr, batch_size=batch)
validation_sequence = SpectrogramSequence(annotations_df=validation_df, sr=sr, batch_size=batch)

for s in (train_sequence, test_sequence, validation_sequence):
    print(len(s.chunk_info))

n# frames in chunk:  428


preparing data: 100%|██████████| 188/188 [00:09<00:00, 20.20it/s]


n# frames in chunk:  428


preparing data: 100%|██████████| 78/78 [00:03<00:00, 20.90it/s]


n# frames in chunk:  428


preparing data: 100%|██████████| 37/37 [00:01<00:00, 33.64it/s]

11280
4677
2220





In [67]:
batch_X, batch_Y = train_sequence.__getitem__(0)
print(batch_X[0].shape, batch_Y[0].shape)

loading new recording: 1_20230316_063000.wav 			| Time Taken: 5.862738 seconds
resampling 1_20230316_063000.wav 			| Time Taken: 2.092499 seconds
stft 1_20230316_063000.wav 			| Time Taken: 1.383413 seconds
converting to db 1_20230316_063000.wav 			| Time Taken: 0.949440 seconds
Total: 10.30 seconds
(512, 428) (428, 4)


In [12]:
epochs = 2

# early_stopping_cb = tf.keras.callbacks.EarlyStopping(
#     monitor='val_prc',
#     verbose=1,
#     patience=30,
#     mode='max',
#     restore_best_weights=True
# )

history = model.fit(
    train_sequence, 
    epochs=epochs, 
    validation_data=validation_sequence)
                    # callbacks=[early_stopping_cb])

df = pd.DataFrame(history.history)
df[['prc', 'val_prc', 'recall', 'val_recall']].plot()


loading new recording: 1_20230317_063000.wav 			| Time Taken: 17.887911 seconds
resampling 1_20230317_063000.wav 			| Time Taken: 12.127509 seconds
stft 1_20230317_063000.wav 			| Time Taken: 3.312986 seconds
converting to db 1_20230317_063000.wav 			| Time Taken: 1.146981 seconds


DEBUG:root:batch average spectrogram chunk shape: [512. 428.]
DEBUG:root:batch average Y shape: [428.   4.]


Total: 34.49 seconds
Epoch 1/2
loading new recording: 1_20230514_060000.wav 			| Time Taken: 11.318040 seconds
resampling 1_20230514_060000.wav 			| Time Taken: 3.394469 seconds
stft 1_20230514_060000.wav 			| Time Taken: 2.291393 seconds
converting to db 1_20230514_060000.wav 			| Time Taken: 1.719308 seconds


DEBUG:root:batch average spectrogram chunk shape: [512. 428.]
DEBUG:root:batch average Y shape: [428.   4.]


Total: 18.74 seconds
loading new recording: 1_20230317_063000.wav 

2024-08-16 21:21:56.221283: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 448790528 exceeds 10% of free system memory.
2024-08-16 21:21:59.082725: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 224395264 exceeds 10% of free system memory.
2024-08-16 21:22:16.719062: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 224395264 exceeds 10% of free system memory.
2024-08-16 21:22:24.978124: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 112197632 exceeds 10% of free system memory.
2024-08-16 21:22:26.061184: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 112197632 exceeds 10% of free system memory.


: 

In [None]:
import soundfile as sf