In [2]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Scikit-learn
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, 
    classification_report, 
    confusion_matrix, 
    f1_score, 
    precision_score, 
    recall_score
)
from sklearn.model_selection import (
    StratifiedKFold, 
    StratifiedShuffleSplit, 
    cross_val_score, 
    train_test_split
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import (
    LabelEncoder, 
    OneHotEncoder, 
    OrdinalEncoder, 
    StandardScaler
)

# Other machine learning libraries
import lightgbm as lgb
import xgboost as xgb

# Visualization
import seaborn as sns

# MLflow
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
import mlflow
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import lightgbm as lgb
from scipy.stats import randint, uniform
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import FunctionTransformer

In [3]:
data = pd.read_csv("tabular_data.csv")
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46737 entries, 0 to 46736
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   genus            46737 non-null  object 
 1   species          46737 non-null  object 
 2   latitude         46737 non-null  float64
 3   longitude        46737 non-null  float64
 4   quality          46737 non-null  object 
 5   file_name        46737 non-null  object 
 6   simplified_type  46737 non-null  object 
 7   season           46737 non-null  object 
 8   time_of_day      46737 non-null  object 
 9   length_seconds   46737 non-null  int64  
 10  processed_file   46737 non-null  object 
 11  augmentations    35019 non-null  object 
 12  feature_vector   46737 non-null  object 
dtypes: float64(2), int64(1), object(10)
memory usage: 4.6+ MB


In [20]:
spectrogram_data = data[['species','processed_file']]
spectrogram_data.head()

Unnamed: 0,species,processed_file
0,canadensis,Branta_canadensis_Whitewater_Township_near__Ha...
1,canadensis,Branta_canadensis_Whitewater_Township_near__Ha...
2,canadensis,Branta_canadensis_Whitewater_Township_near__Ha...
3,canadensis,Branta_canadensis_Whitewater_Township_near__Ha...
4,canadensis,Branta_canadensis_Whitewater_Township_near__Ha...


In [21]:
y = spectrogram_data['species']
X = spectrogram_data['processed_file']


In [22]:
X[22]

'Branta_canadensis_Miami_Whitewater_Forest_Park_wetlands_Crosby_Township_near__Harrison_Hamilton_County_Ohio_691528_segment_3.wav'

In [23]:
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_index, test_index = next(sss.split(X, y))
X_train, X_test = X.iloc[train_index], X.iloc[test_index]
y_train, y_test = y[train_index], y[test_index]

In [30]:

# Prepare data

le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)

In [40]:
import os
import numpy as np
import librosa
import tensorflow as tf
import mlflow
from tensorflow.keras import layers, models
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import layers, models, Input

# MLflow setup
mlflow.set_experiment("Bird Call Identification")

# Directories
AUG_DIR = 'Augmented Recordings'
PROC_DIR = 'Processed Recordings'

# Parameters
batch_size = 32
num_classes = 121
epochs = 30
input_shape = (128, 500, 1)
embedding_dim = 64

# Data processing functions
def load_audio(file_path, sr=44100, n_fft=1024, hop_length=441, n_mels=128, fixed_length=500):
    y, sr = librosa.load(file_path, sr=sr)
    mel_spec = librosa.feature.melspectrogram(
    y=y, 
    sr=sr, 
    n_fft=n_fft, 
    hop_length=hop_length, 
    n_mels=n_mels
    )
    mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
    # Pad or crop to fixed length
    if mel_spec.shape[1] < fixed_length:
        pad_width = fixed_length - mel_spec.shape[1]
        mel_spec = np.pad(mel_spec, ((0, 0), (0, pad_width)), mode='constant')
    else:
        mel_spec = mel_spec[:, :fixed_length]
    
    return mel_spec[..., np.newaxis]

def data_generator(X, y, batch_size):
    while True:
        # Use index-based iteration instead of permutation
        for start in range(0, len(X), batch_size):
            end = min(start + batch_size, len(X))
            batch_X, batch_y = [], []
            
            for i in range(start, end):
                file_name = X.iloc[i]  # Use iloc for positional indexing
                label = y[i]
                
                # Check AUG_DIR first, then PROC_DIR
                aug_path = os.path.join(AUG_DIR, file_name)
                proc_path = os.path.join(PROC_DIR, file_name)
                
                if os.path.exists(aug_path):
                    file_path = aug_path
                elif os.path.exists(proc_path):
                    file_path = proc_path
                else:
                    print(f"File not found: {file_name}")
                    continue
                
                try:
                    mel_spec = load_audio(file_path)
                    batch_X.append(mel_spec)
                    batch_y.append(label)
                except Exception as e:
                    print(f"Error processing file {file_name}: {str(e)}")
                    continue
                
            if batch_X and batch_y:
                yield np.array(batch_X), np.array(batch_y)
            else:
                print("Warning: Empty batch generated")

def create_model(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    
    # First Convolutional Block
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Dropout(0.2)(x)
    
    # Second Convolutional Block
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Dropout(0.2)(x)
    
    # Third Convolutional Block
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Dropout(0.2)(x)
    
    # Fourth Convolutional Block
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Dropout(0.2)(x)
    
    # Flatten the output and feed it into dense layers
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    
    # Output layer
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    model = models.Model(inputs=inputs, outputs=outputs)
    
    return model


# Create the model
model = create_model(input_shape, num_classes)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# MLflow tracking
with mlflow.start_run():
    # Log parameters
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("batch_size", batch_size)
    
    # Train the model with generator
    history = model.fit(
        data_generator(X_train, y_train_enc, batch_size),
        steps_per_epoch=len(X_train) // batch_size,
        validation_data=data_generator(X_test, y_test_enc, batch_size),
        validation_steps=len(X_test) // batch_size,
        epochs=epochs,
        callbacks=[
        ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss'),
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
    ]
    )

    # Log model and metrics
    mlflow.keras.log_model(model, "cnn_lstm_model")
    mlflow.log_metrics({
        "train_accuracy": history.history['accuracy'][-1],
        "val_accuracy": history.history['val_accuracy'][-1],
        "train_loss": history.history['loss'][-1],
        "val_loss": history.history['val_loss'][-1],
    })

Epoch 1/30
[1m1168/1168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3304s[0m 3s/step - accuracy: 0.0367 - loss: 5.0663 - val_accuracy: 0.1463 - val_loss: 3.7087
Epoch 2/30
[1m 104/1168[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m49:17[0m 3s/step - accuracy: 0.1187 - loss: 3.9325

KeyboardInterrupt: 