In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Data loading
# X (features) and y (labels) are assumed to be defined before this point.
# X, y = load_your_data()  # replace with actual data loading

# Group every run started below under one named MLflow experiment
mlflow.set_experiment(experiment_name="Bird Call Identification")

# Function to split data
def split_data(X, y, test_size=0.2, random_state=42):
    """Partition ``X`` and ``y`` into stratified train and test subsets.

    Args:
        X: Feature collection accepted by ``train_test_split``.
        y: Labels aligned with ``X``; also used as the stratification target.
        test_size: Value forwarded to ``train_test_split`` as ``test_size``.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    split_options = {
        "test_size": test_size,
        "random_state": random_state,
        "stratify": y,
    }
    train_X, test_X, train_y, test_y = train_test_split(X, y, **split_options)
    return train_X, test_X, train_y, test_y

# Define models to train
# Keys double as MLflow run names and as prefixes for the saved artifacts.
models = {}
models["Logistic Regression"] = LogisticRegression(max_iter=1000)
models["K-Nearest Neighbors"] = KNeighborsClassifier(n_neighbors=5)
models["Random Forest"] = RandomForestClassifier(n_estimators=100, random_state=42)
models["Neural Network"] = MLPClassifier(
    hidden_layer_sizes=(100,),
    max_iter=500,
    random_state=42,
)

# Function to perform cross-validation, train, and log models with MLflow
def train_and_log_models(models, X_train, y_train, X_test, y_test):
    """Cross-validate, fit, evaluate and log each classifier in its own MLflow run.

    Every estimator in ``models`` is wrapped in a ``StandardScaler`` -> estimator
    pipeline, scored with 5-fold stratified cross-validation (macro F1) on the
    training data, refit on the full training data and evaluated on the test
    data. Metrics, a confusion-matrix PNG and the fitted pipeline are logged to
    MLflow; the PNG is also written to the current working directory.

    Args:
        models: Mapping of display name -> unfitted scikit-learn classifier.
            The name is used as the MLflow run name and artifact prefix.
            Note that ``pipeline.fit`` trains the passed estimator object
            itself, so the estimators in this mapping end up fitted.
        X_train: Training features.
        y_train: Training labels.
        X_test: Held-out features, used only for the final metrics.
        y_test: Held-out labels.

    Returns:
        Dict mapping each model name to a dict with the keys ``"Accuracy"``,
        ``"F1 Macro"`` and ``"CV F1 Macro"``.
    """
    skf = StratifiedKFold(n_splits=5)
    results = {}

    for model_name, model in models.items():
        with mlflow.start_run(run_name=model_name):
            # Scaling lives inside the pipeline so each CV fold fits its own scaler
            pipeline = Pipeline([('scaler', StandardScaler()), ('model', model)])

            # Cross-validation
            cv_scores = cross_val_score(pipeline, X_train, y_train, cv=skf, scoring='f1_macro')
            avg_cv_score = np.mean(cv_scores)
            print(f"{model_name} CV F1 Score: {avg_cv_score}")

            # Train on the entire training data
            pipeline.fit(X_train, y_train)
            y_pred = pipeline.predict(X_test)

            # Calculate metrics
            accuracy = accuracy_score(y_test, y_pred)
            f1_macro = f1_score(y_test, y_pred, average='macro')

            # Log metrics to MLflow
            mlflow.log_param("Model", model_name)
            mlflow.log_metric("Accuracy", accuracy)
            mlflow.log_metric("F1 Macro", f1_macro)
            mlflow.log_metric("CV F1 Macro", avg_cv_score)

            # Sorted union of true and predicted labels: the same label set and
            # order confusion_matrix uses by default, made explicit so the
            # heatmap axes can show class names instead of positional indices.
            class_labels = np.unique(
                np.concatenate([np.asarray(y_test).ravel(), np.asarray(y_pred).ravel()])
            )
            conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

            plot_path = f"{model_name}_confusion_matrix.png"
            fig, ax = plt.subplots(figsize=(10, 8))
            try:
                sns.heatmap(
                    conf_matrix,
                    annot=True,
                    fmt="d",
                    cmap="Blues",
                    xticklabels=class_labels,
                    yticklabels=class_labels,
                    ax=ax,
                )
                ax.set_title(f"{model_name} Confusion Matrix")
                ax.set_xlabel("Predicted")
                ax.set_ylabel("Actual")

                # Save through the figure handle rather than pyplot's implicit
                # "current figure", then attach the file to the run.
                fig.savefig(plot_path)
                mlflow.log_artifact(plot_path)
            finally:
                # Always release the figure, even if saving or logging fails
                plt.close(fig)

            # Print classification report
            print(f"Classification Report for {model_name}:\n", classification_report(y_test, y_pred))

            # Log the model
            mlflow.sklearn.log_model(pipeline, model_name)

            results[model_name] = {
                "Accuracy": accuracy,
                "F1 Macro": f1_macro,
                "CV F1 Macro": avg_cv_score,
            }

    return results

# Hold out a test set using split_data's default size and seed
X_train, X_test, y_train, y_test = split_data(X=X, y=y)

# Cross-validate, fit and log every candidate classifier
train_and_log_models(
    models=models,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM, TimeDistributed, BatchNormalization, GRU
from tensorflow.keras.layers import Input, Reshape
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping

# Assumes X_train, X_test, y_train, y_test are already split and the
# spectrograms are ready:
#   X_train, X_test - shape (num_samples, height, width, channels)
#   y_train, y_test - shape (num_samples,)

# Encode labels as integer ids; the encoder is fitted on the training labels
# only and then reused for the test labels.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# One-hot encode with an explicit width. Without num_classes, to_categorical
# sizes each array from its own largest id, so a test split that lacks the
# highest-numbered class would get fewer columns than the training targets
# and no longer match the model's output layer.
y_train_categorical = to_categorical(
    y_train_encoded, num_classes=len(label_encoder.classes_)
)
y_test_categorical = to_categorical(
    y_test_encoded, num_classes=len(label_encoder.classes_)
)

# CNN model for spectrograms
def create_cnn_model(input_shape, num_classes):
    """Build an uncompiled 2-D CNN classifier.

    Architecture: three Conv2D(3x3, relu) -> BatchNormalization ->
    MaxPooling2D(2x2) stages with 32, 64 and 128 filters, followed by
    Flatten -> Dense(256, relu) -> Dropout(0.5) -> Dense(num_classes, softmax).

    Args:
        input_shape: Per-sample input shape, without the batch dimension.
            The convolution layers require three dimensions; the caller in
            this file passes ``(height, width, channels)``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    model = Sequential([
        # Declare the input with an explicit Input layer; passing
        # `input_shape=` to the first layer is deprecated in current Keras.
        Input(shape=input_shape),

        Conv2D(32, kernel_size=(3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),

        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),

        Conv2D(128, kernel_size=(3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),

        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])
    return model

# LSTM model for spectrogram sequences
def create_lstm_model(input_shape, num_classes):
    """Build an uncompiled stacked-LSTM classifier.

    The input is reshaped to ``(-1, input_shape[1] * input_shape[2])`` and fed
    through LSTM(128, return_sequences=True) -> LSTM(64) -> Dense(256, relu)
    -> Dropout(0.5) -> Dense(num_classes, softmax).

    Args:
        input_shape: Per-sample input shape with at least three entries; the
            caller in this file passes ``(height, width, channels)``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    # Feature size of each timestep after merging the last two input axes
    features_per_step = input_shape[1] * input_shape[2]

    model = Sequential([
        # Declare the input with an explicit Input layer; passing
        # `input_shape=` to the first layer is deprecated in current Keras.
        Input(shape=input_shape),
        # For a (height, width, channels) input this yields `height` timesteps
        # of width * channels features each.
        # NOTE(review): if height is the frequency axis and width the time
        # axis, the LSTM steps over frequency bins rather than time frames --
        # confirm this is intended.
        Reshape(target_shape=(-1, features_per_step)),
        LSTM(128, return_sequences=True),
        LSTM(64),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])
    return model

# Function to train and log neural network models with MLflow
def train_and_log_nn_models(X_train, y_train, X_test, y_test, *, val_size=0.2, random_state=42):
    """Train the CNN and LSTM models and log each one as its own MLflow run.

    A validation subset is held out from the training data and used for early
    stopping and for the training-history curves. The test data is used only
    for the final evaluation, so the reported test metrics are not influenced
    by epoch selection or by ``restore_best_weights``.

    Args:
        X_train: Training inputs; ``X_train.shape[1:]`` is used as the model
            input shape.
        y_train: One-hot training targets; ``y_train.shape[1]`` is used as the
            number of classes.
        X_test: Held-out inputs used only for the final evaluation.
        y_test: One-hot held-out targets.
        val_size: Share of the training data set aside for validation,
            forwarded to ``train_test_split`` as ``test_size``.
        random_state: Seed for the train/validation split.

    Returns:
        Dict mapping each model name to a dict with the keys ``"Test Loss"``
        and ``"Test Accuracy"``.
    """
    input_shape = X_train.shape[1:]  # (height, width, channels)
    num_classes = y_train.shape[1]

    X_fit, X_val, y_fit, y_val = _hold_out_validation(X_train, y_train, val_size, random_state)

    # Builders instead of instances: each network is created inside its own run
    model_builders = {
        "CNN": create_cnn_model,
        "LSTM": create_lstm_model,
    }
    results = {}

    for model_name, build_model in model_builders.items():
        with mlflow.start_run(run_name=model_name):
            model = build_model(input_shape, num_classes)

            # Compile model
            model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

            # Stop once validation loss has not improved for 5 epochs and roll
            # back to the best weights seen
            early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

            # Train model; validation comes from the training data, never the test set
            history = model.fit(
                X_fit, y_fit,
                validation_data=(X_val, y_val),
                epochs=30,
                batch_size=32,
                callbacks=[early_stopping],
                verbose=2
            )

            # Evaluate once on the untouched test set
            loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
            print(f"{model_name} Test Accuracy: {accuracy}")

            # Log parameters and metrics
            mlflow.log_param("Model Type", model_name)
            mlflow.log_param("Validation Size", val_size)
            mlflow.log_metric("Test Accuracy", accuracy)
            mlflow.log_metric("Test Loss", loss)
            mlflow.log_metric("Epochs Trained", len(history.history['loss']))

            # Save model to MLflow
            # NOTE(review): only `mlflow` and `mlflow.sklearn` are imported
            # explicitly in this file; this relies on `mlflow.keras` being
            # reachable as an attribute of the installed MLflow -- confirm.
            mlflow.keras.log_model(model, model_name)

            # Plot training history
            history_path = f"{model_name}_training_history.png"
            fig, ax = plt.subplots(1, 2, figsize=(12, 4))
            try:
                # Accuracy plot
                ax[0].plot(history.history['accuracy'], label='train accuracy')
                ax[0].plot(history.history['val_accuracy'], label='val accuracy')
                ax[0].set_title(f"{model_name} - Accuracy")
                ax[0].set_xlabel("Epochs")
                ax[0].set_ylabel("Accuracy")
                ax[0].legend()

                # Loss plot
                ax[1].plot(history.history['loss'], label='train loss')
                ax[1].plot(history.history['val_loss'], label='val loss')
                ax[1].set_title(f"{model_name} - Loss")
                ax[1].set_xlabel("Epochs")
                ax[1].set_ylabel("Loss")
                ax[1].legend()

                # Save through the figure handle and attach the file to the run
                fig.savefig(history_path)
                mlflow.log_artifact(history_path)
            finally:
                # Always release the figure, even if saving or logging fails
                plt.close(fig)

            results[model_name] = {"Test Loss": loss, "Test Accuracy": accuracy}

    return results


def _hold_out_validation(X, y, val_size, random_state):
    """Split one-hot labelled data into fit and validation parts, stratified when possible."""
    class_ids = np.argmax(y, axis=1)
    try:
        return train_test_split(
            X, y, test_size=val_size, random_state=random_state, stratify=class_ids
        )
    except ValueError:
        # Stratification is rejected when a class has too few samples; fall
        # back to a plain shuffled split rather than failing the whole run.
        return train_test_split(X, y, test_size=val_size, random_state=random_state)

# Train and log the CNN and LSTM models on the spectrogram data,
# using the one-hot encoded targets
train_and_log_nn_models(
    X_train=X_train,
    y_train=y_train_categorical,
    X_test=X_test,
    y_test=y_test_categorical,
)
