### Extract features from the processed and augmented audio files.

In [8]:
# Imports
import os
import random
import logging
import warnings
import gc

import numpy as np
import pandas as pd
from tqdm import tqdm


# Data processing and scientific computing
from scipy.io import wavfile
from scipy.signal import butter, lfilter
from scipy.spatial.distance import cosine

# Audio processing
import librosa
import soundfile as sf

# Visualization
import matplotlib.pyplot as plt
import plotly.express as px

# Set up logging: configure the root logger so INFO-level (and higher) messages are emitted
logging.basicConfig(level=logging.INFO)

In [9]:
# Load the prepared csv
# NOTE(review): the preview of this frame shows an "Unnamed: 0" column, which
# presumably is a row index that was written into the CSV; confirm whether it
# should be read with index_col=0 or dropped.
augmented_data = pd.read_csv("augmented_data.csv")

In [10]:
# Preview the first rows to check that the CSV loaded with the expected columns
augmented_data.head()

Unnamed: 0,genus,species,latitude,longitude,quality,file_name,simplified_type,season,time_of_day,length_seconds,processed_file,augmentations
0,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...,
1,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...,
2,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...,
3,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...,
4,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...,


In [11]:
def is_valid_audio(audio, sr, min_duration=0.1, silence_threshold=-60):
    """Check if the audio segment is valid (not too short and not silent).

    Args:
        audio: Audio samples as a NumPy array (e.g. from ``librosa.load``).
        sr: Sampling rate of ``audio``.
        min_duration: Minimum accepted duration, in the unit returned by
            ``librosa.get_duration`` (seconds).
        silence_threshold: Minimum accepted mean level in dB. The level is
            measured relative to the clip's own peak (``ref=np.max``).

    Returns:
        bool: ``True`` when the clip is long enough and not silent,
        ``False`` otherwise.
    """
    # An empty or all-zero clip is digitally silent and must be rejected
    # explicitly. Because the dB level below is relative to the clip's own
    # peak, an all-zero signal evaluates to 0 dB everywhere and would pass the
    # threshold check; an empty one would make np.max raise.
    samples = np.asarray(audio)
    if samples.size == 0 or not np.any(samples):
        return False

    duration = librosa.get_duration(y=audio, sr=sr)
    if duration < min_duration:
        return False

    # Check if the audio is mostly silent
    db = librosa.amplitude_to_db(np.abs(audio), ref=np.max)
    if np.mean(db) < silence_threshold:
        return False

    return True

def extract_features(audio, sr):
    """Compute the raw librosa features for one audio clip.

    Args:
        audio: Audio samples, passed to librosa as ``y``.
        sr: Sampling rate of ``audio``.

    Returns:
        dict: Feature arrays keyed by ``'mel_spectrogram_db'``, ``'mfccs'``,
        ``'spectral_centroids'``, ``'chroma'``, ``'zero_crossing_rate'`` and
        ``'spectral_rolloff'`` (in that order).
    """
    # 128-band mel power spectrogram, expressed in dB relative to its maximum.
    mel_power = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)

    extracted = {
        'mel_spectrogram_db': librosa.power_to_db(mel_power, ref=np.max),
        'mfccs': librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13),
    }

    # The [0] index keeps only the first row of the returned array, so these
    # three features are stored one-dimensional.
    extracted['spectral_centroids'] = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
    extracted['chroma'] = librosa.feature.chroma_stft(y=audio, sr=sr)
    extracted['zero_crossing_rate'] = librosa.feature.zero_crossing_rate(audio)[0]
    extracted['spectral_rolloff'] = librosa.feature.spectral_rolloff(y=audio, sr=sr)[0]

    return extracted

def summarize_feature(feature):
    """Collapse a feature array into its mean, standard deviation and maximum.

    Args:
        feature: A 1-D or 2-D NumPy array.

    Returns:
        numpy.ndarray: For 1-D input, ``[mean, std, max]``. For 2-D input, the
        per-row means, then the per-row standard deviations, then the per-row
        maxima, concatenated into a single 1-D array.

    Raises:
        ValueError: If ``feature`` is neither 1-D nor 2-D.
    """
    statistics = (np.mean, np.std, np.max)

    # Guard clause: only vectors and matrices are supported.
    if feature.ndim not in (1, 2):
        raise ValueError(f"Unsupported feature dimension: {feature.ndim}")

    if feature.ndim == 1:
        return np.array([stat(feature) for stat in statistics])

    # axis=1 reduces across columns, giving one value per row for each statistic.
    return np.hstack([stat(feature, axis=1) for stat in statistics])

def save_mel_spectrogram(mel_spec, output_dir, base_filename, sr):
    """Render a mel-spectrogram and save it as a PNG image.

    The image is written to
    ``<output_dir>/<base_filename>_mel_spectrogram.png``.

    Args:
        mel_spec: 2-D spectrogram array to draw (plotted with a dB colorbar).
        output_dir: Directory for the image; created if it does not exist.
        base_filename: File name stem used for the output image.
        sr: Sampling rate, passed to ``specshow`` for axis scaling.

    Returns:
        None
    """
    # Import the submodule explicitly: a bare `import librosa` does not expose
    # `librosa.display` on every librosa version.
    import librosa.display

    # savefig does not create missing directories, so make sure the target
    # exists (an empty string means "current directory" and needs no creation).
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(mel_spec, sr=sr, x_axis='time', y_axis='mel')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Mel-spectrogram')
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, f"{base_filename}_mel_spectrogram.png"))
    finally:
        # Always release the figure, even when plotting or saving fails, so
        # figures do not accumulate over thousands of files.
        plt.close(fig)


In [12]:
def process_audio_file(file_path, output_dir):
    """Turn one audio file into a summary feature vector and a spectrogram image.

    The file is loaded, validated with ``is_valid_audio``, run through
    ``extract_features``, and every feature except the mel-spectrogram is
    summarized with ``summarize_feature``. The mel-spectrogram itself is saved
    as an image via ``save_mel_spectrogram``.

    Args:
        file_path: Path of the audio file to process.
        output_dir: Directory handed to ``save_mel_spectrogram``.

    Returns:
        tuple: ``(feature_vector, features)`` on success, or ``(None, None)``
        when the audio is rejected as invalid or any exception occurs (a
        message is printed in both cases).
    """
    # Order in which the per-feature summaries are concatenated.
    summary_order = (
        'mfccs_summary',
        'spectral_centroids_summary',
        'chroma_summary',
        'zero_crossing_rate_summary',
        'spectral_rolloff_summary',
    )

    try:
        # sr=None asks librosa not to resample to its default rate.
        audio, sr = librosa.load(file_path, sr=None)

        if not is_valid_audio(audio, sr):
            print(f"Warning: Audio file {file_path} is too short or silent. Skipping.")
            return None, None

        # Feature extraction runs with all warnings suppressed.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            features = extract_features(audio, sr)

        # The mel-spectrogram is kept for the image only, not summarized.
        summaries = {
            f"{name}_summary": summarize_feature(values)
            for name, values in features.items()
            if name != 'mel_spectrogram_db'
        }

        # A missing summary contributes an empty array to the vector.
        feature_vector = np.hstack(
            [summaries.get(summary_key, np.array([])) for summary_key in summary_order]
        )

        stem, _ = os.path.splitext(os.path.basename(file_path))
        save_mel_spectrogram(features['mel_spectrogram_db'], output_dir, stem, sr)
    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")
        return None, None

    return feature_vector, features

def process_audio_files_batch(df, base_dir, batch_size=500, output_dir="mel-spectrograms"):
    """Extract features for every file listed in ``df``, batch by batch.

    Each row's ``processed_file`` is looked up first under
    ``<base_dir>/Augmented Recordings`` and then under
    ``<base_dir>/Processed Recordings``. After each batch, that batch's
    results are written to ``feature_data_batch_<k>.csv`` in the current
    working directory.

    Args:
        df: DataFrame with a ``processed_file`` column.
        base_dir: Directory containing the two recordings folders.
        batch_size: Number of rows per batch; must be at least 1.
        output_dir: Directory for the mel-spectrogram images. Defaults to
            ``"mel-spectrograms"``; created if it does not exist.

    Returns:
        tuple: ``(feature_data, skipped_files)`` where ``feature_data`` is a
        list of ``{'processed_file', 'feature_vector'}`` dicts and
        ``skipped_files`` lists the files that were not found or could not be
        processed.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A non-positive step would either crash range() or silently skip all rows.
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    # Create the image directory once up front; saving a figure into a
    # missing directory fails and the file would be reported as skipped.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    feature_data = []
    skipped_files = []

    for i in range(0, len(df), batch_size):
        batch = df.iloc[i:i+batch_size]
        batch_feature_data = []  # Temporary storage for batch features
        for _, row in tqdm(batch.iterrows(), total=len(batch)):
            possible_paths = [
                os.path.join(base_dir, 'Augmented Recordings', row['processed_file']),
                os.path.join(base_dir, 'Processed Recordings', row['processed_file'])
            ]

            # First existing candidate wins; None when the file is in neither folder.
            file_path = next((path for path in possible_paths if os.path.exists(path)), None)

            if file_path is None:
                print(f"File not found: {row['processed_file']}")
                skipped_files.append(row['processed_file'])
                continue

            # Only the summary vector is kept; the raw feature arrays are dropped.
            feature_vector, _ = process_audio_file(file_path, output_dir=output_dir)

            if feature_vector is not None:
                batch_feature_data.append({
                    'processed_file': row['processed_file'],
                    'feature_vector': feature_vector,
                })
            else:
                skipped_files.append(row['processed_file'])

            # More frequent garbage collection to avoid memory overflow
            gc.collect()

        # Append batch data to main feature data and save to disk
        feature_data.extend(batch_feature_data)
        batch_df = pd.DataFrame(batch_feature_data)
        batch_df.to_csv(f"feature_data_batch_{i//batch_size}.csv", index=False)

        # Clear batch data from memory
        batch_feature_data.clear()
        gc.collect()

    print(f"Total files skipped: {len(skipped_files)}")
    return feature_data, skipped_files


In [13]:
# Usage

base_dir = os.getcwd()
output_dir = 'mel-spectrograms'

# The batch function saves spectrogram images into a relative
# 'mel-spectrograms' folder. Create it first: saving into a missing directory
# fails and every file would end up in the skipped list.
os.makedirs(output_dir, exist_ok=True)

feature_data, skipped_files = process_audio_files_batch(augmented_data, base_dir)

# Convert feature_data to DataFrame. Explicit columns keep the merge key
# present even when no file produced features.
feature_df = pd.DataFrame(feature_data, columns=['processed_file', 'feature_vector'])

# Merge the new feature DataFrame with the existing final_data DataFrame
final_data = pd.merge(augmented_data, feature_df, on='processed_file', how='left')

# Drop rows corresponding to skipped files
final_data = final_data[~final_data['processed_file'].isin(skipped_files)]

# info() prints its report itself and returns None, so it is not wrapped in print()
final_data.info()

print(f"\nTotal files in augmented_data: {len(augmented_data)}")
print(f"Files successfully processed: {len(feature_df)}")
# skipped_files also contains files that were not found or raised an error
print(f"Files skipped (missing, invalid, or failed): {len(skipped_files)}")
print(f"Files in final_data after dropping skipped files: {len(final_data)}")

Testing feature extraction on file: Augmented Recordings\Setophaga_magnolia_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_828025_segment_5_aug_3.wav
mel_spectrogram_db shape: (128, 453)
mfccs shape: (13, 453), mean: -7.0931, std: 97.3944
spectral_centroids shape: (453,), mean: 2883.1173, std: 1035.0546
chroma shape: (12, 453), mean: 0.8048, std: 0.1477
zero_crossing_rate shape: (453,), mean: 0.0173, std: 0.0195
spectral_rolloff shape: (453,), mean: 6860.2028, std: 2504.3488

Summarized Features:
mfccs_summary type: numpy.ndarray, shape: (39,), mean: 4.5996, std: 70.4689
spectral_centroids_summary type: numpy.ndarray, shape: (3,), mean: 4322.4834, std: 3426.4439
chroma_summary type: numpy.ndarray, shape: (36,), mean: 0.6488, std: 0.3681
zero_crossing_rate_summary type: numpy.ndarray, shape: (3,), mean: 0.0705, std: 0.0737
spectral_rolloff_summary type: numpy.ndarray, shape: (3,), mean: 9122.1031, std: 6525.8239

Final feature vector shape: (84,)
Mel-spectrogram saved as: Setophaga_magnolia_

In [7]:
# Usage

base_dir = os.getcwd()
output_dir = 'mel-spectrograms'

# The batch function saves spectrogram images into a relative
# 'mel-spectrograms' folder. Create it first: saving into a missing directory
# fails and every file would end up in the skipped list.
os.makedirs(output_dir, exist_ok=True)

feature_data, skipped_files = process_audio_files_batch(augmented_data, base_dir)

# Convert feature_data to DataFrame. Explicit columns keep the merge key
# present even when no file produced features.
feature_df = pd.DataFrame(feature_data, columns=['processed_file', 'feature_vector'])

# Merge the new feature DataFrame with the existing final_data DataFrame
final_data = pd.merge(augmented_data, feature_df, on='processed_file', how='left')

# Drop rows corresponding to skipped files
final_data = final_data[~final_data['processed_file'].isin(skipped_files)]

# info() prints its report itself and returns None, so it is not wrapped in print()
final_data.info()

print(f"\nTotal files in augmented_data: {len(augmented_data)}")
print(f"Files successfully processed: {len(feature_df)}")
# skipped_files also contains files that were not found or raised an error
print(f"Files skipped (missing, invalid, or failed): {len(skipped_files)}")
print(f"Files in final_data after dropping skipped files: {len(final_data)}")

  3%|▎         | 26/1000 [00:07<04:52,  3.33it/s]



 11%|█         | 111/1000 [00:32<04:05,  3.63it/s]



 12%|█▏        | 120/1000 [00:35<04:51,  3.02it/s]



 15%|█▍        | 147/1000 [00:42<03:59,  3.56it/s]



 17%|█▋        | 167/1000 [00:48<04:24,  3.15it/s]



 19%|█▉        | 188/1000 [00:54<04:15,  3.18it/s]



 25%|██▍       | 246/1000 [01:10<03:24,  3.68it/s]



 28%|██▊       | 279/1000 [01:19<03:18,  3.63it/s]



 28%|██▊       | 282/1000 [01:20<02:45,  4.35it/s]



 29%|██▊       | 286/1000 [01:21<02:53,  4.11it/s]



 30%|██▉       | 295/1000 [01:24<03:18,  3.56it/s]



 30%|███       | 301/1000 [01:25<02:57,  3.93it/s]



 34%|███▍      | 342/1000 [01:36<02:56,  3.72it/s]



 35%|███▍      | 346/1000 [01:37<02:34,  4.24it/s]



 35%|███▌      | 350/1000 [01:37<02:27,  4.42it/s]



 50%|█████     | 501/1000 [02:20<02:16,  3.66it/s]



 71%|███████   | 710/1000 [03:20<01:24,  3.44it/s]



 71%|███████▏  | 714/1000 [03:21<01:11,  4.02it/s]



 72%|███████▏  | 719/1000 [03:22<01:08,  4.07it/s]



 78%|███████▊  | 780/1000 [03:41<01:01,  3.58it/s]



 79%|███████▉  | 791/1000 [03:44<00:56,  3.69it/s]



 81%|████████▏ | 813/1000 [03:50<00:51,  3.66it/s]



 82%|████████▏ | 823/1000 [03:52<00:48,  3.61it/s]



 86%|████████▌ | 861/1000 [04:03<00:38,  3.57it/s]



100%|██████████| 1000/1000 [04:46<00:00,  3.49it/s]
  3%|▎         | 31/1000 [00:08<04:26,  3.64it/s]



 15%|█▍        | 146/1000 [00:40<04:37,  3.07it/s]



 18%|█▊        | 185/1000 [00:51<03:48,  3.56it/s]



 22%|██▏       | 215/1000 [01:00<03:37,  3.61it/s]



 22%|██▏       | 220/1000 [01:01<03:18,  3.93it/s]



 26%|██▌       | 262/1000 [01:20<05:00,  2.45it/s]



 29%|██▉       | 292/1000 [01:28<03:22,  3.50it/s]



 30%|██▉       | 298/1000 [01:29<02:47,  4.19it/s]



 31%|███       | 312/1000 [01:33<03:11,  3.59it/s]



 32%|███▏      | 316/1000 [01:34<02:49,  4.03it/s]



 32%|███▎      | 325/1000 [01:36<03:09,  3.57it/s]



 33%|███▎      | 330/1000 [01:37<02:49,  3.95it/s]



 46%|████▌     | 457/1000 [02:13<02:34,  3.51it/s]



 46%|████▌     | 460/1000 [02:14<02:08,  4.22it/s]



 47%|████▋     | 472/1000 [02:17<02:29,  3.54it/s]



 51%|█████     | 506/1000 [02:26<02:24,  3.43it/s]



 51%|█████     | 509/1000 [02:27<01:58,  4.14it/s]



 52%|█████▏    | 517/1000 [02:29<02:12,  3.63it/s]



 53%|█████▎    | 526/1000 [02:31<02:10,  3.62it/s]



 54%|█████▍    | 540/1000 [02:35<02:10,  3.53it/s]



 56%|█████▋    | 563/1000 [02:42<02:06,  3.46it/s]



 59%|█████▊    | 586/1000 [02:56<04:02,  1.71it/s]



 66%|██████▋   | 663/1000 [03:19<01:39,  3.37it/s]



 67%|██████▋   | 669/1000 [03:21<01:30,  3.67it/s]



 70%|██████▉   | 699/1000 [03:29<01:29,  3.37it/s]



 71%|███████   | 712/1000 [03:33<01:22,  3.48it/s]



 72%|███████▏  | 722/1000 [03:36<01:22,  3.37it/s]



 74%|███████▎  | 735/1000 [03:39<01:18,  3.40it/s]



 77%|███████▋  | 768/1000 [03:49<01:09,  3.35it/s]



 78%|███████▊  | 782/1000 [03:53<01:01,  3.54it/s]



 90%|█████████ | 903/1000 [04:28<00:28,  3.45it/s]



 91%|█████████ | 909/1000 [04:30<00:25,  3.59it/s]



 98%|█████████▊| 982/1000 [05:06<00:11,  1.53it/s]



100%|█████████▉| 997/1000 [05:10<00:00,  3.29it/s]



100%|██████████| 1000/1000 [05:10<00:00,  3.22it/s]
  1%|▏         | 14/1000 [00:05<05:16,  3.12it/s]



  3%|▎         | 29/1000 [00:09<05:00,  3.23it/s]



  7%|▋         | 68/1000 [00:22<05:01,  3.09it/s]



  8%|▊         | 79/1000 [00:25<04:49,  3.18it/s]



  9%|▉         | 89/1000 [00:28<04:34,  3.32it/s]



 11%|█▏        | 113/1000 [00:35<04:40,  3.16it/s]



 15%|█▌        | 154/1000 [00:48<04:19,  3.26it/s]



 22%|██▏       | 221/1000 [01:10<03:47,  3.42it/s]



 22%|██▎       | 225/1000 [01:11<03:26,  3.75it/s]



 32%|███▏      | 323/1000 [01:41<03:35,  3.14it/s]



 35%|███▍      | 347/1000 [01:49<04:09,  2.61it/s]



 38%|███▊      | 384/1000 [02:00<02:59,  3.44it/s]



 39%|███▉      | 394/1000 [02:03<02:57,  3.41it/s]



 42%|████▏     | 421/1000 [02:11<03:04,  3.14it/s]



 43%|████▎     | 427/1000 [02:13<02:49,  3.38it/s]



 44%|████▎     | 435/1000 [02:15<02:50,  3.31it/s]



 48%|████▊     | 484/1000 [02:31<02:43,  3.16it/s]



 49%|████▉     | 494/1000 [02:34<02:33,  3.29it/s]



 58%|█████▊    | 578/1000 [03:37<05:27,  1.29it/s]



 61%|██████    | 606/1000 [03:59<05:07,  1.28it/s]



 62%|██████▏   | 618/1000 [04:07<04:27,  1.43it/s]



 62%|██████▎   | 625/1000 [04:11<04:17,  1.46it/s]



 66%|██████▌   | 656/1000 [04:33<04:30,  1.27it/s]



 68%|██████▊   | 678/1000 [04:48<03:59,  1.34it/s]



 70%|██████▉   | 695/1000 [05:01<03:57,  1.29it/s]



 77%|███████▋  | 774/1000 [06:01<02:55,  1.29it/s]



 79%|███████▉  | 789/1000 [06:12<02:44,  1.28it/s]



 80%|███████▉  | 797/1000 [06:18<02:33,  1.32it/s]



 83%|████████▎ | 830/1000 [06:42<02:10,  1.30it/s]



 84%|████████▍ | 845/1000 [06:53<01:59,  1.30it/s]



 86%|████████▌ | 857/1000 [07:02<01:48,  1.32it/s]



 94%|█████████▍| 945/1000 [08:09<00:40,  1.36it/s]



 96%|█████████▌| 958/1000 [08:19<00:31,  1.33it/s]



100%|█████████▉| 997/1000 [08:47<00:02,  1.34it/s]



100%|██████████| 1000/1000 [08:49<00:00,  1.89it/s]

In [None]:
# Review the dataframe: print the column, non-null count and dtype summary of final_data
final_data.info()

In [None]:
# Save DataFrame to CSV (the row index is not written)
# NOTE(review): the 'feature_vector' column holds NumPy arrays, which to_csv
# presumably writes as their text representation; confirm that downstream
# readers can parse that format back into numbers.
final_data.to_csv('final_data.csv', index=False)

print("Processing complete. Summary data saved to 'final_data.csv'.")