In [1]:
# Imports
import os
import random
import logging
import warnings
import gc

import numpy as np
import pandas as pd
from tqdm import tqdm


# Data processing and scientific computing
from scipy.io import wavfile
from scipy.signal import butter, lfilter
from scipy.spatial.distance import cosine

# Audio processing
import librosa
import soundfile as sf

# Visualization
import matplotlib.pyplot as plt
import plotly.express as px

# Set up logging
logging.basicConfig(level=logging.INFO)

### Data Cleansing
    - Remove low-quality or noisy recordings based on metadata if available, or by manually inspecting a few samples.
	    - It's a good idea to manually inspect some of the longer clips, especially outliers.
	    - - Listen for: a) Consistency of the bird call throughout the clip b) Presence of long periods of silence c) Sudden changes in background noise or environment
		- If you find issues, you might consider trimming these clips to the most relevant sections before segmenting.
		- - Definitely inspect a sample of low-rated files.
		- Listen for: a) Clarity of the bird call b) Signal-to-noise ratio (how clear the bird call is compared to background noise) c) Presence of distortions or artifacts
		- Consider setting a threshold for the quality rating, below which you might exclude files from your dataset.
		- Determining if a file is too "noisy":

		- This can be subjective, but here are some approaches: a) Signal-to-Noise Ratio (SNR): Calculate the SNR for each file. Files below a certain threshold could be considered too noisy. b) Spectral analysis: Look at the spectrogram. A very noisy file will have a lot of energy spread across all frequencies. c) Perceptual evaluation: Listen to a sample and rate them yourself. This can help you calibrate your automatic methods.

In [2]:
data = pd.read_csv('preprocessed_data.csv')

In [3]:
data.columns

Index(['genus', 'species', 'latitude', 'longitude', 'quality', 'file_name',
       'simplified_type', 'season', 'time_of_day', 'length_seconds'],
      dtype='object')

In [4]:
data['file_name'].value_counts()

file_name
Branta_canadensis_Whitewater_Township_near__Harrison_Hamilton_County_Ohio_726750.mp3                 1
Molothrus_ater_Lawrence_Woods_SNP_417996.mp3                                                         1
Agelaius_phoeniceus_Crosby_Township_near__Harrison_Hamilton_County_Ohio_482675.mp3                   1
Agelaius_phoeniceus_Case_Farm_Gates_Mills_Ohio_98722.mp3                                             1
Agelaius_phoeniceus_Ohio_Crane_Creek_SP_17011.mp3                                                    1
                                                                                                    ..
Tachycineta_bicolor_Pelee_Shuttle_Stop_near__Leamington_Essex_County_Ontario_477045.mp3              1
Tachycineta_bicolor_Ohio_Crane_Creek_SP_17046.mp3                                                    1
Progne_subis_Vermilion_River_Reservation:_Mill_Hollow_near__Vermilion_Erie_County_Ohio_784046.mp3    1
Progne_subis_Ottawa_NWR_Lucas_County_Ohio_803616.mp3           

### The metadata contains a column with a 'quality' rating. I'll manually examine those entries.

In [5]:
data['quality'].value_counts()

quality
A           274
B           262
C           116
D            32
E             4
no score      4
Name: count, dtype: int64

In [6]:

low_quality_files = data[(data['quality'] == 'E') | (data['quality'] == 'no score')]
display(low_quality_files)

Unnamed: 0,genus,species,latitude,longitude,quality,file_name,simplified_type,season,time_of_day,length_seconds
182,Vireo,bellii,39.2095,-84.7821,E,Vireo_bellii_Whitewater_Township_near__Harriso...,Song,Summer,Evening,3
197,Baeolophus,bicolor,40.942,-81.5236,no score,Baeolophus_bicolor_Ohio_near__Akron_Summit_Cou...,Call,Summer,Morning,15
227,Tachycineta,bicolor,41.9682,-82.5305,no score,Tachycineta_bicolor_Pelee_near__Leamington_Ess...,Call,Spring,Morning,2
364,Spizella,passerina,41.93338,-83.54994,E,Spizella_passerina_Michigan_Monroe_County_1717...,Song,Summer,Unknown,28
365,Spizella,pusilla,39.8888,-82.7978,no score,Spizella_pusilla_Madison_Township_near__Canal_...,Song,Spring,Afternoon,20
395,Melospiza,melodia,41.93338,-83.54994,E,Melospiza_melodia_Michigan_Monroe_County_17143...,Song,Summer,Unknown,34
563,Geothlypis,trichas,41.1895,-81.5781,E,Geothlypis_trichas_Ohio_near__Peninsula_Summit...,Song,Summer,Morning,30
687,Sonus,naturalis,41.433,-81.418,no score,Sonus_naturalis_Chagrin_Falls_Township_near__M...,Song,Spring,Afternoon,28


### Convert mp3 to wav

In [7]:
def convert_mp3_to_wav(mp3_path, wav_path):
    """
    Convert an MP3 file to WAV format using librosa and soundfile.
    
    Args:
    mp3_path (str): Path to the input MP3 file
    wav_path (str): Path to save the output WAV file
    
    Returns:
    str: Path to the created WAV file
    
    Raises:
    FileNotFoundError: If the input MP3 file is not found
    """
    if not os.path.exists(mp3_path):
        raise FileNotFoundError(f"MP3 file not found: {mp3_path}")
    
    # Load the mp3 file
    audio, sr = librosa.load(mp3_path, sr=None, mono=False)
    
    # Save as wav
    sf.write(wav_path, audio.T, sr)
    
    return wav_path

def batch_convert_to_wav(data, input_dir, output_dir):
    """
    Convert all MP3 files in the dataset to WAV format.
    
    Args:
    data (pd.DataFrame): DataFrame containing file information
    input_dir (str): Directory containing the input MP3 files
    output_dir (str): Directory to save the output WAV files
    
    Returns:
    tuple: (pd.DataFrame, list) Updated DataFrame with new file paths and list of files not found
    """
    os.makedirs(output_dir, exist_ok=True)
    
    new_data = data.copy()
    files_not_found = []
    
    for index, row in new_data.iterrows():
        mp3_path = os.path.join(input_dir, row['file_name'])
        wav_filename = os.path.splitext(row['file_name'])[0] + '.wav'
        wav_path = os.path.join(output_dir, wav_filename)
        
        try:
            convert_mp3_to_wav(mp3_path, wav_path)
            new_data.at[index, 'file_name'] = wav_filename
        except FileNotFoundError:
            files_not_found.append(row['file_name'])
            new_data = new_data.drop(index)
    
    return new_data.reset_index(drop=True), files_not_found

In [8]:
original_dir = 'Original Recordings'
converted_dir = 'Converted Recordings'

# Convert the MP3 files to WAV
print("Converting MP3 files to WAV...")
converted_data, missing_files = batch_convert_to_wav(data, original_dir, converted_dir)

# Print summary
print(f"Conversion complete. {len(converted_data)} files converted.")
print(f"WAV files saved in: {converted_dir}")
print(f"Number of files not found: {len(missing_files)}")

# If you want to examine the list of missing files
print("List of files not found:")
print(missing_files)

Converting MP3 files to WAV...
Conversion complete. 691 files converted.
WAV files saved in: Converted Recordings
Number of files not found: 1
List of files not found:
['Colaptes_auratus_Miami_Township_near__North_Bend_Hamilton_County_Ohio_713588.mp3']


In [9]:
converted_data['file_name'].value_counts()

file_name
Branta_canadensis_Whitewater_Township_near__Harrison_Hamilton_County_Ohio_726750.wav                 1
Molothrus_ater_Lawrence_Woods_SNP_417996.wav                                                         1
Agelaius_phoeniceus_Crosby_Township_near__Harrison_Hamilton_County_Ohio_482675.wav                   1
Agelaius_phoeniceus_Case_Farm_Gates_Mills_Ohio_98722.wav                                             1
Agelaius_phoeniceus_Ohio_Crane_Creek_SP_17011.wav                                                    1
                                                                                                    ..
Tachycineta_bicolor_Ohio_Crane_Creek_SP_17046.wav                                                    1
Progne_subis_Vermilion_River_Reservation:_Mill_Hollow_near__Vermilion_Erie_County_Ohio_784046.wav    1
Progne_subis_Ottawa_NWR_Lucas_County_Ohio_803616.wav                                                 1
Progne_subis_Magee_Marsh_Ohio_164751.wav                       

"""
Audio Cleaning Functions

These functions collectively clean an audio file by:
1. Calculating its signal-to-noise ratio (SNR) and filtering out audio that is too noisy.
2. Detecting and trimming long silences from the audio.
3. Checking for spectral spread, which is an indicator of unwanted noise or anomalies.

Main function:
- `clean_audio`: Uses `is_too_noisy`, `has_long_silence`, and `check_spectral_spread` to decide if an audio file is suitable for further processing.
"""

- **Feature Extraction with Librosa**:
    - Extract features like **Mel-spectrograms** and **MFCCs** from each audio file. These features are effective for audio classification tasks.
    - Store these features as images (for CNN input) or numerical arrays (for models like Random Forest or RNNs).

    - **Audio Standardization**:
    - Convert all files to a consistent format (e.g., 16-bit WAV, mono-channel, and a sampling rate like 16 kHz).
- **Clip Standardization**:
    - Trim or pad each audio clip to a standard duration (e.g., 5 seconds), so all inputs have the same shape.

### Start with resampling so every file has the same sample rate

In [10]:
# Define global variable
# Define sample rate
sr = 44100

In [11]:

def resample_audio(file_path, target_sr=sr):
    try:
        # Load the audio file
        audio, sr = librosa.load(file_path, sr=None)
        
        # Resample if necessary
        if sr != target_sr:
            audio = librosa.resample(y=audio, orig_sr=sr, target_sr=target_sr)
            
            # Overwrite the original file
            sf.write(file_path, audio, target_sr)
            
            return True  # Indicate that resampling was performed
        else:
            return False  # Indicate that no resampling was needed
    except Exception as e:
        logging.error(f"Error processing {file_path}: {str(e)}")
        return None

def resample_all_files(directory='Converted Recordings', target_sr=44100):
    total_files = 0
    resampled_files = 0
    error_files = 0
    
    # Iterate through all files in the directory
    for filename in os.listdir(directory):
        if filename.endswith('.wav'):  # Assuming we're only processing .wav files
            total_files += 1
            file_path = os.path.join(directory, filename)
            result = resample_audio(file_path, target_sr)
            if result is True:
                resampled_files += 1
            elif result is None:
                error_files += 1
    
    print(f"Resampling process complete.")
    print(f"Total files checked: {total_files}")
    print(f"Files resampled: {resampled_files}")
    if error_files > 0:
        print(f"Files with errors: {error_files}")

# Usage
logging.basicConfig(level=logging.ERROR)  # Only log errors
resample_all_files('Converted Recordings')

Resampling process complete.
Total files checked: 690
Files resampled: 311


In [12]:
def calculate_snr(audio):
    """Calculate the signal-to-noise ratio of an audio clip."""
    signal = np.mean(audio**2)
    noise = np.mean((audio - np.mean(audio))**2)
    snr = 10 * np.log10(signal / noise)
    return snr

def is_too_noisy(audio, sr, threshold=-20):
    """Check if audio is too noisy based on its SNR."""
    snr = calculate_snr(audio)
    return snr < threshold

def has_long_silence(audio, sr, silence_threshold=-60, min_silence_duration=1.0):
    """Detects long silences within the audio clip."""
    intervals = librosa.effects.split(audio, top_db=-silence_threshold)
    if len(intervals) > 1:
        silence_durations = np.diff(intervals.ravel())[1::2] / sr
        return np.any(silence_durations >= min_silence_duration)
    return False

def check_spectral_spread(audio, sr, threshold=0.8):
    """Check if the spectral spread exceeds the specified threshold."""
    spec = np.abs(librosa.stft(audio))
    spectral_spread = np.sum(spec > np.mean(spec)) / spec.size
    return spectral_spread > threshold

def clean_audio(audio, sr, file_path, shared_discarded_files):
    """Cleans an audio file by removing noise, silence, and checking for spectral spread."""
    # Get file name for logging
    file_name = os.path.basename(file_path)

    # Check noise level
    if is_too_noisy(audio, sr):
        shared_discarded_files.append({'file_path': file_path, 'reason': 'too_noisy', 'snr': calculate_snr(audio)})
        return None
    
    # Check for long silences
    if has_long_silence(audio, sr):
        audio = librosa.effects.trim(audio, top_db=20)[0]
    
    # Check spectral spread
    if check_spectral_spread(audio, sr):
        shared_discarded_files.append({'file_path': file_path, 'reason': 'bad_spectral_spread'})
        return None
    
    return audio

### Testing the functions on a single file

In [13]:


# Choose a random file
random_file = random.choice(converted_data['file_name'].tolist())
file_path = os.path.join('Converted Recordings', random_file)

# Load the audio file
audio, _ = librosa.load(file_path, sr=sr)

# Test calculate_snr function
snr = calculate_snr(audio)
print(f"calculate_snr completed. SNR: {snr}")

# Test is_too_noisy function
too_noisy = is_too_noisy(audio, sr)
print(f"is_too_noisy completed. Result: {too_noisy}")

# Test has_long_silence function
long_silence = has_long_silence(audio, sr)
print(f"has_long_silence completed. Result: {long_silence}")

# Test check_spectral_spread function
spectral_spread = check_spectral_spread(audio, sr)
print(f"check_spectral_spread completed. Result: {spectral_spread}")

# Test clean_audio function
shared_discarded_files = []
cleaned_audio = clean_audio(audio, sr, file_path, shared_discarded_files)
print(f"clean_audio completed. Cleaned audio returned: {'Yes' if cleaned_audio is not None else 'No'}")
if shared_discarded_files:
    print(f"File discarded. Reason: {shared_discarded_files[0]['reason']}")

print(f"\nTested file: {file_path}")

calculate_snr completed. SNR: 6.730346626682149e-06
is_too_noisy completed. Result: False
has_long_silence completed. Result: False
check_spectral_spread completed. Result: False
clean_audio completed. Cleaned audio returned: Yes

Tested file: Converted Recordings\Empidonax_virescens_Harrison_Township_near__Harrison_Hamilton_County_Ohio_728272.wav


### Test duplication functions two files

In [14]:
def get_audio_fingerprint(audio, sr):
    n_fft = min(2048, len(audio))
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13, n_fft=n_fft)
    return np.mean(mfccs, axis=1)

def are_near_duplicates(audio1, sr1, audio2, sr2, threshold=0.99):
    if len(audio1) == 0 or len(audio2) == 0:
        raise ValueError("One or both audio files are empty")
    
    if sr1 != sr2:
        print(f"Warning: Sample rates differ ({sr1} vs {sr2}). Resampling may be necessary.")
    
    fp1 = get_audio_fingerprint(audio1, sr1)
    fp2 = get_audio_fingerprint(audio2, sr2)
    
    if len(fp1) != len(fp2):
        raise ValueError("Fingerprints have different lengths")
    
    similarity = 1 - cosine(fp1, fp2)
    return similarity > threshold

def check_audio_duplicates(file_path, shared_duplicates, shared_discarded_files):
    try:
        # Load the audio
        audio, sr = librosa.load(file_path, sr=None)
        
        # Check for duplicates
        for existing_audio, existing_sr, existing_path in shared_duplicates:
            if are_near_duplicates(audio, sr, existing_audio, existing_sr):
                shared_discarded_files.append((file_path, 'duplicate'))
                return True  # It's a duplicate

        # If not a duplicate, store the audio and path
        shared_duplicates.append((audio, sr, file_path))
        return False  # It's not a duplicate
    except Exception as e:
        logging.error(f"Error checking duplicates for {file_path}: {str(e)}")
        shared_discarded_files.append((file_path, f'error: {str(e)}'))
        return None  # Error occurred

def check_and_remove_duplicates(directory='Converted Recordings'):
    shared_duplicates = []
    shared_discarded_files = []
    duplicate_files = []
    total_files = 0

    # Check each file in the directory for duplicates
    for filename in os.listdir(directory):
        if filename.endswith('.wav'):  # We're only processing .wav files
            total_files += 1
            file_path = os.path.join(directory, filename)
            is_duplicate = check_audio_duplicates(file_path, shared_duplicates, shared_discarded_files)
            if is_duplicate:
                duplicate_files.append(file_path)

    # Log the results
    print(f"Total files checked: {total_files}")
    print(f"Duplicate files found: {len(duplicate_files)}")

    return duplicate_files, shared_discarded_files

In [15]:
### Check duplicate checking functions on two files

# Define sample rate
sr = 44100

# Choose two random files
random_files = random.sample(converted_data['file_name'].tolist(), 2)
file_paths = [os.path.join('Converted Recordings', file) for file in random_files]

# Load the audio files
audio1, _ = librosa.load(file_paths[0], sr=sr)
audio2, _ = librosa.load(file_paths[1], sr=sr)

# Test get_audio_fingerprint function
print("Testing get_audio_fingerprint function:")
fingerprint1 = get_audio_fingerprint(audio1, sr)
print(f"get_audio_fingerprint completed for file 1.")
print(f"Fingerprint shape: {fingerprint1.shape}")
print(f"Fingerprint: {fingerprint1}")

fingerprint2 = get_audio_fingerprint(audio2, sr)
print(f"get_audio_fingerprint completed for file 2.")
print(f"Fingerprint shape: {fingerprint2.shape}")
print(f"Fingerprint: {fingerprint2}")

# Test are_near_duplicates function with different thresholds
print("\nTesting are_near_duplicates function:")
thresholds = [0.95, 0.99, 0.999]
for threshold in thresholds:
    try:
        similarity = 1 - cosine(fingerprint1, fingerprint2)
        are_duplicates = similarity > threshold
        print(f"Threshold: {threshold}")
        print(f"Similarity score: {similarity}")
        print(f"Result: The two files are {'near duplicates' if are_duplicates else 'not near duplicates'}.")
    except ValueError as e:
        print(f"Error occurred: {str(e)}")

print(f"\nTested files:")
print(f"File 1: {file_paths[0]}")
print(f"File 2: {file_paths[1]}")

Testing get_audio_fingerprint function:
get_audio_fingerprint completed for file 1.
Fingerprint shape: (13,)
Fingerprint: [-335.4797     117.9705     -53.542744    58.577644   -19.482904
   21.448452     8.114096    -2.8787546    8.390721     9.897803
   -2.609089     4.336633     9.361945 ]
get_audio_fingerprint completed for file 2.
Fingerprint shape: (13,)
Fingerprint: [-2.8946176e+02  1.0245418e+02 -1.9769826e+00  7.9572072e+00
  1.9233978e+01  2.7745043e+01  3.2369759e+00  1.0591698e+01
  1.0861780e+01  1.8579466e+01  9.7795129e-02  4.0076404e+00
 -5.9672439e-01]

Testing are_near_duplicates function:
Threshold: 0.95
Similarity score: 0.9719074631664746
Result: The two files are near duplicates.
Threshold: 0.99
Similarity score: 0.9719074631664746
Result: The two files are not near duplicates.
Threshold: 0.999
Similarity score: 0.9719074631664746
Result: The two files are not near duplicates.

Tested files:
File 1: Converted Recordings\Sialia_sialis_Cincinnati_Hamilton_County_Ohio

In [16]:
# # Apply duplicate checking to all files
# logging.basicConfig(level=logging.INFO)
# duplicates, discarded_files = check_and_remove_duplicates('Converted Recordings')

# print(f"Duplicate files found: {len(duplicates)}")
# print("List of duplicate files:")
# for file in duplicates:
#     print(file)

# print(f"\nTotal discarded files: {len(discarded_files)}")
# print("List of discarded files and reasons:")
# for file, reason in discarded_files:
#     print(f"{file}: {reason}")

In [17]:
def process_audio(file_path, shared_discarded_files, target_length=5, overlap=0.5, target_sr=44100):
    print(f"Processing file: {file_path}")  # For process_audio
    try:
        # Load and clean the audio
        audio, sr = librosa.load(file_path, sr=None)
        audio = clean_audio(audio, sr, file_path, shared_discarded_files)
        if audio is None:
            return None, None
        
        # Check if audio is shorter than 4410 samples (100ms at 44.1kHz)
        if len(audio) < 4410:
            shared_discarded_files.append((file_path, 'too_short'))
            return None, None
            
        # Convert target_length to samples
        target_samples = int(sr * target_length)
        
        # If audio is shorter than target length, pad with zeros
        if len(audio) < target_samples:
            audio = np.pad(audio, (0, target_samples - len(audio)))
        
        # If audio is longer than target length, segment with overlap
        else:
            segments = []
            for start in range(0, len(audio), int(target_samples * (1 - overlap))):
                segment = audio[start:start + target_samples]
                if len(segment) == target_samples:
                    segments.append(segment)
                elif len(segment) > 0:
                    segment = np.pad(segment, (0, target_samples - len(segment)))
                    segments.append(segment)
            audio = np.array(segments)

            # Final check and padding for all segments
        if len(audio.shape) == 1:  # Single segment
            if len(audio) < target_samples:
                audio = np.pad(audio, (0, target_samples - len(audio)))
            elif len(audio) > target_samples:
                audio = audio[:target_samples]
        else:  # Multiple segments
            padded_segments = []
            for segment in audio:
                if len(segment) < target_samples:
                    segment = np.pad(segment, (0, target_samples - len(segment)))
                elif len(segment) > target_samples:
                    segment = segment[:target_samples]
                padded_segments.append(segment)
            audio = np.array(padded_segments)

        return audio, sr
    except Exception as e:
        logging.error(f"Error processing {file_path}: {str(e)}")
        shared_discarded_files.append((file_path, f'error: {str(e)}'))
        return None, None


In [18]:
def process_file(args):
    row, audio_dir, output_dir, shared_discarded_files = args
    file_path = os.path.join(audio_dir, row['file_name'])
    
    if not os.path.exists(file_path):
        logging.warning(f"File not found: {file_path}")
        return None
    
    processed_audio, sr = process_audio(file_path, shared_discarded_files)
    if processed_audio is None:
        return None
    
    processed_data = []
    if processed_audio.ndim == 2:
        for i, segment in enumerate(processed_audio):
            new_row = row.copy()
            base_filename = f"{os.path.splitext(row['file_name'])[0]}_segment_{i}"
            new_row['processed_file'] = f"{base_filename}.wav"
            wavfile.write(os.path.join(output_dir, new_row['processed_file']), sr, segment)
            processed_data.append(new_row)
    else:
        base_filename = f"{os.path.splitext(row['file_name'])[0]}_processed"
        row['processed_file'] = f"{base_filename}.wav"
        wavfile.write(os.path.join(output_dir, row['processed_file']), sr, processed_audio)
        processed_data.append(row)
    
    return processed_data


In [19]:
def process_dataset(converted_data, audio_dir, output_dir):
    manager = multiprocessing.Manager()
    shared_discarded_files = manager.list()
    
    print(f"Audio directory: {audio_dir}, Output directory: {output_dir}")
    results = []
    
    for _, row in converted_data.iterrows():
        results.append(process_file((row, audio_dir, output_dir, shared_discarded_files)))
    
    processed_data = [item for sublist in results if sublist is not None for item in sublist]

    # Save discarded files to a DataFrame and export as CSV
    discard_log_df = pd.DataFrame(list(shared_discarded_files), columns=['file_path', 'reason'])
    discard_log_df.to_csv('discarded_audio_log.csv', index=False)

    return pd.DataFrame(processed_data)

In [20]:
## Testing audio processing functions on a single file

# Select a random file
random_file = random.choice(converted_data['file_name'])
file_path = os.path.join('Converted Recordings', random_file)

print(f"Testing audio processing on file: {random_file}")

# Create a dummy row for testing
test_row = pd.Series({'file_name': random_file})

# Create necessary directories
output_dir = 'Test'
os.makedirs(output_dir, exist_ok=True)

# Create a dummy shared_discarded_files list
shared_discarded_files = []

# Process the audio file
processed_audio, sr = process_audio(file_path, shared_discarded_files)

if processed_audio is not None:
    print(f"Audio processed successfully.")
    print(f"Processed audio shape: {processed_audio.shape}")
    print(f"Sample rate: {sr}")

    # Simulate the process_file function
    args = (test_row, 'Converted Recordings', output_dir, shared_discarded_files)
    processed_data = process_file(args)

    if processed_data is not None:
        print("\nProcessed data:")
        for item in processed_data:
            print(f"Processed file: {item['processed_file']}")
        print(f"Number of segments: {len(processed_data)}")

        # Verify the output files
        for item in processed_data:
            output_file = os.path.join(output_dir, item['processed_file'])
            if os.path.exists(output_file):
                print(f"Output file created: {output_file}")
                # Load and print some information about the output file
                audio, sr = librosa.load(output_file, sr=None)
                print(f"Output audio duration: {librosa.get_duration(y=audio, sr=sr):.2f} seconds")
            else:
                print(f"Error: Output file not created: {output_file}")
    else:
        print("Error: process_file returned None")
else:
    print("Error: Audio processing failed")

print("\nAudio processing test complete.")

# Print discarded files, if any
if shared_discarded_files:
    print("\nDiscarded files:")
    for file, reason in shared_discarded_files:
        print(f"{file}: {reason}")
else:
    print("\nNo files were discarded.")

Testing audio processing on file: Setophaga_caerulescens_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827750.wav
Processing file: Converted Recordings\Setophaga_caerulescens_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827750.wav
Audio processed successfully.
Processed audio shape: (24, 220500)
Sample rate: 44100
Row: Setophaga_caerulescens_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827750.wav
Processing file: Converted Recordings\Setophaga_caerulescens_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827750.wav

Processed data:
Processed file: Setophaga_caerulescens_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827750_segment_0.wav
Processed file: Setophaga_caerulescens_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827750_segment_1.wav
Processed file: Setophaga_caerulescens_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827750_segment_2.wav
Processed file: Setophaga_caerulescens_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827750_segment_3.wav
Processed file: Setophaga_caerulescens_Magee_Marsh_-_boardwalk_Lucas_Co

In [21]:
# Main execution
converted_dir = 'Converted Recordings'
processed_dir = 'Processed Recordings'

# Ensure output directory exists
if not os.path.exists(processed_dir):
    os.makedirs(processed_dir)

processed_data = process_dataset(converted_data, converted_dir, processed_dir)
print('Audio Processing Complete')

# Filter processed_data to only include files with "segment" in the file name
processed_data = processed_data[processed_data['processed_file'].str.contains('_segment_')]

# Print completion message and count files in Processed Recordings directory
processed_file_count = len([f for f in os.listdir(processed_dir) if f.endswith('.wav')])
print(f"\nAudio processing is complete. There are now {processed_file_count} files in the '{processed_dir}' directory.")

# Output discarded files
discard_log_df = pd.read_csv('discarded_audio_log.csv')
if not discard_log_df.empty:
    print("\nThe following files were discarded:")
    for _, row in discard_log_df.iterrows():
        print(f"{row['file_path']}: {row['reason']}")
else:
    print("\nNo files were discarded.")

Audio directory: Converted Recordings, Output directory: Processed Recordings
Row: Branta_canadensis_Whitewater_Township_near__Harrison_Hamilton_County_Ohio_726750.wav
Processing file: Converted Recordings\Branta_canadensis_Whitewater_Township_near__Harrison_Hamilton_County_Ohio_726750.wav
Row: Branta_canadensis_Lawrence_Woods_SNP_418000.wav
Processing file: Converted Recordings\Branta_canadensis_Lawrence_Woods_SNP_418000.wav
Row: Branta_canadensis_Miami_Whitewater_Forest_Park_wetlands_Crosby_Township_near__Harrison_Hamilton_County_Ohio_691528.wav
Processing file: Converted Recordings\Branta_canadensis_Miami_Whitewater_Forest_Park_wetlands_Crosby_Township_near__Harrison_Hamilton_County_Ohio_691528.wav
Row: Cygnus_buccinator_Killdeer_Plains_Wildlife_Management_Area_Wyandot_County_Ohio_713788.wav
Processing file: Converted Recordings\Cygnus_buccinator_Killdeer_Plains_Wildlife_Management_Area_Wyandot_County_Ohio_713788.wav
Row: Aix_sponsa_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_815809.w

In [22]:
print(processed_data.info())
processed_data.head()

<class 'pandas.core.frame.DataFrame'>
Index: 12120 entries, 0 to 690
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   genus            12120 non-null  object 
 1   species          12120 non-null  object 
 2   latitude         12120 non-null  float64
 3   longitude        12120 non-null  float64
 4   quality          12120 non-null  object 
 5   file_name        12120 non-null  object 
 6   simplified_type  12120 non-null  object 
 7   season           12120 non-null  object 
 8   time_of_day      12120 non-null  object 
 9   length_seconds   12120 non-null  int64  
 10  processed_file   12120 non-null  object 
dtypes: float64(2), int64(1), object(8)
memory usage: 1.1+ MB
None


Unnamed: 0,genus,species,latitude,longitude,quality,file_name,simplified_type,season,time_of_day,length_seconds,processed_file
0,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...
0,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...
0,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...
0,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...
0,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...


### Data Augmentation
### Augment the processed audio files.
- Pitch Shift
- Time Stretch
- add_noise
- change_speed
- apply_filter
- add_background
- time_shift
- augment_audio
- 

In [23]:

def generate_wind_sound(duration, sr):
    t = np.linspace(0, duration, int(sr * duration), False)
    wind = np.random.normal(0, 0.1, int(sr * duration))
    wind_filtered = np.convolve(wind, np.ones(1000)/1000, mode='same')
    return wind_filtered / np.max(np.abs(wind_filtered))

def generate_leaf_rustle(duration, sr):
    t = np.linspace(0, duration, int(sr * duration), False)
    leaf = np.random.normal(0, 0.1, int(sr * duration))
    envelope = np.exp(-t * 10) * np.sin(2 * np.pi * 2 * t)**2
    return leaf * envelope / np.max(np.abs(leaf * envelope))

def generate_water_sound(duration, sr):
    t = np.linspace(0, duration, int(sr * duration), False)
    water = np.random.normal(0, 0.1, int(sr * duration))
    water_filtered = np.convolve(water, np.ones(500)/500, mode='same')
    ripple = np.sin(2 * np.pi * 2 * t) * np.exp(-t * 2)
    return (water_filtered + ripple) / np.max(np.abs(water_filtered + ripple))

def mix_nature_sounds(duration, sr):
    wind = generate_wind_sound(duration, sr)
    leaf = generate_leaf_rustle(duration, sr)
    water = generate_water_sound(duration, sr)
    
    mix = wind * 0.7 + leaf * 0.2 + water * 0.1
    return mix / np.max(np.abs(mix))

# Generate a 5-second mix of nature-like sounds
sr = 44100
duration = 5
nature_background = mix_nature_sounds(duration, sr)

In [24]:

def pitch_shift(audio, sr, n_steps):
    n_fft = min(2048, len(audio))
    return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps, n_fft=n_fft)

def time_stretch(audio, rate):
    return librosa.effects.time_stretch(audio, rate=rate)

def add_noise(audio, noise_factor):
    noise = np.random.randn(len(audio))
    augmented_audio = audio + noise_factor * noise
    return np.clip(augmented_audio, -1, 1)

def change_speed(audio, speed_factor):
    return librosa.effects.time_stretch(audio, rate=1/speed_factor)

def apply_filter(audio, sr, filter_type='lowpass', cutoff=1000):
    nyquist = 0.5 * sr
    normal_cutoff = cutoff / nyquist
    b, a = butter(4, normal_cutoff, btype=filter_type, analog=False)
    return lfilter(b, a, audio)

def add_background(audio, background, ratio=0.1):
    if len(background) != len(audio):
        if len(background) > len(audio):
            start = np.random.randint(0, len(background) - len(audio))
            background = background[start:start+len(audio)]
        else:
            background = np.pad(background, (0, len(audio) - len(background)))
    
    if len(audio) != len(background):
        print(f"Shape mismatch in add_background: audio {len(audio)}, background {len(background)}")
        return audio  # Return original audio if shapes don't match
    
    return audio + ratio * background

def time_shift(audio, shift_max, roll_prob=0.5):
    shift = np.random.randint(-shift_max, shift_max)
    if random.random() < roll_prob:
        return np.roll(audio, shift)
    else:
        if shift > 0:
            return np.pad(audio, (shift, 0))[:len(audio)]
        else:
            return np.pad(audio, (0, -shift))[:-shift]

def augment_audio(audio, sr):
    augmentations = [
        (pitch_shift, {'n_steps': random.uniform(-2, 2)}),
        (time_stretch, {'rate': random.uniform(0.8, 1.2)}),
        (add_noise, {'noise_factor': random.uniform(0.001, 0.015)}),
        (change_speed, {'speed_factor': random.uniform(0.9, 1.1)}),
        (apply_filter, {'filter_type': random.choice(['lowpass', 'highpass']),
                        'cutoff': random.uniform(1000, 4000)}),
        (time_shift, {'shift_max': int(sr * 0.5)})
    ]
    
    num_augments = random.randint(2, 4)
    selected_augments = random.sample(augmentations, num_augments)
    
    applied_augmentations = []
    
    for augment_func, params in selected_augments:
        if augment_func.__name__ in ['pitch_shift', 'apply_filter']:
            audio = augment_func(audio, sr, **params)
        else:
            audio = augment_func(audio, **params)
        applied_augmentations.append(f"{augment_func.__name__}:{','.join(f'{k}={v}' for k, v in params.items())}")
    
    if random.random() < 0.5:
        nature_background = mix_nature_sounds(len(audio) / sr, sr)
        ratio = random.uniform(0.1, 0.3)
        audio = add_background(audio, nature_background, ratio=ratio)
        applied_augmentations.append(f"add_background:ratio={ratio:.2f}")
    
    return audio, applied_augmentations



In [25]:

def augment_and_save(input_file, output_dir, num_augmentations=3):
    try:
        # Load the audio file
        audio, sr = librosa.load(input_file, sr=None)
        
        augmented_files = []
        all_applied_augmentations = []
        
        for i in range(num_augmentations):
            # Apply augmentation
            augmented_audio, applied_augmentations = augment_audio(audio, sr)
            
            # Generate new filename
            base_name = os.path.basename(input_file)
            name, ext = os.path.splitext(base_name)
            new_name = f"{name}_aug_{i+1}{ext}"
            output_path = os.path.join(output_dir, new_name)
            
            # Save augmented audio
            sf.write(output_path, augmented_audio, sr)
            
            augmented_files.append(output_path)
            all_applied_augmentations.append(';'.join(applied_augmentations))
        
        return augmented_files, all_applied_augmentations
    
    except Exception as e:
        print(f"Error processing {input_file}: {str(e)}")
        return [], []


def process_dataframe(df, input_dir, output_dir, num_augmentations=3):
    new_rows = []
    
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing files"):
        input_file = os.path.join(input_dir, row['processed_file'])
        augmented_files, augmentations = augment_and_save(input_file, output_dir, num_augmentations)
        
        for aug_file, aug_details in zip(augmented_files, augmentations):
            new_row = row.copy()
            new_row['processed_file'] = os.path.relpath(aug_file, output_dir)
            new_row['augmentations'] = aug_details
            new_rows.append(new_row)
    
    augmented_df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
    return augmented_df


In [26]:
# Test Augmentation functions
# Filter processed_data to only include files with "segment" in the file name
processed_data = processed_data[processed_data['processed_file'].str.contains('_segment_')]

# Convert 'file_name' column to a list
file_names = processed_data['processed_file'].tolist()

# Select a random file
random_file = random.choice(file_names)
file_path = os.path.join('Processed Recordings', random_file)

# Load the audio file
audio, sr = librosa.load(file_path, sr=None)

# Create 'Test' directory if it doesn't exist
os.makedirs('Test', exist_ok=True)

# List of augmentation functions to test
augmentation_functions = [
    (pitch_shift, {'n_steps': 2}),
    (time_stretch, {'rate': 1.2}),
    (add_noise, {'noise_factor': 0.01}),
    (change_speed, {'speed_factor': 1.1}),
    (apply_filter, {'filter_type': 'lowpass', 'cutoff': 2000}),
    (time_shift, {'shift_max': int(sr * 0.5)}),
    (mix_nature_sounds, {'duration': len(audio) / sr, 'sr': sr})
]

print(f"Testing augmentations on file: {random_file}")

# Apply each augmentation function and save the result
for i, (func, params) in enumerate(augmentation_functions):
    if func.__name__ == 'mix_nature_sounds':
        # For mix_nature_sounds, handle it differently as it needs a background
        background = func(**params)
        augmented = add_background(audio, background, ratio=0.2)
    elif func.__name__ in ['time_stretch', 'change_speed', 'add_noise', 'time_shift']:
        # These functions don't need sr
        augmented = func(audio, **params)
    else:
        # For pitch_shift and apply_filter, which need sr, pass audio, sr, and params
        augmented = func(audio, sr, **params)
    
    # Generate output filename
    output_filename = f"{os.path.splitext(random_file)[0]}_{func.__name__}.wav"
    output_path = os.path.join('Test', output_filename)
    
    # Save the augmented audio
    sf.write(output_path, augmented, sr)
    
    print(f"Saved {output_filename}")

print("Augmentation test complete.")

Testing augmentations on file: Poecile_atricapillus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814911_segment_48.wav
Saved Poecile_atricapillus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814911_segment_48_pitch_shift.wav
Saved Poecile_atricapillus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814911_segment_48_time_stretch.wav
Saved Poecile_atricapillus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814911_segment_48_add_noise.wav
Saved Poecile_atricapillus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814911_segment_48_change_speed.wav
Saved Poecile_atricapillus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814911_segment_48_apply_filter.wav
Saved Poecile_atricapillus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814911_segment_48_time_shift.wav
Saved Poecile_atricapillus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814911_segment_48_mix_nature_sounds.wav
Augmentation test complete.


In [27]:

# Apply augmentation to every file
input_dir = 'Processed Recordings'
output_dir = 'Augmented Recordings'
num_augmentations = 3


# Process the dataframe
augmented_data = process_dataframe(processed_data, input_dir, output_dir, num_augmentations)

print(f"Augmentation complete. {len(augmented_data) - len(processed_data)} new samples created.")

Processing files:   0%|          | 7/12120 [00:02<54:49,  3.68it/s]  

Error processing Processed Recordings\Branta_canadensis_Whitewater_Township_near__Harrison_Hamilton_County_Ohio_726750_segment_5.wav: operands could not be broadcast together with shapes (500,) (406,) 


Processing files:   1%|▏         | 178/12120 [01:04<53:10,  3.74it/s]  

Error processing Processed Recordings\Antrostomus_carolinensis_Hamilton_Rd_Adams_Cty_Ohio_939171_segment_9.wav: operands could not be broadcast together with shapes (1000,) (865,) 


Processing files:   2%|▏         | 212/12120 [01:15<1:16:18,  2.60it/s]

Error processing Processed Recordings\Antrostomus_carolinensis_Brush_Creek_near__West_Union_Adams_County_Ohio_178533_segment_3.wav: operands could not be broadcast together with shapes (1000,) (644,) 


Processing files:   3%|▎         | 314/12120 [01:54<1:01:25,  3.20it/s]

Error processing Processed Recordings\Antigone_canadensis_Troy_Township_near__Delaware_Delaware_County_Ohio_625913_segment_8.wav: operands could not be broadcast together with shapes (1000,) (614,) 


Processing files:   5%|▌         | 608/12120 [03:39<54:34,  3.52it/s]  

Error processing Processed Recordings\Scolopax_minor_Columbus_Franklin_County_Ohio_171997_segment_35.wav: operands could not be broadcast together with shapes (1000,) (778,) 


Processing files:   5%|▌         | 666/12120 [04:00<53:15,  3.58it/s]  

Error processing Processed Recordings\Scolopax_minor_Columbus_Franklin_County_Ohio_171991_segment_27.wav: operands could not be broadcast together with shapes (1000,) (727,) 




Error processing Processed Recordings\Scolopax_minor_Columbus_Franklin_County_Ohio_171991_segment_38.wav: operands could not be broadcast together with shapes (1000,) (978,) 


Processing files:   6%|▋         | 772/12120 [04:37<1:01:50,  3.06it/s]

Error processing Processed Recordings\Botaurus_lentiginosus_Battelle_Darby_Metro_Park_-_Wetlands_417864_segment_0.wav: operands could not be broadcast together with shapes (500,) (56,) 


Processing files:   7%|▋         | 885/12120 [05:17<1:11:16,  2.63it/s]

Error processing Processed Recordings\Bubo_virginianus_Green_Township_near__Mount_Orab_Brown_County_Ohio_760069_segment_9.wav: operands could not be broadcast together with shapes (500,) (474,) 


Processing files:   8%|▊         | 945/12120 [05:37<1:02:36,  2.97it/s]

Error processing Processed Recordings\Bubo_virginianus_mount_orab_ohio_760068_segment_42.wav: operands could not be broadcast together with shapes (1000,) (690,) 


Processing files:  10%|▉         | 1155/12120 [06:51<55:40,  3.28it/s]  

Error processing Processed Recordings\Megascops_asio_mount_orab_ohio_669305_segment_7.wav: operands could not be broadcast together with shapes (500,) (386,) 


Processing files:  11%|█▏        | 1381/12120 [08:13<42:36,  4.20it/s]  

Error processing Processed Recordings\Melanerpes_carolinus_Green_Township_near__Cincinnati_Hamilton_County_Ohio_726629_segment_3.wav: operands could not be broadcast together with shapes (500,) (221,) 


Processing files:  12%|█▏        | 1435/12120 [08:31<53:04,  3.36it/s]  

Error processing Processed Recordings\Melanerpes_carolinus_Green_Township_near__Cincinnati_Hamilton_County_Ohio_672994_segment_8.wav: operands could not be broadcast together with shapes (1000,) (705,) 


Processing files:  12%|█▏        | 1507/12120 [08:57<58:53,  3.00it/s]  

Error processing Processed Recordings\Dryobates_pubescens_Pelee_near__Leamington_Essex_County_Ontario_476564_segment_0.wav: operands could not be broadcast together with shapes (1000,) (656,) 


Processing files:  13%|█▎        | 1586/12120 [09:25<1:00:38,  2.90it/s]

Error processing Processed Recordings\Colaptes_auratus_Anderson_Township_near__Cincinnati_Hamilton_County_Ohio_542520_segment_21.wav: operands could not be broadcast together with shapes (1000,) (955,) 


Processing files:  13%|█▎        | 1626/12120 [09:39<1:02:32,  2.80it/s]

Error processing Processed Recordings\Dryocopus_pileatus_Harrison_Township_near__Harrison_Hamilton_County_Ohio_691470_segment_38.wav: operands could not be broadcast together with shapes (1000,) (865,) 


Processing files:  16%|█▌        | 1890/12120 [11:16<50:55,  3.35it/s]

Error processing Processed Recordings\Empidonax_virescens_Harrison_Township_near__Harrison_Hamilton_County_Ohio_728279_segment_7.wav: operands could not be broadcast together with shapes (1000,) (648,) 


Processing files:  16%|█▌        | 1933/12120 [11:31<54:44,  3.10it/s]  

Error processing Processed Recordings\Empidonax_virescens_Harrison_Township_near__Harrison_Hamilton_County_Ohio_728272_segment_1.wav: operands could not be broadcast together with shapes (500,) (428,) 


Processing files:  16%|█▌        | 1964/12120 [11:42<1:13:43,  2.30it/s]

Error processing Processed Recordings\Empidonax_virescens_Miami_Township_near__North_Bend_Hamilton_County_Ohio_670407_segment_25.wav: operands could not be broadcast together with shapes (1000,) (913,) 


Processing files:  17%|█▋        | 2105/12120 [12:33<1:02:49,  2.66it/s]

Error processing Processed Recordings\Tyrannus_tyrannus_Point_Pelee_National_Park_near__Wheatley_Essex_County_Ontario_805523_segment_12.wav: operands could not be broadcast together with shapes (500,) (300,) 


Processing files:  19%|█▉        | 2288/12120 [13:38<57:47,  2.84it/s]  

Error processing Processed Recordings\Vireo_gilvus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827590_segment_11.wav: operands could not be broadcast together with shapes (1000,) (823,) 


Processing files:  19%|█▉        | 2323/12120 [13:50<58:10,  2.81it/s]  

Error processing Processed Recordings\Vireo_gilvus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827590_segment_46.wav: operands could not be broadcast together with shapes (1000,) (913,) 


Processing files:  19%|█▉        | 2349/12120 [13:59<51:47,  3.14it/s]

Error processing Processed Recordings\Vireo_gilvus_Point_Pelee_National_Park_near__Wheatley_Essex_County_Ontario_806076_segment_1.wav: operands could not be broadcast together with shapes (500,) (14,) 


Processing files:  20%|█▉        | 2381/12120 [14:10<56:03,  2.90it/s]  

Error processing Processed Recordings\Vireo_gilvus_Whitewater_Township_near__Harrison_Hamilton_County_Ohio_726753_segment_12.wav: operands could not be broadcast together with shapes (500,) (97,) 


Processing files:  20%|██        | 2480/12120 [14:47<48:39,  3.30it/s]  

Error processing Processed Recordings\Vireo_gilvus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827589_segment_3.wav: operands could not be broadcast together with shapes (500,) (360,) 




Error processing Processed Recordings\Vireo_gilvus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827589_segment_5.wav: operands could not be broadcast together with shapes (1000,) (884,) 


Processing files:  22%|██▏       | 2681/12120 [15:59<56:03,  2.81it/s]  

Error processing Processed Recordings\Vireo_flavifrons_Shawnee_State_Forest_Scioto_County_Ohio_417452_segment_2.wav: operands could not be broadcast together with shapes (500,) (314,) 


Processing files:  22%|██▏       | 2700/12120 [16:06<1:00:09,  2.61it/s]

Error processing Processed Recordings\Vireo_flavifrons_Miami_Township_near__North_Bend_Hamilton_County_Ohio_645966_segment_4.wav: operands could not be broadcast together with shapes (500,) (258,) 




Error processing Processed Recordings\Vireo_bellii_Heritage_Rail-Trail_south_of_Hayden_Run_Rd_near_mowed_path_BEVI_#1_103569_segment_5.wav: operands could not be broadcast together with shapes (500,) (432,) 


Processing files:  26%|██▋       | 3199/12120 [19:03<38:28,  3.86it/s]  

Error processing Processed Recordings\Baeolophus_bicolor_Green_Township_near__Cincinnati_Hamilton_County_Ohio_673001_segment_5.wav: operands could not be broadcast together with shapes (1000,) (985,) 


Processing files:  26%|██▋       | 3209/12120 [19:07<46:08,  3.22it/s]

Error processing Processed Recordings\Baeolophus_bicolor_Miami_Township_near__Cleves_Hamilton_County_Ohio_645567_segment_6.wav: operands could not be broadcast together with shapes (500,) (22,) 


Processing files:  27%|██▋       | 3220/12120 [19:11<57:06,  2.60it/s]

Error processing Processed Recordings\Baeolophus_bicolor_Miami_Township_near__Cleves_Hamilton_County_Ohio_645567_segment_18.wav: operands could not be broadcast together with shapes (500,) (4,) 


Processing files:  27%|██▋       | 3246/12120 [19:20<46:04,  3.21it/s]  

Error processing Processed Recordings\Baeolophus_bicolor_Beachwood_Cuyahoga_County_Ohio_287347_segment_9.wav: operands could not be broadcast together with shapes (1000,) (588,) 




Error processing Processed Recordings\Poecile_carolinensis_Green_Township_near__Cincinnati_Hamilton_County_Ohio_779380_segment_13.wav: operands could not be broadcast together with shapes (500,) (316,) 


Processing files:  29%|██▉       | 3497/12120 [20:51<52:04,  2.76it/s]  

Error processing Processed Recordings\Poecile_atricapillus_Point_Pelee_National_Park_near__Wheatley_Essex_County_Ontario_803377_segment_4.wav: operands could not be broadcast together with shapes (500,) (320,) 




Error processing Processed Recordings\Eremophila_alpestris_Michigan_Monroe_County_16969_segment_3.wav: operands could not be broadcast together with shapes (500,) (12,) 


Processing files:  30%|██▉       | 3584/12120 [21:23<1:00:04,  2.37it/s]

Error processing Processed Recordings\Tachycineta_bicolor_Point_Pelee_National_Park_near__Wheatley_Essex_County_Ontario_839333_segment_4.wav: operands could not be broadcast together with shapes (500,) (91,) 


Processing files:  30%|███       | 3652/12120 [21:46<42:39,  3.31it/s]

Error processing Processed Recordings\Tachycineta_bicolor_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827054_segment_64.wav: operands could not be broadcast together with shapes (1000,) (641,) 


Processing files:  32%|███▏      | 3829/12120 [22:49<40:25,  3.42it/s]  

Error processing Processed Recordings\Progne_subis_Magee_Marsh_Ohio_164748_segment_19.wav: operands could not be broadcast together with shapes (500,) (449,) 


Processing files:  32%|███▏      | 3889/12120 [23:11<39:13,  3.50it/s]  

Error processing Processed Recordings\Cistothorus_stellaris_Battelle_Darby_Metro_Park--Cutoff_trail_from_Darby_Creek_Trail_past_barn_to_Darby_Creek_Rd._105741_segment_17.wav: operands could not be broadcast together with shapes (1000,) (545,) 


Processing files:  33%|███▎      | 4001/12120 [23:54<53:08,  2.55it/s]  

Error processing Processed Recordings\Thryothorus_ludovicianus_Whitewater_Township_near__Cleves_Hamilton_County_Ohio_896572_segment_5.wav: operands could not be broadcast together with shapes (500,) (496,) 


Processing files:  33%|███▎      | 4048/12120 [24:13<43:05,  3.12it/s]  

Error processing Processed Recordings\Thryothorus_ludovicianus_Spring_Valley_near__Waynesville_Greene_Cty_Ohio_477595_segment_6.wav: operands could not be broadcast together with shapes (500,) (378,) 


Processing files:  35%|███▌      | 4291/12120 [25:45<46:00,  2.84it/s]  

Error processing Processed Recordings\Troglodytes_aedon_Cleveland_Heights_Ohio_67466_segment_16.wav: operands could not be broadcast together with shapes (1000,) (794,) 


Processing files:  36%|███▋      | 4403/12120 [26:28<47:13,  2.72it/s]  

Error processing Processed Recordings\Troglodytes_aedon_Ohio_Crane_Creek_SP_17053_segment_5.wav: operands could not be broadcast together with shapes (500,) (312,) 




Error processing Processed Recordings\Troglodytes_aedon_Ohio_Crane_Creek_SP_17053_segment_9.wav: operands could not be broadcast together with shapes (1000,) (689,) 


Processing files:  38%|███▊      | 4597/12120 [27:37<41:52,  2.99it/s]  

Error processing Processed Recordings\Polioptila_caerulea_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_816236_segment_4.wav: operands could not be broadcast together with shapes (500,) (208,) 


Processing files:  39%|███▊      | 4696/12120 [28:13<1:20:48,  1.53it/s]

Error processing Processed Recordings\Polioptila_caerulea_Anderson_Township_near__Cincinnati_Hamilton_County_Ohio_552557_segment_12.wav: operands could not be broadcast together with shapes (1000,) (972,) 




Error processing Processed Recordings\Dumetella_carolinensis_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814891_segment_48.wav: operands could not be broadcast together with shapes (1000,) (865,) 


Processing files:  41%|████▏     | 5020/12120 [30:08<37:42,  3.14it/s]

Error processing Processed Recordings\Dumetella_carolinensis_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814888_segment_11.wav: operands could not be broadcast together with shapes (1000,) (694,) 


Processing files:  41%|████▏     | 5026/12120 [30:11<48:57,  2.42it/s]

Error processing Processed Recordings\Dumetella_carolinensis_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814888_segment_18.wav: operands could not be broadcast together with shapes (1000,) (554,) 


Processing files:  42%|████▏     | 5068/12120 [30:25<35:10,  3.34it/s]

Error processing Processed Recordings\Dumetella_carolinensis_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814888_segment_59.wav: operands could not be broadcast together with shapes (1000,) (601,) 




Error processing Processed Recordings\Dumetella_carolinensis_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814889_segment_2.wav: operands could not be broadcast together with shapes (500,) (348,) 


Processing files:  43%|████▎     | 5227/12120 [31:22<37:02,  3.10it/s]  

Error processing Processed Recordings\Dumetella_carolinensis_Magee_Marsh_boardwalk_west_end_262807_segment_39.wav: operands could not be broadcast together with shapes (1000,) (897,) 


Processing files:  43%|████▎     | 5253/12120 [31:31<31:04,  3.68it/s]

Error processing Processed Recordings\Dumetella_carolinensis_Magee_Marsh_boardwalk_west_end_262806_segment_17.wav: operands could not be broadcast together with shapes (1000,) (806,) 


Processing files:  48%|████▊     | 5773/12120 [34:35<33:18,  3.18it/s]  

Error processing Processed Recordings\Toxostoma_rufum_Virginia_Kendall_Lake_Peninsula_Summit_County_Ohio_412205_segment_17.wav: operands could not be broadcast together with shapes (1000,) (505,) 


Processing files:  48%|████▊     | 5797/12120 [34:44<37:27,  2.81it/s]

Error processing Processed Recordings\Hylocichla_mustelina_Whitewater_Township_near__Cleves_Hamilton_County_Ohio_817713_segment_5.wav: operands could not be broadcast together with shapes (500,) (164,) 


Processing files:  48%|████▊     | 5843/12120 [35:00<40:19,  2.59it/s]

Error processing Processed Recordings\Hylocichla_mustelina_Packer-Hammersmith_Center_near__Oregon_Lucas_County_Ohio_784058_segment_34.wav: operands could not be broadcast together with shapes (500,) (131,) 


Processing files:  50%|████▉     | 6003/12120 [36:00<18:24,  5.54it/s]

Error processing Processed Recordings\Hylocichla_mustelina_Michigan_Monroe_County_16970_segment_6.wav: operands could not be broadcast together with shapes (500,) (178,) 




Error processing Processed Recordings\Catharus_fuscescens_Oak_Openings_Lucas_County_Ohio_803619_segment_12.wav: operands could not be broadcast together with shapes (1000,) (998,) 


Processing files:  53%|█████▎    | 6431/12120 [38:32<33:54,  2.80it/s]

Error processing Processed Recordings\Spizella_passerina_Michigan_Monroe_County_16984_segment_5.wav: operands could not be broadcast together with shapes (500,) (367,) 




Error processing Processed Recordings\Spizella_passerina_Michigan_Monroe_County_16981_segment_3.wav: operands could not be broadcast together with shapes (1000,) (877,) 


Processing files:  55%|█████▍    | 6664/12120 [39:56<34:37,  2.63it/s]

Error processing Processed Recordings\Junco_hyemalis_Pebblebrook_Lane_Moreland_Hills_Ohio_79986_segment_23.wav: operands could not be broadcast together with shapes (1000,) (620,) 




Error processing Processed Recordings\Melospiza_melodia_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_826141_segment_7.wav: operands could not be broadcast together with shapes (1000,) (517,) 


Processing files:  56%|█████▌    | 6811/12120 [40:47<34:17,  2.58it/s]

Error processing Processed Recordings\Melospiza_melodia_Green_Township_near__Cincinnati_Hamilton_County_Ohio_807468_segment_10.wav: operands could not be broadcast together with shapes (1000,) (582,) 


Processing files:  57%|█████▋    | 6856/12120 [41:02<25:20,  3.46it/s]

Error processing Processed Recordings\Melospiza_melodia_Miami_Whitewater_Forest_Park_wetlands_Crosby_Township_near__Harrison_Hamilton_County_Ohio_687157_segment_2.wav: operands could not be broadcast together with shapes (1000,) (548,) 


Processing files:  57%|█████▋    | 6926/12120 [41:29<28:05,  3.08it/s]

Error processing Processed Recordings\Pipilo_erythrophthalmus_Point_Pelee_National_Park_near__Wheatley_Essex_County_Ontario_805531_segment_8.wav: operands could not be broadcast together with shapes (1000,) (623,) 


Processing files:  58%|█████▊    | 6993/12120 [41:53<26:22,  3.24it/s]

Error processing Processed Recordings\Pipilo_erythrophthalmus_Ross_Township_near__Hamilton_Butler_County_Ohio_533333_segment_9.wav: operands could not be broadcast together with shapes (1000,) (652,) 




Error processing Processed Recordings\Icterus_galbula_Kiser_Lake_SP_Champaign_County_Ohio_938239_segment_0.wav: operands could not be broadcast together with shapes (1000,) (843,) 


Processing files:  61%|██████    | 7350/12120 [47:02<1:30:47,  1.14s/it]

Error processing Processed Recordings\Icterus_galbula_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_820109_segment_66.wav: operands could not be broadcast together with shapes (1000,) (847,) 


Processing files:  61%|██████▏   | 7431/12120 [48:54<1:41:56,  1.30s/it]

Error processing Processed Recordings\Icterus_galbula_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_818926_segment_68.wav: operands could not be broadcast together with shapes (500,) (295,) 


Processing files:  62%|██████▏   | 7488/12120 [50:24<37:01,  2.09it/s]  

Error processing Processed Recordings\Icterus_galbula_Park_End_Dr_Montgomery_County_Ohio_477390_segment_2.wav: operands could not be broadcast together with shapes (500,) (499,) 


Processing files:  62%|██████▏   | 7566/12120 [50:56<20:28,  3.71it/s]  

Error processing Processed Recordings\Icterus_galbula_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_820106_segment_23.wav: operands could not be broadcast together with shapes (500,) (270,) 


Processing files:  63%|██████▎   | 7650/12120 [51:26<23:19,  3.19it/s]

Error processing Processed Recordings\Icterus_spurius_Ohio_Ottawa_NWR_178296_segment_32.wav: operands could not be broadcast together with shapes (1000,) (574,) 


Processing files:  63%|██████▎   | 7667/12120 [51:32<31:56,  2.32it/s]

Error processing Processed Recordings\Icterus_spurius_Heritage_Trail_Metro_Park_Hayden_Run_Rd_parking_area_103503_segment_15.wav: operands could not be broadcast together with shapes (500,) (48,) 


Processing files:  63%|██████▎   | 7692/12120 [51:44<38:12,  1.93it/s]

Error processing Processed Recordings\Icterus_spurius_Heritage_Trail_Metro_Park_Hayden_Run_Rd_parking_area_103502_segment_9.wav: operands could not be broadcast together with shapes (500,) (105,) 


Processing files:  63%|██████▎   | 7695/12120 [51:45<29:45,  2.48it/s]


KeyboardInterrupt: 

In [None]:
# Fill the newly created column's missing values
augmented_data = augmented_data.fillna("None")
# Review the updated dataframe
augmented_data.info()

In [None]:
# Save the to csv
augmented_data.to_csv("augmented_data.csv", index=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48207 entries, 0 to 48206
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   genus            48207 non-null  object 
 1   species          48207 non-null  object 
 2   latitude         48207 non-null  float64
 3   longitude        48207 non-null  float64
 4   quality          48207 non-null  object 
 5   file_name        48207 non-null  object 
 6   simplified_type  48207 non-null  object 
 7   season           48207 non-null  object 
 8   time_of_day      48207 non-null  object 
 9   length_seconds   48207 non-null  int64  
 10  processed_file   48207 non-null  object 
 11  augmentations    36087 non-null  object 
dtypes: float64(2), int64(1), object(9)
memory usage: 4.4+ MB


Testing feature extraction on file: Processed Recordings\Vermivora_cyanoptera_Irwin_Prairie_SNP_Ohio_418085_segment_2.wav
mel_spectrogram_db shape: (128, 431)
mfccs shape: (13, 431), mean: -23.3198, std: 85.8184
spectral_centroids shape: (431,), mean: 3577.4505, std: 260.1879
chroma shape: (12, 431), mean: 0.5359, std: 0.2494
zero_crossing_rate shape: (431,), mean: 0.0754, std: 0.0118
spectral_rolloff shape: (431,), mean: 7900.2874, std: 1548.7024

Summarized Features:
mfccs_summary type: numpy.ndarray, shape: (39,), mean: -6.2637, std: 70.0940
spectral_centroids_summary type: numpy.ndarray, shape: (3,), mean: 2709.8262, std: 1756.5371
chroma_summary type: numpy.ndarray, shape: (36,), mean: 0.5917, std: 0.3158
zero_crossing_rate_summary type: numpy.ndarray, shape: (3,), mean: 0.0748, std: 0.0512
spectral_rolloff_summary type: numpy.ndarray, shape: (3,), mean: 6774.4191, std: 3889.4924

Final feature vector shape: (84,)
Mel-spectrogram saved as: Vermivora_cyanoptera_Irwin_Prairie_SNP_Oh

Current directory (base_dir): c:\Users\16148\Desktop\Projects\bird_call_project
Output directory: mel-spectrograms


  0%|          | 26/48207 [00:07<3:45:58,  3.55it/s]



  0%|          | 111/48207 [00:30<3:37:33,  3.68it/s]



  0%|          | 120/48207 [00:33<3:37:21,  3.69it/s]



  0%|          | 147/48207 [00:40<3:36:32,  3.70it/s]



  0%|          | 167/48207 [00:45<3:38:15,  3.67it/s]



  0%|          | 188/48207 [00:51<3:38:55,  3.66it/s]



  1%|          | 246/48207 [01:07<3:39:45,  3.64it/s]



  1%|          | 279/48207 [01:17<3:46:09,  3.53it/s]



  1%|          | 282/48207 [01:17<3:04:42,  4.32it/s]



  1%|          | 286/48207 [01:18<3:01:53,  4.39it/s]



  1%|          | 295/48207 [01:20<3:35:46,  3.70it/s]



  1%|          | 301/48207 [01:22<3:23:19,  3.93it/s]



  1%|          | 342/48207 [01:32<3:34:02,  3.73it/s]



  1%|          | 346/48207 [01:33<3:11:38,  4.16it/s]



  1%|          | 350/48207 [01:34<3:02:56,  4.36it/s]



  1%|          | 501/48207 [02:17<3:56:10,  3.37it/s]



  1%|▏         | 710/48207 [03:18<3:38:34,  3.62it/s]



  1%|▏         | 714/48207 [03:19<3:13:34,  4.09it/s]



  1%|▏         | 719/48207 [03:20<3:20:33,  3.95it/s]



  2%|▏         | 780/48207 [03:38<3:37:48,  3.63it/s]



  2%|▏         | 791/48207 [03:41<3:37:34,  3.63it/s]



  2%|▏         | 813/48207 [03:47<3:39:49,  3.59it/s]



  2%|▏         | 823/48207 [03:50<7:46:52,  1.69it/s]



  2%|▏         | 861/48207 [04:01<3:40:04,  3.59it/s]



  2%|▏         | 1031/48207 [04:50<3:40:52,  3.56it/s]



  2%|▏         | 1146/48207 [05:29<4:46:07,  2.74it/s] 



  2%|▏         | 1185/48207 [05:40<3:39:07,  3.58it/s]



  3%|▎         | 1215/48207 [05:48<3:40:41,  3.55it/s]



  3%|▎         | 1220/48207 [05:49<3:22:40,  3.86it/s]



  3%|▎         | 1262/48207 [06:01<3:41:49,  3.53it/s]



  3%|▎         | 1292/48207 [06:09<3:44:07,  3.49it/s]



  3%|▎         | 1298/48207 [06:11<3:04:16,  4.24it/s]



  3%|▎         | 1312/48207 [06:14<4:09:00,  3.14it/s]



  3%|▎         | 1316/48207 [06:15<3:33:37,  3.66it/s]



  3%|▎         | 1325/48207 [06:18<3:39:47,  3.55it/s]



  3%|▎         | 1330/48207 [06:19<3:20:16,  3.90it/s]



  3%|▎         | 1457/48207 [06:58<3:40:56,  3.53it/s] 



  3%|▎         | 1460/48207 [06:58<3:04:13,  4.23it/s]



  3%|▎         | 1472/48207 [07:02<3:39:49,  3.54it/s]



  3%|▎         | 1506/48207 [07:11<3:44:04,  3.47it/s]



  3%|▎         | 1509/48207 [07:12<3:04:44,  4.21it/s]



  3%|▎         | 1517/48207 [07:14<3:32:26,  3.66it/s]



  3%|▎         | 1526/48207 [07:16<3:36:10,  3.60it/s]



  3%|▎         | 1540/48207 [07:20<3:42:57,  3.49it/s]



  3%|▎         | 1563/48207 [07:26<3:46:01,  3.44it/s]



  3%|▎         | 1586/48207 [07:32<3:37:57,  3.56it/s]



  3%|▎         | 1663/48207 [07:55<3:41:28,  3.50it/s]



  3%|▎         | 1669/48207 [07:56<3:26:08,  3.76it/s]



  4%|▎         | 1699/48207 [08:05<3:42:30,  3.48it/s]



  4%|▎         | 1712/48207 [08:09<3:39:12,  3.53it/s]



  4%|▎         | 1722/48207 [08:11<3:37:02,  3.57it/s]



  4%|▎         | 1735/48207 [08:15<3:44:31,  3.45it/s]



  4%|▎         | 1768/48207 [08:25<3:42:39,  3.48it/s]



  4%|▎         | 1782/48207 [08:28<3:45:00,  3.44it/s]



  4%|▍         | 1903/48207 [09:17<3:43:54,  3.45it/s] 



  4%|▍         | 1909/48207 [09:19<3:33:49,  3.61it/s]



  4%|▍         | 1982/48207 [09:40<3:44:27,  3.43it/s]



  4%|▍         | 1997/48207 [09:44<3:45:17,  3.42it/s]



  4%|▍         | 2014/48207 [09:49<3:42:26,  3.46it/s]



  4%|▍         | 2029/48207 [09:53<3:43:40,  3.44it/s]



  4%|▍         | 2068/48207 [10:04<3:45:25,  3.41it/s]



  4%|▍         | 2079/48207 [10:07<3:40:52,  3.48it/s]



  4%|▍         | 2089/48207 [10:10<3:36:39,  3.55it/s]



  4%|▍         | 2113/48207 [10:16<3:36:44,  3.54it/s]



  4%|▍         | 2154/48207 [10:28<3:42:44,  3.45it/s]



  5%|▍         | 2221/48207 [10:47<3:40:21,  3.48it/s]



  5%|▍         | 2225/48207 [10:48<3:16:20,  3.90it/s]



  5%|▍         | 2323/48207 [11:34<3:49:02,  3.34it/s] 



  5%|▍         | 2347/48207 [11:41<3:49:03,  3.34it/s]



  5%|▍         | 2384/48207 [11:52<3:48:18,  3.35it/s]



  5%|▍         | 2394/48207 [11:54<3:49:05,  3.33it/s]



  5%|▌         | 2421/48207 [12:03<3:56:14,  3.23it/s]



  5%|▌         | 2427/48207 [12:04<3:39:29,  3.48it/s]



  5%|▌         | 2435/48207 [12:06<3:45:07,  3.39it/s]



  5%|▌         | 2484/48207 [12:21<3:51:22,  3.29it/s]



  5%|▌         | 2494/48207 [12:24<3:42:41,  3.42it/s]



  5%|▌         | 2578/48207 [12:49<3:47:12,  3.35it/s]



  5%|▌         | 2606/48207 [12:57<3:49:27,  3.31it/s]



  5%|▌         | 2618/48207 [13:00<3:45:22,  3.37it/s]



  5%|▌         | 2625/48207 [13:02<4:42:38,  2.69it/s]



  6%|▌         | 2656/48207 [13:11<3:46:19,  3.35it/s]



  6%|▌         | 2678/48207 [13:18<3:51:52,  3.27it/s]



  6%|▌         | 2695/48207 [13:23<3:50:19,  3.29it/s]



  6%|▌         | 2774/48207 [13:46<3:46:48,  3.34it/s]



  6%|▌         | 2789/48207 [13:51<3:53:50,  3.24it/s]



  6%|▌         | 2797/48207 [14:12<21:01:34,  1.67s/it]



  6%|▌         | 2830/48207 [14:22<3:48:17,  3.31it/s] 



  6%|▌         | 2845/48207 [14:26<3:48:26,  3.31it/s]



  6%|▌         | 2857/48207 [14:30<3:48:38,  3.31it/s]



  6%|▌         | 2945/48207 [14:56<3:51:04,  3.26it/s]



  6%|▌         | 2958/48207 [15:00<3:43:34,  3.37it/s]



  6%|▌         | 2997/48207 [15:12<3:49:04,  3.29it/s]



  6%|▌         | 3003/48207 [15:13<3:30:58,  3.57it/s]



  6%|▌         | 3011/48207 [15:15<3:41:02,  3.41it/s]



  6%|▋         | 3017/48207 [15:17<3:28:14,  3.62it/s]



  6%|▋         | 3030/48207 [15:20<3:40:37,  3.41it/s]



  6%|▋         | 3106/48207 [15:43<3:42:31,  3.38it/s]



  6%|▋         | 3116/48207 [15:45<3:39:08,  3.43it/s]



  7%|▋         | 3141/48207 [15:53<3:42:21,  3.38it/s]



  7%|▋         | 3148/48207 [15:54<3:31:21,  3.55it/s]



  7%|▋         | 3158/48207 [15:57<3:38:18,  3.44it/s]



  7%|▋         | 3177/48207 [16:03<3:44:59,  3.34it/s]



  7%|▋         | 3201/48207 [16:09<3:42:01,  3.38it/s]



  7%|▋         | 3225/48207 [16:16<3:45:32,  3.32it/s]



  7%|▋         | 3236/48207 [16:19<3:44:53,  3.33it/s]



  7%|▋         | 3250/48207 [16:23<3:48:23,  3.28it/s]



  7%|▋         | 3271/48207 [16:29<3:51:08,  3.24it/s]



  7%|▋         | 3277/48207 [16:31<3:33:12,  3.51it/s]



  7%|▋         | 3289/48207 [16:34<3:45:35,  3.32it/s]



  7%|▋         | 3309/48207 [16:40<3:44:20,  3.34it/s]



  7%|▋         | 3324/48207 [16:45<3:48:46,  3.27it/s]



  7%|▋         | 3331/48207 [16:47<3:40:19,  3.39it/s]



  7%|▋         | 3382/48207 [17:02<3:51:37,  3.23it/s]



  7%|▋         | 3387/48207 [17:04<3:28:52,  3.58it/s]



  7%|▋         | 3410/48207 [17:10<3:44:07,  3.33it/s]



  7%|▋         | 3488/48207 [17:34<3:48:33,  3.26it/s]