In [69]:
# Imports
import os
import random
import logging
import warnings


import numpy as np
import pandas as pd
from tqdm import tqdm
from multiprocessing import Pool
import multiprocessing

# Data processing and scientific computing
from scipy.io import wavfile
from scipy.signal import butter, lfilter
from scipy.spatial.distance import cosine

# Audio processing
import librosa
import soundfile as sf

# Visualization
import matplotlib.pyplot as plt
import plotly.express as px

# Set up logging
logging.basicConfig(level=logging.INFO)

### Data Cleansing
    - Remove low-quality or noisy recordings based on metadata if available, or by manually inspecting a few samples.
	    - It's a good idea to manually inspect some of the longer clips, especially outliers.
	    - - Listen for: a) Consistency of the bird call throughout the clip b) Presence of long periods of silence c) Sudden changes in background noise or environment
		- If you find issues, you might consider trimming these clips to the most relevant sections before segmenting.
		- - Definitely inspect a sample of low-rated files.
		- Listen for: a) Clarity of the bird call b) Signal-to-noise ratio (how clear the bird call is compared to background noise) c) Presence of distortions or artifacts
		- Consider setting a threshold for the quality rating, below which you might exclude files from your dataset.
		- Determining if a file is too "noisy":

		- This can be subjective, but here are some approaches: a) Signal-to-Noise Ratio (SNR): Calculate the SNR for each file. Files below a certain threshold could be considered too noisy. b) Spectral analysis: Look at the spectrogram. A very noisy file will have a lot of energy spread across all frequencies. c) Perceptual evaluation: Listen to a sample and rate them yourself. This can help you calibrate your automatic methods.

In [2]:
data = pd.read_csv('preprocessed_data.csv')

In [3]:
data.columns

Index(['genus', 'species', 'latitude', 'longitude', 'quality', 'file_name',
       'simplified_type', 'season', 'time_of_day', 'length_seconds'],
      dtype='object')

In [45]:
data['file_name'].value_counts()

file_name
Branta_canadensis_Whitewater_Township_near__Harrison_Hamilton_County_Ohio_726750.mp3                 1
Molothrus_ater_Lawrence_Woods_SNP_417996.mp3                                                         1
Agelaius_phoeniceus_Crosby_Township_near__Harrison_Hamilton_County_Ohio_482675.mp3                   1
Agelaius_phoeniceus_Case_Farm_Gates_Mills_Ohio_98722.mp3                                             1
Agelaius_phoeniceus_Ohio_Crane_Creek_SP_17011.mp3                                                    1
                                                                                                    ..
Tachycineta_bicolor_Pelee_Shuttle_Stop_near__Leamington_Essex_County_Ontario_477045.mp3              1
Tachycineta_bicolor_Ohio_Crane_Creek_SP_17046.mp3                                                    1
Progne_subis_Vermilion_River_Reservation:_Mill_Hollow_near__Vermilion_Erie_County_Ohio_784046.mp3    1
Progne_subis_Ottawa_NWR_Lucas_County_Ohio_803616.mp3           

### The metadata contains a column with a 'quality' rating. I'll manually examine those entries.

In [4]:
data['quality'].value_counts()

quality
A           274
B           262
C           116
D            32
E             4
no score      4
Name: count, dtype: int64

In [5]:

low_quality_files = data[(data['quality'] == 'E') | (data['quality'] == 'no score')]
display(low_quality_files)

Unnamed: 0,genus,species,latitude,longitude,quality,file_name,simplified_type,season,time_of_day,length_seconds
182,Vireo,bellii,39.2095,-84.7821,E,Vireo_bellii_Whitewater_Township_near__Harriso...,Song,Summer,Evening,3
197,Baeolophus,bicolor,40.942,-81.5236,no score,Baeolophus_bicolor_Ohio_near__Akron_Summit_Cou...,Call,Summer,Morning,15
227,Tachycineta,bicolor,41.9682,-82.5305,no score,Tachycineta_bicolor_Pelee_near__Leamington_Ess...,Call,Spring,Morning,2
364,Spizella,passerina,41.93338,-83.54994,E,Spizella_passerina_Michigan_Monroe_County_1717...,Song,Summer,Unknown,28
365,Spizella,pusilla,39.8888,-82.7978,no score,Spizella_pusilla_Madison_Township_near__Canal_...,Song,Spring,Afternoon,20
395,Melospiza,melodia,41.93338,-83.54994,E,Melospiza_melodia_Michigan_Monroe_County_17143...,Song,Summer,Unknown,34
563,Geothlypis,trichas,41.1895,-81.5781,E,Geothlypis_trichas_Ohio_near__Peninsula_Summit...,Song,Summer,Morning,30
687,Sonus,naturalis,41.433,-81.418,no score,Sonus_naturalis_Chagrin_Falls_Township_near__M...,Song,Spring,Afternoon,28


### Convert mp3 to wav

In [6]:
def convert_mp3_to_wav(mp3_path, wav_path):
    """
    Convert an MP3 file to WAV format using librosa and soundfile.
    
    Args:
    mp3_path (str): Path to the input MP3 file
    wav_path (str): Path to save the output WAV file
    
    Returns:
    str: Path to the created WAV file
    
    Raises:
    FileNotFoundError: If the input MP3 file is not found
    """
    if not os.path.exists(mp3_path):
        raise FileNotFoundError(f"MP3 file not found: {mp3_path}")
    
    # Load the mp3 file
    audio, sr = librosa.load(mp3_path, sr=None, mono=False)
    
    # Save as wav
    sf.write(wav_path, audio.T, sr)
    
    return wav_path

def batch_convert_to_wav(data, input_dir, output_dir):
    """
    Convert all MP3 files in the dataset to WAV format.
    
    Args:
    data (pd.DataFrame): DataFrame containing file information
    input_dir (str): Directory containing the input MP3 files
    output_dir (str): Directory to save the output WAV files
    
    Returns:
    tuple: (pd.DataFrame, list) Updated DataFrame with new file paths and list of files not found
    """
    os.makedirs(output_dir, exist_ok=True)
    
    new_data = data.copy()
    files_not_found = []
    
    for index, row in new_data.iterrows():
        mp3_path = os.path.join(input_dir, row['file_name'])
        wav_filename = os.path.splitext(row['file_name'])[0] + '.wav'
        wav_path = os.path.join(output_dir, wav_filename)
        
        try:
            convert_mp3_to_wav(mp3_path, wav_path)
            new_data.at[index, 'file_name'] = wav_filename
        except FileNotFoundError:
            files_not_found.append(row['file_name'])
            new_data = new_data.drop(index)
    
    return new_data.reset_index(drop=True), files_not_found

In [7]:
original_dir = 'Original Recordings'
converted_dir = 'Converted Recordings'

# Convert the MP3 files to WAV
print("Converting MP3 files to WAV...")
converted_data, missing_files = batch_convert_to_wav(data, original_dir, converted_dir)

# Print summary
print(f"Conversion complete. {len(converted_data)} files converted.")
print(f"WAV files saved in: {converted_dir}")
print(f"Number of files not found: {len(missing_files)}")

# If you want to examine the list of missing files
print("List of files not found:")
print(missing_files)

Converting MP3 files to WAV...
Conversion complete. 691 files converted.
WAV files saved in: Converted Recordings
Number of files not found: 1
List of files not found:
['Colaptes_auratus_Miami_Township_near__North_Bend_Hamilton_County_Ohio_713588.mp3']


In [47]:
converted_data['file_name'].value_counts()

file_name
Branta_canadensis_Whitewater_Township_near__Harrison_Hamilton_County_Ohio_726750.wav                 1
Molothrus_ater_Lawrence_Woods_SNP_417996.wav                                                         1
Agelaius_phoeniceus_Crosby_Township_near__Harrison_Hamilton_County_Ohio_482675.wav                   1
Agelaius_phoeniceus_Case_Farm_Gates_Mills_Ohio_98722.wav                                             1
Agelaius_phoeniceus_Ohio_Crane_Creek_SP_17011.wav                                                    1
                                                                                                    ..
Tachycineta_bicolor_Ohio_Crane_Creek_SP_17046.wav                                                    1
Progne_subis_Vermilion_River_Reservation:_Mill_Hollow_near__Vermilion_Erie_County_Ohio_784046.wav    1
Progne_subis_Ottawa_NWR_Lucas_County_Ohio_803616.wav                                                 1
Progne_subis_Magee_Marsh_Ohio_164751.wav                       

"""
Audio Cleaning Functions

These functions collectively clean an audio file by:
1. Calculating its signal-to-noise ratio (SNR) and filtering out audio that is too noisy.
2. Detecting and trimming long silences from the audio.
3. Checking for spectral spread, which is an indicator of unwanted noise or anomalies.

Main function:
- `clean_audio`: Uses `is_too_noisy`, `has_long_silence`, and `check_spectral_spread` to decide if an audio file is suitable for further processing.
"""

- **Feature Extraction with Librosa**:
    - Extract features like **Mel-spectrograms** and **MFCCs** from each audio file. These features are effective for audio classification tasks.
    - Store these features as images (for CNN input) or numerical arrays (for models like Random Forest or RNNs).

    - **Audio Standardization**:
    - Convert all files to a consistent format (e.g., 16-bit WAV, mono-channel, and a sampling rate like 16 kHz).
- **Clip Standardization**:
    - Trim or pad each audio clip to a standard duration (e.g., 5 seconds), so all inputs have the same shape.

### Start with resampling so every file has the same sample rate

In [None]:
# Define global variable
# Define sample rate
sr = 44100

In [8]:

def resample_audio(file_path, target_sr=sr):
    try:
        # Load the audio file
        audio, sr = librosa.load(file_path, sr=None)
        
        # Resample if necessary
        if sr != target_sr:
            audio = librosa.resample(y=audio, orig_sr=sr, target_sr=target_sr)
            
            # Overwrite the original file
            sf.write(file_path, audio, target_sr)
            
            return True  # Indicate that resampling was performed
        else:
            return False  # Indicate that no resampling was needed
    except Exception as e:
        logging.error(f"Error processing {file_path}: {str(e)}")
        return None

def resample_all_files(directory='Converted Recordings', target_sr=44100):
    total_files = 0
    resampled_files = 0
    error_files = 0
    
    # Iterate through all files in the directory
    for filename in os.listdir(directory):
        if filename.endswith('.wav'):  # Assuming we're only processing .wav files
            total_files += 1
            file_path = os.path.join(directory, filename)
            result = resample_audio(file_path, target_sr)
            if result is True:
                resampled_files += 1
            elif result is None:
                error_files += 1
    
    print(f"Resampling process complete.")
    print(f"Total files checked: {total_files}")
    print(f"Files resampled: {resampled_files}")
    if error_files > 0:
        print(f"Files with errors: {error_files}")

# Usage
logging.basicConfig(level=logging.ERROR)  # Only log errors
resample_all_files('Converted Recordings')

Resampling process complete.
Total files checked: 690
Files resampled: 311


In [9]:
def calculate_snr(audio):
    """Calculate the signal-to-noise ratio of an audio clip."""
    signal = np.mean(audio**2)
    noise = np.mean((audio - np.mean(audio))**2)
    snr = 10 * np.log10(signal / noise)
    return snr

def is_too_noisy(audio, sr, threshold=-20):
    """Check if audio is too noisy based on its SNR."""
    snr = calculate_snr(audio)
    return snr < threshold

def has_long_silence(audio, sr, silence_threshold=-60, min_silence_duration=1.0):
    """Detects long silences within the audio clip."""
    intervals = librosa.effects.split(audio, top_db=-silence_threshold)
    if len(intervals) > 1:
        silence_durations = np.diff(intervals.ravel())[1::2] / sr
        return np.any(silence_durations >= min_silence_duration)
    return False

def check_spectral_spread(audio, sr, threshold=0.8):
    """Check if the spectral spread exceeds the specified threshold."""
    spec = np.abs(librosa.stft(audio))
    spectral_spread = np.sum(spec > np.mean(spec)) / spec.size
    return spectral_spread > threshold

def clean_audio(audio, sr, file_path, shared_discarded_files):
    """Cleans an audio file by removing noise, silence, and checking for spectral spread."""
    # Get file name for logging
    file_name = os.path.basename(file_path)

    # Check noise level
    if is_too_noisy(audio, sr):
        shared_discarded_files.append({'file_path': file_path, 'reason': 'too_noisy', 'snr': calculate_snr(audio)})
        return None
    
    # Check for long silences
    if has_long_silence(audio, sr):
        audio = librosa.effects.trim(audio, top_db=20)[0]
    
    # Check spectral spread
    if check_spectral_spread(audio, sr):
        shared_discarded_files.append({'file_path': file_path, 'reason': 'bad_spectral_spread'})
        return None
    
    return audio

### Testing the functions on a single file

In [11]:


# Choose a random file
random_file = random.choice(converted_data['file_name'].tolist())
file_path = os.path.join('Converted Recordings', random_file)

# Load the audio file
audio, _ = librosa.load(file_path, sr=sr)

# Test calculate_snr function
snr = calculate_snr(audio)
print(f"calculate_snr completed. SNR: {snr}")

# Test is_too_noisy function
too_noisy = is_too_noisy(audio, sr)
print(f"is_too_noisy completed. Result: {too_noisy}")

# Test has_long_silence function
long_silence = has_long_silence(audio, sr)
print(f"has_long_silence completed. Result: {long_silence}")

# Test check_spectral_spread function
spectral_spread = check_spectral_spread(audio, sr)
print(f"check_spectral_spread completed. Result: {spectral_spread}")

# Test clean_audio function
shared_discarded_files = []
cleaned_audio = clean_audio(audio, sr, file_path, shared_discarded_files)
print(f"clean_audio completed. Cleaned audio returned: {'Yes' if cleaned_audio is not None else 'No'}")
if shared_discarded_files:
    print(f"File discarded. Reason: {shared_discarded_files[0]['reason']}")

print(f"\nTested file: {file_path}")

calculate_snr completed. SNR: 2.070876945481359e-06
is_too_noisy completed. Result: False
has_long_silence completed. Result: False
check_spectral_spread completed. Result: False
clean_audio completed. Cleaned audio returned: Yes

Tested file: Converted Recordings\Melospiza_melodia_Battelle_Darby_Metro_Park--Darby_Dan_Training_Loop_trail_SW_corner_105818.wav


### Test duplication functions two files

In [12]:
def get_audio_fingerprint(audio, sr):
    n_fft = min(2048, len(audio))
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13, n_fft=n_fft)
    return np.mean(mfccs, axis=1)

def are_near_duplicates(audio1, sr1, audio2, sr2, threshold=0.99):
    if len(audio1) == 0 or len(audio2) == 0:
        raise ValueError("One or both audio files are empty")
    
    if sr1 != sr2:
        print(f"Warning: Sample rates differ ({sr1} vs {sr2}). Resampling may be necessary.")
    
    fp1 = get_audio_fingerprint(audio1, sr1)
    fp2 = get_audio_fingerprint(audio2, sr2)
    
    if len(fp1) != len(fp2):
        raise ValueError("Fingerprints have different lengths")
    
    similarity = 1 - cosine(fp1, fp2)
    return similarity > threshold

def check_audio_duplicates(file_path, shared_duplicates, shared_discarded_files):
    try:
        # Load the audio
        audio, sr = librosa.load(file_path, sr=None)
        
        # Check for duplicates
        for existing_audio, existing_sr, existing_path in shared_duplicates:
            if are_near_duplicates(audio, sr, existing_audio, existing_sr):
                shared_discarded_files.append((file_path, 'duplicate'))
                return True  # It's a duplicate

        # If not a duplicate, store the audio and path
        shared_duplicates.append((audio, sr, file_path))
        return False  # It's not a duplicate
    except Exception as e:
        logging.error(f"Error checking duplicates for {file_path}: {str(e)}")
        shared_discarded_files.append((file_path, f'error: {str(e)}'))
        return None  # Error occurred

def check_and_remove_duplicates(directory='Converted Recordings'):
    shared_duplicates = []
    shared_discarded_files = []
    duplicate_files = []
    total_files = 0

    # Check each file in the directory for duplicates
    for filename in os.listdir(directory):
        if filename.endswith('.wav'):  # We're only processing .wav files
            total_files += 1
            file_path = os.path.join(directory, filename)
            is_duplicate = check_audio_duplicates(file_path, shared_duplicates, shared_discarded_files)
            if is_duplicate:
                duplicate_files.append(file_path)

    # Log the results
    print(f"Total files checked: {total_files}")
    print(f"Duplicate files found: {len(duplicate_files)}")

    return duplicate_files, shared_discarded_files

In [13]:
### Check duplicate checking functions on two files

# Define sample rate
sr = 44100

# Choose two random files
random_files = random.sample(converted_data['file_name'].tolist(), 2)
file_paths = [os.path.join('Converted Recordings', file) for file in random_files]

# Load the audio files
audio1, _ = librosa.load(file_paths[0], sr=sr)
audio2, _ = librosa.load(file_paths[1], sr=sr)

# Test get_audio_fingerprint function
print("Testing get_audio_fingerprint function:")
fingerprint1 = get_audio_fingerprint(audio1, sr)
print(f"get_audio_fingerprint completed for file 1.")
print(f"Fingerprint shape: {fingerprint1.shape}")
print(f"Fingerprint: {fingerprint1}")

fingerprint2 = get_audio_fingerprint(audio2, sr)
print(f"get_audio_fingerprint completed for file 2.")
print(f"Fingerprint shape: {fingerprint2.shape}")
print(f"Fingerprint: {fingerprint2}")

# Test are_near_duplicates function with different thresholds
print("\nTesting are_near_duplicates function:")
thresholds = [0.95, 0.99, 0.999]
for threshold in thresholds:
    try:
        similarity = 1 - cosine(fingerprint1, fingerprint2)
        are_duplicates = similarity > threshold
        print(f"Threshold: {threshold}")
        print(f"Similarity score: {similarity}")
        print(f"Result: The two files are {'near duplicates' if are_duplicates else 'not near duplicates'}.")
    except ValueError as e:
        print(f"Error occurred: {str(e)}")

print(f"\nTested files:")
print(f"File 1: {file_paths[0]}")
print(f"File 2: {file_paths[1]}")

Testing get_audio_fingerprint function:
get_audio_fingerprint completed for file 1.
Fingerprint shape: (13,)
Fingerprint: [-382.887      131.52614    -25.590466    42.742638    11.518386
   18.056452    -2.9881737   26.282648    12.907345    14.582416
    3.7415042   15.959805     2.6299613]
get_audio_fingerprint completed for file 2.
Fingerprint shape: (13,)
Fingerprint: [-428.40958     -6.4183793 -147.9484     -62.548386   -90.87569
   21.31862    -40.288704    24.530449   -25.477175    28.19994
   -9.91276     24.831785    -3.5105407]

Testing are_near_duplicates function:
Threshold: 0.95
Similarity score: 0.8521050087261923
Result: The two files are not near duplicates.
Threshold: 0.99
Similarity score: 0.8521050087261923
Result: The two files are not near duplicates.
Threshold: 0.999
Similarity score: 0.8521050087261923
Result: The two files are not near duplicates.

Tested files:
File 1: Converted Recordings\Thryothorus_ludovicianus_Park_End_Dr_Montgomery_County_Ohio_477391.wav
F

In [14]:
# # Apply duplicate checking to all files
# logging.basicConfig(level=logging.INFO)
# duplicates, discarded_files = check_and_remove_duplicates('Converted Recordings')

# print(f"Duplicate files found: {len(duplicates)}")
# print("List of duplicate files:")
# for file in duplicates:
#     print(file)

# print(f"\nTotal discarded files: {len(discarded_files)}")
# print("List of discarded files and reasons:")
# for file, reason in discarded_files:
#     print(f"{file}: {reason}")

In [15]:
def process_audio(file_path, shared_discarded_files, target_length=5, overlap=0.5, target_sr=44100):
    print(f"Processing file: {file_path}")  # For process_audio
    try:
        # Load and clean the audio
        audio, sr = librosa.load(file_path, sr=None)
        audio = clean_audio(audio, sr, file_path, shared_discarded_files)
        if audio is None:
            return None, None
        
        # Check if audio is shorter than 4410 samples (100ms at 44.1kHz)
        if len(audio) < 4410:
            shared_discarded_files.append((file_path, 'too_short'))
            return None, None
            
        # Convert target_length to samples
        target_samples = int(sr * target_length)
        
        # If audio is shorter than target length, pad with zeros
        if len(audio) < target_samples:
            audio = np.pad(audio, (0, target_samples - len(audio)))
        
        # If audio is longer than target length, segment with overlap
        else:
            segments = []
            for start in range(0, len(audio), int(target_samples * (1 - overlap))):
                segment = audio[start:start + target_samples]
                if len(segment) == target_samples:
                    segments.append(segment)
                elif len(segment) > 0:
                    segment = np.pad(segment, (0, target_samples - len(segment)))
                    segments.append(segment)
            audio = np.array(segments)

        return audio, sr
    except Exception as e:
        logging.error(f"Error processing {file_path}: {str(e)}")
        shared_discarded_files.append((file_path, f'error: {str(e)}'))
        return None, None


In [16]:
def process_file(args):
    row, audio_dir, output_dir, shared_discarded_files = args
    file_path = os.path.join(audio_dir, row['file_name'])
    print(f"Row: {row['file_name']}")  # For process_file
    if not os.path.exists(file_path):
        logging.warning(f"File not found: {file_path}")
        return None
    
    processed_audio, sr = process_audio(file_path, shared_discarded_files)
    if processed_audio is None:
        return None
    
    processed_data = []
    if processed_audio.ndim == 2:
        for i, segment in enumerate(processed_audio):
            new_row = row.copy()
            base_filename = f"{os.path.splitext(row['file_name'])[0]}_segment_{i}"
            new_row['processed_file'] = f"{base_filename}.wav"
            wavfile.write(os.path.join(output_dir, new_row['processed_file']), sr, segment)
            processed_data.append(new_row)
    else:
        base_filename = f"{os.path.splitext(row['file_name'])[0]}_processed"
        row['processed_file'] = f"{base_filename}.wav"
        wavfile.write(os.path.join(output_dir, row['processed_file']), sr, processed_audio)
        processed_data.append(row)
    
    return processed_data


In [17]:
def process_dataset(converted_data, audio_dir, output_dir):
    manager = multiprocessing.Manager()
    shared_discarded_files = manager.list()
    
    print(f"Audio directory: {audio_dir}, Output directory: {output_dir}")
    results = []
    
    for _, row in converted_data.iterrows():
        results.append(process_file((row, audio_dir, output_dir, shared_discarded_files)))
    
    processed_data = [item for sublist in results if sublist is not None for item in sublist]

    # Save discarded files to a DataFrame and export as CSV
    discard_log_df = pd.DataFrame(list(shared_discarded_files), columns=['file_path', 'reason'])
    discard_log_df.to_csv('discarded_audio_log.csv', index=False)

    return pd.DataFrame(processed_data)

In [18]:
## Testing audio processing functions on a single file

# Select a random file
random_file = random.choice(converted_data['file_name'])
file_path = os.path.join('Converted Recordings', random_file)

print(f"Testing audio processing on file: {random_file}")

# Create a dummy row for testing
test_row = pd.Series({'file_name': random_file})

# Create necessary directories
output_dir = 'Test'
os.makedirs(output_dir, exist_ok=True)

# Process the audio file
processed_audio, sr = process_audio(file_path)

if processed_audio is not None:
    print(f"Audio processed successfully.")
    print(f"Processed audio shape: {processed_audio.shape}")
    print(f"Sample rate: {sr}")

    # Simulate the process_file function
    args = (test_row, 'Converted Recordings', output_dir)
    processed_data = process_file(args)

    if processed_data is not None:
        print("\nProcessed data:")
        for item in processed_data:
            print(f"Processed file: {item['processed_file']}")
        print(f"Number of segments: {len(processed_data)}")

        # Verify the output files
        for item in processed_data:
            output_file = os.path.join(output_dir, item['processed_file'])
            if os.path.exists(output_file):
                print(f"Output file created: {output_file}")
                # Load and print some information about the output file
                audio, sr = librosa.load(output_file, sr=None)
                print(f"Output audio duration: {librosa.get_duration(y=audio, sr=sr):.2f} seconds")
            else:
                print(f"Error: Output file not created: {output_file}")
    else:
        print("Error: process_file returned None")
else:
    print("Error: Audio processing failed")

print("\nAudio processing test complete.")

In [19]:
# Main execution
converted_dir = 'Converted Recordings'
processed_dir = 'Processed Recordings'

# Ensure output directory exists
if not os.path.exists(processed_dir):
    os.makedirs(processed_dir)

processed_data = process_dataset(converted_data, converted_dir, processed_dir)
print('Audio Processing Complete')

# Filter processed_data to only include files with "segment" in the file name
processed_data = processed_data[processed_data['processed_file'].str.contains('_segment_')]

# Print completion message and count files in Processed Recordings directory
processed_file_count = len([f for f in os.listdir(processed_dir) if f.endswith('.wav')])
print(f"\nAudio processing is complete. There are now {processed_file_count} files in the '{processed_dir}' directory.")

# Output discarded files
discard_log_df = pd.read_csv('discarded_audio_log.csv')
if not discard_log_df.empty:
    print("\nThe following files were discarded:")
    for _, row in discard_log_df.iterrows():
        print(f"{row['file_path']}: {row['reason']}")
else:
    print("\nNo files were discarded.")

Audio directory: Converted Recordings, Output directory: Processed Recordings
Row: Branta_canadensis_Whitewater_Township_near__Harrison_Hamilton_County_Ohio_726750.wav
Processing file: Converted Recordings\Branta_canadensis_Whitewater_Township_near__Harrison_Hamilton_County_Ohio_726750.wav
Row: Branta_canadensis_Lawrence_Woods_SNP_418000.wav
Processing file: Converted Recordings\Branta_canadensis_Lawrence_Woods_SNP_418000.wav
Row: Branta_canadensis_Miami_Whitewater_Forest_Park_wetlands_Crosby_Township_near__Harrison_Hamilton_County_Ohio_691528.wav
Processing file: Converted Recordings\Branta_canadensis_Miami_Whitewater_Forest_Park_wetlands_Crosby_Township_near__Harrison_Hamilton_County_Ohio_691528.wav
Row: Cygnus_buccinator_Killdeer_Plains_Wildlife_Management_Area_Wyandot_County_Ohio_713788.wav
Processing file: Converted Recordings\Cygnus_buccinator_Killdeer_Plains_Wildlife_Management_Area_Wyandot_County_Ohio_713788.wav
Row: Aix_sponsa_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_815809.w

In [20]:
print(processed_data.info())
processed_data.head()

<class 'pandas.core.frame.DataFrame'>
Index: 12120 entries, 0 to 690
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   genus            12120 non-null  object 
 1   species          12120 non-null  object 
 2   latitude         12120 non-null  float64
 3   longitude        12120 non-null  float64
 4   quality          12120 non-null  object 
 5   file_name        12120 non-null  object 
 6   simplified_type  12120 non-null  object 
 7   season           12120 non-null  object 
 8   time_of_day      12120 non-null  object 
 9   length_seconds   12120 non-null  int64  
 10  processed_file   12120 non-null  object 
dtypes: float64(2), int64(1), object(8)
memory usage: 1.1+ MB
None


Unnamed: 0,genus,species,latitude,longitude,quality,file_name,simplified_type,season,time_of_day,length_seconds,processed_file
0,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...
0,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...
0,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...
0,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...
0,Branta,canadensis,39.2095,-84.7821,A,Branta_canadensis_Whitewater_Township_near__Ha...,Call,Spring,Morning,14,Branta_canadensis_Whitewater_Township_near__Ha...


### Data Augmentation
### Augment the processed audio files.
- Pitch Shift
- Time Stretch
- add_noise
- change_speed
- apply_filter
- add_background
- time_shift
- augment_audio
- 

In [21]:

def generate_wind_sound(duration, sr):
    t = np.linspace(0, duration, int(sr * duration), False)
    wind = np.random.normal(0, 0.1, int(sr * duration))
    wind_filtered = np.convolve(wind, np.ones(1000)/1000, mode='same')
    return wind_filtered / np.max(np.abs(wind_filtered))

def generate_leaf_rustle(duration, sr):
    t = np.linspace(0, duration, int(sr * duration), False)
    leaf = np.random.normal(0, 0.1, int(sr * duration))
    envelope = np.exp(-t * 10) * np.sin(2 * np.pi * 2 * t)**2
    return leaf * envelope / np.max(np.abs(leaf * envelope))

def generate_water_sound(duration, sr):
    t = np.linspace(0, duration, int(sr * duration), False)
    water = np.random.normal(0, 0.1, int(sr * duration))
    water_filtered = np.convolve(water, np.ones(500)/500, mode='same')
    ripple = np.sin(2 * np.pi * 2 * t) * np.exp(-t * 2)
    return (water_filtered + ripple) / np.max(np.abs(water_filtered + ripple))

def mix_nature_sounds(duration, sr):
    wind = generate_wind_sound(duration, sr)
    leaf = generate_leaf_rustle(duration, sr)
    water = generate_water_sound(duration, sr)
    
    mix = wind * 0.7 + leaf * 0.2 + water * 0.1
    return mix / np.max(np.abs(mix))

# Generate a 5-second mix of nature-like sounds
sr = 44100
duration = 5
nature_background = mix_nature_sounds(duration, sr)

In [22]:

def pitch_shift(audio, sr, n_steps):
    n_fft = min(2048, len(audio))
    return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps, n_fft=n_fft)

def time_stretch(audio, rate):
    return librosa.effects.time_stretch(audio, rate=rate)

def add_noise(audio, noise_factor):
    noise = np.random.randn(len(audio))
    augmented_audio = audio + noise_factor * noise
    return np.clip(augmented_audio, -1, 1)

def change_speed(audio, speed_factor):
    return librosa.effects.time_stretch(audio, rate=1/speed_factor)

def apply_filter(audio, sr, filter_type='lowpass', cutoff=1000):
    nyquist = 0.5 * sr
    normal_cutoff = cutoff / nyquist
    b, a = butter(4, normal_cutoff, btype=filter_type, analog=False)
    return lfilter(b, a, audio)

def add_background(audio, background, ratio=0.1):
    if len(background) != len(audio):
        if len(background) > len(audio):
            start = np.random.randint(0, len(background) - len(audio))
            background = background[start:start+len(audio)]
        else:
            background = np.pad(background, (0, len(audio) - len(background)))
    
    if len(audio) != len(background):
        print(f"Shape mismatch in add_background: audio {len(audio)}, background {len(background)}")
        return audio  # Return original audio if shapes don't match
    
    return audio + ratio * background

def time_shift(audio, shift_max, roll_prob=0.5):
    shift = np.random.randint(-shift_max, shift_max)
    if random.random() < roll_prob:
        return np.roll(audio, shift)
    else:
        if shift > 0:
            return np.pad(audio, (shift, 0))[:len(audio)]
        else:
            return np.pad(audio, (0, -shift))[:-shift]

def augment_audio(audio, sr):
    augmentations = [
        (pitch_shift, {'n_steps': random.uniform(-2, 2)}),
        (time_stretch, {'rate': random.uniform(0.8, 1.2)}),
        (add_noise, {'noise_factor': random.uniform(0.001, 0.015)}),
        (change_speed, {'speed_factor': random.uniform(0.9, 1.1)}),
        (apply_filter, {'filter_type': random.choice(['lowpass', 'highpass']),
                        'cutoff': random.uniform(1000, 4000)}),
        (time_shift, {'shift_max': int(sr * 0.5)})
    ]
    
    num_augments = random.randint(2, 4)
    selected_augments = random.sample(augmentations, num_augments)
    
    applied_augmentations = []
    
    for augment_func, params in selected_augments:
        if augment_func.__name__ in ['pitch_shift', 'apply_filter']:
            audio = augment_func(audio, sr, **params)
        else:
            audio = augment_func(audio, **params)
        applied_augmentations.append(f"{augment_func.__name__}:{','.join(f'{k}={v}' for k, v in params.items())}")
    
    if random.random() < 0.5:
        nature_background = mix_nature_sounds(len(audio) / sr, sr)
        ratio = random.uniform(0.1, 0.3)
        audio = add_background(audio, nature_background, ratio=ratio)
        applied_augmentations.append(f"add_background:ratio={ratio:.2f}")
    
    return audio, applied_augmentations



In [23]:

def augment_and_save(input_file, output_dir, num_augmentations=3):
    try:
        # Load the audio file
        audio, sr = librosa.load(input_file, sr=None)
        
        augmented_files = []
        all_applied_augmentations = []
        
        for i in range(num_augmentations):
            # Apply augmentation
            augmented_audio, applied_augmentations = augment_audio(audio, sr)
            
            # Generate new filename
            base_name = os.path.basename(input_file)
            name, ext = os.path.splitext(base_name)
            new_name = f"{name}_aug_{i+1}{ext}"
            output_path = os.path.join(output_dir, new_name)
            
            # Save augmented audio
            sf.write(output_path, augmented_audio, sr)
            
            augmented_files.append(output_path)
            all_applied_augmentations.append(';'.join(applied_augmentations))
        
        return augmented_files, all_applied_augmentations
    
    except Exception as e:
        print(f"Error processing {input_file}: {str(e)}")
        return [], []


def process_dataframe(df, input_dir, output_dir, num_augmentations=3):
    new_rows = []
    
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing files"):
        input_file = os.path.join(input_dir, row['processed_file'])
        augmented_files, augmentations = augment_and_save(input_file, output_dir, num_augmentations)
        
        for aug_file, aug_details in zip(augmented_files, augmentations):
            new_row = row.copy()
            new_row['processed_file'] = os.path.relpath(aug_file, output_dir)
            new_row['augmentations'] = aug_details
            new_rows.append(new_row)
    
    augmented_df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
    return augmented_df


In [24]:
# Test Augmentation functions
# Filter processed_data to only include files with "segment" in the file name
processed_data = processed_data[processed_data['processed_file'].str.contains('_segment_')]

# Convert 'file_name' column to a list
file_names = processed_data['processed_file'].tolist()

# Select a random file
random_file = random.choice(file_names)
file_path = os.path.join('Processed Recordings', random_file)

# Load the audio file
audio, sr = librosa.load(file_path, sr=None)

# Create 'Test' directory if it doesn't exist
os.makedirs('Test', exist_ok=True)

# List of augmentation functions to test
augmentation_functions = [
    (pitch_shift, {'n_steps': 2}),
    (time_stretch, {'rate': 1.2}),
    (add_noise, {'noise_factor': 0.01}),
    (change_speed, {'speed_factor': 1.1}),
    (apply_filter, {'filter_type': 'lowpass', 'cutoff': 2000}),
    (time_shift, {'shift_max': int(sr * 0.5)}),
    (mix_nature_sounds, {'duration': len(audio) / sr, 'sr': sr})
]

print(f"Testing augmentations on file: {random_file}")

# Apply each augmentation function and save the result
for i, (func, params) in enumerate(augmentation_functions):
    if func.__name__ == 'mix_nature_sounds':
        # For mix_nature_sounds, handle it differently as it needs a background
        background = func(**params)
        augmented = add_background(audio, background, ratio=0.2)
    elif func.__name__ in ['time_stretch', 'change_speed', 'add_noise', 'time_shift']:
        # These functions don't need sr
        augmented = func(audio, **params)
    else:
        # For pitch_shift and apply_filter, which need sr, pass audio, sr, and params
        augmented = func(audio, sr, **params)
    
    # Generate output filename
    output_filename = f"{os.path.splitext(random_file)[0]}_{func.__name__}.wav"
    output_path = os.path.join('Test', output_filename)
    
    # Save the augmented audio
    sf.write(output_path, augmented, sr)
    
    print(f"Saved {output_filename}")

print("Augmentation test complete.")

Testing augmentations on file: Setophaga_ruticilla_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_825242_segment_38.wav
Augmentation test complete.


In [25]:

# Apply augmentation to every file
input_dir = 'Processed Recordings'
output_dir = 'Augmented Recordings'
num_augmentations = 3


# Process the dataframe
augmented_data = process_dataframe(processed_data, input_dir, output_dir, num_augmentations)

print(f"Augmentation complete. {len(augmented_data) - len(processed_data)} new samples created.")

Processing files:   0%|          | 17/12120 [00:06<1:02:51,  3.21it/s]

Error processing Processed Recordings\Branta_canadensis_Lawrence_Woods_SNP_418000_segment_10.wav: operands could not be broadcast together with shapes (1000,) (521,) 


Processing files:   0%|          | 59/12120 [00:23<1:14:38,  2.69it/s]

Error processing Processed Recordings\Cygnus_buccinator_Killdeer_Plains_Wildlife_Management_Area_Wyandot_County_Ohio_713788_segment_32.wav: operands could not be broadcast together with shapes (500,) (235,) 


Processing files:   2%|▏         | 236/12120 [01:33<1:25:26,  2.32it/s]

Error processing Processed Recordings\Antrostomus_vociferus_Sutton_Braxton_County_West_Virginia_929758_segment_19.wav: operands could not be broadcast together with shapes (1000,) (585,) 


Processing files:   6%|▋         | 768/12120 [04:55<50:28,  3.75it/s]  

Error processing Processed Recordings\Chlidonias_niger_Pelee_near__Leamington_Essex_County_Ontario_476265_segment_1.wav: operands could not be broadcast together with shapes (500,) (119,) 




Error processing Processed Recordings\Bubo_virginianus_mount_orab_ohio_760068_segment_2.wav: operands could not be broadcast together with shapes (500,) (168,) 


Processing files:  11%|█         | 1323/12120 [08:34<59:15,  3.04it/s]  

Error processing Processed Recordings\Melanerpes_carolinus_Miami_Whitewater_Forest_Park_wetlands_Crosby_Township_near__Harrison_Hamilton_County_Ohio_687174_segment_5.wav: operands could not be broadcast together with shapes (500,) (291,) 


Processing files:  11%|█▏        | 1367/12120 [08:52<1:07:05,  2.67it/s]

Error processing Processed Recordings\Melanerpes_carolinus_Wegerzyn_garden_near__Dayton_Montgomery_County_Ohio_807218_segment_35.wav: operands could not be broadcast together with shapes (500,) (462,) 


Processing files:  12%|█▏        | 1408/12120 [09:07<47:37,  3.75it/s]  

Error processing Processed Recordings\Melanerpes_carolinus_Harrison_Township_near__Harrison_Hamilton_County_Ohio_691526_segment_5.wav: operands could not be broadcast together with shapes (1000,) (570,) 


Processing files:  12%|█▏        | 1486/12120 [09:37<56:00,  3.16it/s]  

Error processing Processed Recordings\Dryobates_pubescens_Point_Pelee_National_Park_near__Wheatley_Essex_County_Ontario_803584_segment_6.wav: operands could not be broadcast together with shapes (1000,) (641,) 


Processing files:  14%|█▍        | 1680/12120 [13:17<4:33:56,  1.57s/it]

Error processing Processed Recordings\Sayornis_phoebe_Harrison_Township_near__Harrison_Hamilton_County_Ohio_713591_segment_9.wav: operands could not be broadcast together with shapes (1000,) (557,) 


Processing files:  15%|█▍        | 1771/12120 [15:10<3:26:22,  1.20s/it]

Error processing Processed Recordings\Contopus_virens_Harrison_Township_near__Harrison_Hamilton_County_Ohio_726814_segment_1.wav: operands could not be broadcast together with shapes (1000,) (985,) 


Processing files:  15%|█▌        | 1834/12120 [16:33<3:01:14,  1.06s/it]

Error processing Processed Recordings\Contopus_virens_Whitewater_Township_near__Cleves_Hamilton_County_Ohio_928711_segment_4.wav: operands could not be broadcast together with shapes (1000,) (615,) 


Processing files:  17%|█▋        | 2021/12120 [21:02<3:33:22,  1.27s/it]

Error processing Processed Recordings\Empidonax_traillii_Franklin_Township_near__Wooster_Wayne_County_Ohio_418568_segment_5.wav: operands could not be broadcast together with shapes (500,) (422,) 


Processing files:  17%|█▋        | 2067/12120 [22:12<2:43:54,  1.02it/s]

Error processing Processed Recordings\Empidonax_traillii_Magee_Marsh_Ohio_298575_segment_36.wav: operands could not be broadcast together with shapes (1000,) (710,) 




Error processing Processed Recordings\Vireo_gilvus_Benton_Township_near__Rocky_Ridge_Ottawa_County_Ohio_902984_segment_4.wav: operands could not be broadcast together with shapes (500,) (282,) 


Processing files:  21%|██        | 2501/12120 [24:55<55:08,  2.91it/s]  

Error processing Processed Recordings\Vireo_gilvus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827588_segment_6.wav: operands could not be broadcast together with shapes (1000,) (558,) 


Processing files:  24%|██▍       | 2894/12120 [27:24<1:23:45,  1.84it/s]

Error processing Processed Recordings\Vireo_bellii_Heritage_Rail-Trail_South_of_Hayden_Run_Rd_~200m_south_of_mile_marker_2_across_from_nest_box_#10_103570_segment_19.wav: operands could not be broadcast together with shapes (1000,) (763,) 




Error processing Processed Recordings\Vireo_bellii_Heritage_Rail-Trail_South_of_Hayden_Run_Rd_~200m_south_of_mile_marker_2_across_from_nest_box_#10_103570_segment_59.wav: operands could not be broadcast together with shapes (1000,) (640,) 


Processing files:  25%|██▍       | 3021/12120 [28:09<46:14,  3.28it/s]  

Error processing Processed Recordings\Cyanocitta_cristata_Green_Township_near__Cincinnati_Hamilton_County_Ohio_672963_segment_2.wav: operands could not be broadcast together with shapes (1000,) (797,) 


Processing files:  26%|██▌       | 3140/12120 [28:55<55:10,  2.71it/s]

Error processing Processed Recordings\Corvus_ossifragus_Howe_Ave_@_Jared__Cuyahoga_Falls_Summit_County_Ohio_398409_segment_0.wav: operands could not be broadcast together with shapes (1000,) (998,) 


Processing files:  26%|██▌       | 3155/12120 [29:02<57:16,  2.61it/s]  

Error processing Processed Recordings\Baeolophus_bicolor_Harrison_Township_near__Harrison_Hamilton_County_Ohio_807495_segment_5.wav: operands could not be broadcast together with shapes (1000,) (776,) 


Processing files:  26%|██▌       | 3177/12120 [29:10<47:59,  3.11it/s]  

Error processing Processed Recordings\Baeolophus_bicolor_Miami_Township_near__Cleves_Hamilton_County_Ohio_713584_segment_17.wav: operands could not be broadcast together with shapes (1000,) (563,) 




Error processing Processed Recordings\Baeolophus_bicolor_Beachwood_Cuyahoga_County_Ohio_287347_segment_2.wav: operands could not be broadcast together with shapes (500,) (80,) 




Error processing Processed Recordings\Poecile_carolinensis_Lawrence_Woods_SNP_417997_segment_3.wav: operands could not be broadcast together with shapes (500,) (404,) 


Processing files:  29%|██▊       | 3481/12120 [31:49<3:16:06,  1.36s/it]

Error processing Processed Recordings\Poecile_atricapillus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814911_segment_69.wav: operands could not be broadcast together with shapes (1000,) (901,) 


Processing files:  29%|██▊       | 3482/12120 [31:51<3:10:33,  1.32s/it]

Error processing Processed Recordings\Poecile_atricapillus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814911_segment_70.wav: operands could not be broadcast together with shapes (1000,) (630,) 


Processing files:  29%|██▉       | 3551/12120 [33:41<4:08:45,  1.74s/it] 

Error processing Processed Recordings\Eremophila_alpestris_Michigan_Monroe_County_16969_segment_2.wav: operands could not be broadcast together with shapes (1000,) (542,) 


Processing files:  30%|███       | 3666/12120 [36:22<1:56:33,  1.21it/s]

Error processing Processed Recordings\Tachycineta_bicolor_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827054_segment_78.wav: operands could not be broadcast together with shapes (1000,) (904,) 


Processing files:  32%|███▏      | 3847/12120 [39:38<46:00,  3.00it/s]

Error processing Processed Recordings\Progne_subis_Whitewater_Township_near__Cleves_Hamilton_County_Ohio_817716_segment_14.wav: operands could not be broadcast together with shapes (1000,) (713,) 


Processing files:  32%|███▏      | 3912/12120 [40:03<39:31,  3.46it/s]  

Error processing Processed Recordings\Cistothorus_palustris_Jerusalem_Township_near__Bono_Lucas_County_Ohio_784047_segment_10.wav: operands could not be broadcast together with shapes (1000,) (621,) 


Processing files:  33%|███▎      | 3982/12120 [40:30<43:26,  3.12it/s]

Error processing Processed Recordings\Cistothorus_palustris_Ohio_Ottawa_NWR_178295_segment_20.wav: operands could not be broadcast together with shapes (1000,) (629,) 


Processing files:  34%|███▍      | 4126/12120 [42:11<1:39:54,  1.33it/s]

Error processing Processed Recordings\Thryothorus_ludovicianus_Anderson_Township_near__Cincinnati_Hamilton_County_Ohio_542519_segment_3.wav: operands could not be broadcast together with shapes (500,) (51,) 


Processing files:  35%|███▌      | 4260/12120 [44:09<1:16:46,  1.71it/s]

Error processing Processed Recordings\Troglodytes_aedon_Green_Township_near__Cincinnati_Hamilton_County_Ohio_726630_segment_12.wav: operands could not be broadcast together with shapes (1000,) (765,) 


Processing files:  37%|███▋      | 4425/12120 [46:58<1:13:10,  1.75it/s]

Error processing Processed Recordings\Troglodytes_aedon_Ohio_Crane_Creek_SP_17052_segment_4.wav: operands could not be broadcast together with shapes (500,) (58,) 


Processing files:  38%|███▊      | 4558/12120 [49:16<1:11:53,  1.75it/s]

Error processing Processed Recordings\Polioptila_caerulea_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_816237_segment_15.wav: operands could not be broadcast together with shapes (1000,) (957,) 
Error processing Processed Recordings\Polioptila_caerulea_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_816237_segment_16.wav: operands could not be broadcast together with shapes (1000,) (536,) 


Processing files:  38%|███▊      | 4627/12120 [50:22<1:34:40,  1.32it/s]

Error processing Processed Recordings\Polioptila_caerulea_Miami_Township_near__Cleves_Hamilton_County_Ohio_645807_segment_1.wav: operands could not be broadcast together with shapes (1000,) (595,) 


Processing files:  40%|███▉      | 4843/12120 [53:51<1:12:01,  1.68it/s]

Error processing Processed Recordings\Sitta_carolinensis_Harrison_Township_near__Harrison_Hamilton_County_Ohio_807181_segment_38.wav: operands could not be broadcast together with shapes (1000,) (650,) 


Processing files:  41%|████      | 4957/12120 [55:35<1:47:53,  1.11it/s]

Error processing Processed Recordings\Dumetella_carolinensis_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814891_segment_17.wav: operands could not be broadcast together with shapes (500,) (313,) 


Processing files:  41%|████▏     | 5027/12120 [56:36<1:25:09,  1.39it/s]

Error processing Processed Recordings\Dumetella_carolinensis_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814888_segment_18.wav: operands could not be broadcast together with shapes (1000,) (737,) 


Processing files:  42%|████▏     | 5109/12120 [57:41<43:56,  2.66it/s]

Error processing Processed Recordings\Dumetella_carolinensis_Lawrence_Woods_SNP_477577_segment_5.wav: operands could not be broadcast together with shapes (500,) (443,) 


Processing files:  43%|████▎     | 5164/12120 [58:36<2:38:44,  1.37s/it]

Error processing Processed Recordings\Dumetella_carolinensis_Sheldon_Marsh_State_Nature_Preserve_723020_segment_7.wav: operands could not be broadcast together with shapes (1000,) (507,) 


Processing files:  43%|████▎     | 5189/12120 [59:07<1:33:45,  1.23it/s]

Error processing Processed Recordings\Dumetella_carolinensis_Magee_Marsh_boardwalk_west_end_262807_segment_1.wav: operands could not be broadcast together with shapes (500,) (475,) 


Processing files:  44%|████▍     | 5318/12120 [1:01:11<1:51:06,  1.02it/s]

Error processing Processed Recordings\Mimus_polyglottos_Green_Township_near__Cincinnati_Hamilton_County_Ohio_807536_segment_29.wav: operands could not be broadcast together with shapes (1000,) (526,) 


Processing files:  44%|████▍     | 5375/12120 [1:02:54<2:19:43,  1.24s/it] 

Error processing Processed Recordings\Mimus_polyglottos_Cincinnati_Hamilton_County_Ohio_677295_segment_25.wav: operands could not be broadcast together with shapes (1000,) (927,) 


Processing files:  44%|████▍     | 5388/12120 [1:03:12<2:43:07,  1.45s/it]

Error processing Processed Recordings\Mimus_polyglottos_Cincinnati_Hamilton_County_Ohio_677295_segment_38.wav: operands could not be broadcast together with shapes (500,) (448,) 


Processing files:  46%|████▌     | 5537/12120 [1:05:56<43:49,  2.50it/s]

Error processing Processed Recordings\Mimus_polyglottos_Dunham_Recreation_Complex_near__Cincinnati_Hamilton_County_Ohio_646400_segment_107.wav: operands could not be broadcast together with shapes (500,) (375,) 




Error processing Processed Recordings\Toxostoma_rufum_Hanover_Township_Licking_County_Ohio_893646_segment_51.wav: operands could not be broadcast together with shapes (1000,) (764,) 




Error processing Processed Recordings\Toxostoma_rufum_Hanover_Township_Licking_County_Ohio_893646_segment_64.wav: operands could not be broadcast together with shapes (500,) (330,) 


Processing files:  47%|████▋     | 5701/12120 [1:07:27<47:57,  2.23it/s]  

Error processing Processed Recordings\Toxostoma_rufum_Ross_Township_near__Hamilton_Butler_County_Ohio_632519_segment_2.wav: operands could not be broadcast together with shapes (500,) (41,) 


Processing files:  48%|████▊     | 5775/12120 [1:08:02<1:50:38,  1.05s/it]

Error processing Processed Recordings\Toxostoma_rufum_Virginia_Kendall_Lake_Peninsula_Summit_County_Ohio_412205_segment_18.wav: operands could not be broadcast together with shapes (1000,) (953,) 


Processing files:  48%|████▊     | 5868/12120 [1:08:57<33:42,  3.09it/s]

Error processing Processed Recordings\Hylocichla_mustelina_Packer-Hammersmith_Center_near__Oregon_Lucas_County_Ohio_784058_segment_59.wav: operands could not be broadcast together with shapes (500,) (111,) 




Error processing Processed Recordings\Hylocichla_mustelina_Michigan_Monroe_County_16970_segment_5.wav: operands could not be broadcast together with shapes (1000,) (634,) 


Processing files:  50%|█████     | 6085/12120 [1:11:57<33:59,  2.96it/s]

Error processing Processed Recordings\Turdus_migratorius_Harrison_Township_near__Harrison_Hamilton_County_Ohio_728269_segment_6.wav: operands could not be broadcast together with shapes (1000,) (964,) 


Processing files:  51%|█████     | 6163/12120 [1:12:28<29:18,  3.39it/s]

Error processing Processed Recordings\Turdus_migratorius_Bricker_Blvd_near_Cranbrook_Elementary_Columbus_OH_103571_segment_17.wav: operands could not be broadcast together with shapes (500,) (66,) 


Processing files:  53%|█████▎    | 6369/12120 [1:13:52<28:09,  3.40it/s]

Error processing Processed Recordings\Spizella_passerina_Miami_Township_near__Cleves_Hamilton_County_Ohio_645554_segment_28.wav: operands could not be broadcast together with shapes (1000,) (800,) 


Processing files:  54%|█████▍    | 6583/12120 [1:18:57<1:21:48,  1.13it/s]

Error processing Processed Recordings\Spizella_pusilla_Marseilles_Township_near__Upper_Sandusky_Wyandot_County_Ohio_637645_segment_16.wav: operands could not be broadcast together with shapes (500,) (94,) 


Processing files:  55%|█████▍    | 6626/12120 [1:19:59<1:57:55,  1.29s/it]

Error processing Processed Recordings\Spizella_pusilla_Michigan_Monroe_County_16986_segment_21.wav: operands could not be broadcast together with shapes (1000,) (526,) 




Error processing Processed Recordings\Pooecetes_gramineus_Michigan_Monroe_County_17095_segment_8.wav: operands could not be broadcast together with shapes (500,) (301,) 


Processing files:  55%|█████▌    | 6695/12120 [1:21:51<1:39:43,  1.10s/it]

Error processing Processed Recordings\Pooecetes_gramineus_Michigan_Monroe_County_16978_segment_5.wav: operands could not be broadcast together with shapes (500,) (266,) 


Processing files:  57%|█████▋    | 6909/12120 [1:24:28<23:07,  3.75it/s]

Error processing Processed Recordings\Pipilo_erythrophthalmus_Edge_of_Appalachia_Preserve_Adams_Cty_Ohio_939170_segment_6.wav: operands could not be broadcast together with shapes (500,) (134,) 


Processing files:  59%|█████▊    | 7111/12120 [1:25:46<38:32,  2.17it/s]

Error processing Processed Recordings\Icteria_virens_Miami_Township_near__Cleves_Hamilton_County_Ohio_807580_segment_32.wav: operands could not be broadcast together with shapes (1000,) (733,) 


Processing files:  59%|█████▉    | 7167/12120 [1:26:07<23:58,  3.44it/s]

Error processing Processed Recordings\Sturnella_magna_Voice_of_America_MetroPark_near__Middletown_Butler_County_Ohio_643650_segment_4.wav: operands could not be broadcast together with shapes (1000,) (985,) 


Processing files:  59%|█████▉    | 7190/12120 [1:26:15<25:06,  3.27it/s]

Error processing Processed Recordings\Icterus_galbula_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_820108_segment_15.wav: operands could not be broadcast together with shapes (1000,) (869,) 


Processing files:  61%|██████    | 7409/12120 [1:27:36<26:47,  2.93it/s]

Error processing Processed Recordings\Icterus_galbula_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_818926_segment_47.wav: operands could not be broadcast together with shapes (1000,) (752,) 




Error processing Processed Recordings\Icterus_galbula_Tanager_Meadow_Reservable_Shelter_near__Harrison_Hamilton_County_Ohio_807347_segment_1.wav: operands could not be broadcast together with shapes (1000,) (700,) 


Processing files:  64%|██████▎   | 7718/12120 [1:29:29<20:46,  3.53it/s]

Error processing Processed Recordings\Icterus_spurius_Heritage_Trail_Metro_Park_Hayden_Run_Rd_parking_area_103502_segment_33.wav: operands could not be broadcast together with shapes (1000,) (564,) 


Processing files:  65%|██████▍   | 7819/12120 [1:30:07<26:19,  2.72it/s]

Error processing Processed Recordings\Agelaius_phoeniceus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814755_segment_56.wav: operands could not be broadcast together with shapes (500,) (17,) 


Processing files:  65%|██████▍   | 7857/12120 [1:30:22<28:18,  2.51it/s]

Error processing Processed Recordings\Agelaius_phoeniceus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814755_segment_95.wav: operands could not be broadcast together with shapes (1000,) (771,) 


Processing files:  65%|██████▌   | 7903/12120 [1:30:39<20:05,  3.50it/s]

Error processing Processed Recordings\Agelaius_phoeniceus_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_814755_segment_140.wav: operands could not be broadcast together with shapes (1000,) (509,) 


Processing files:  66%|██████▌   | 8018/12120 [1:31:20<17:43,  3.86it/s]

Error processing Processed Recordings\Agelaius_phoeniceus_Wegerzyn_garden_near__Dayton_Montgomery_County_Ohio_807222_segment_0.wav: operands could not be broadcast together with shapes (1000,) (516,) 


Processing files:  67%|██████▋   | 8089/12120 [1:31:46<19:25,  3.46it/s]

Error processing Processed Recordings\Agelaius_phoeniceus_Battelle_Darby_Metro_Park_-_Wetlands_417853_segment_3.wav: operands could not be broadcast together with shapes (500,) (387,) 


Processing files:  70%|██████▉   | 8452/12120 [1:34:06<21:12,  2.88it/s]

Error processing Processed Recordings\Parkesia_motacilla_Harrison_Township_near__Harrison_Hamilton_County_Ohio_805820_segment_15.wav: operands could not be broadcast together with shapes (1000,) (514,) 


Processing files:  70%|███████   | 8500/12120 [1:34:23<15:18,  3.94it/s]

Error processing Processed Recordings\Parkesia_motacilla_Harrison_Township_near__Harrison_Hamilton_County_Ohio_714922_segment_45.wav: operands could not be broadcast together with shapes (1000,) (782,) 


Processing files:  73%|███████▎  | 8887/12120 [1:36:53<26:30,  2.03it/s]

Error processing Processed Recordings\Mniotilta_varia_Ohio_Crane_Creek_SP_17015_segment_5.wav: operands could not be broadcast together with shapes (500,) (128,) 


Processing files:  75%|███████▍  | 9040/12120 [1:37:54<21:28,  2.39it/s]

Error processing Processed Recordings\Protonotaria_citrea_Pelee_near__Leamington_Essex_County_Ontario_476998_segment_1.wav: operands could not be broadcast together with shapes (1000,) (741,) 


Processing files:  75%|███████▍  | 9081/12120 [1:38:09<18:17,  2.77it/s]

Error processing Processed Recordings\Protonotaria_citrea_Anderson_Township_near__Cincinnati_Hamilton_County_Ohio_552556_segment_4.wav: operands could not be broadcast together with shapes (1000,) (508,) 


Processing files:  79%|███████▉  | 9606/12120 [1:41:26<11:49,  3.54it/s]

Error processing Processed Recordings\Geothlypis_trichas_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_828478_segment_22.wav: operands could not be broadcast together with shapes (500,) (113,) 


Processing files:  82%|████████▏ | 9929/12120 [1:43:24<08:57,  4.07it/s]

Error processing Processed Recordings\Setophaga_ruticilla_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_825242_segment_13.wav: operands could not be broadcast together with shapes (500,) (342,) 


Processing files:  84%|████████▍ | 10194/12120 [1:45:05<09:28,  3.39it/s]

Error processing Processed Recordings\Setophaga_cerulea_Shawnee_State_Forest_Scioto_County_Ohio_417448_segment_2.wav: operands could not be broadcast together with shapes (1000,) (715,) 


Processing files:  84%|████████▍ | 10218/12120 [1:45:13<12:27,  2.54it/s]

Error processing Processed Recordings\Setophaga_cerulea_Miami_Township_near__North_Bend_Hamilton_County_Ohio_646112_segment_4.wav: operands could not be broadcast together with shapes (500,) (19,) 


Processing files:  85%|████████▌ | 10340/12120 [1:45:58<10:08,  2.93it/s]

Error processing Processed Recordings\Setophaga_magnolia_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_828025_segment_29.wav: operands could not be broadcast together with shapes (500,) (477,) 


Processing files:  85%|████████▌ | 10351/12120 [1:46:02<07:22,  4.00it/s]

Error processing Processed Recordings\Setophaga_magnolia_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_828022_segment_2.wav: operands could not be broadcast together with shapes (500,) (140,) 




Error processing Processed Recordings\Setophaga_magnolia_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_828022_segment_20.wav: operands could not be broadcast together with shapes (500,) (386,) 


Processing files:  86%|████████▌ | 10377/12120 [1:46:11<06:55,  4.20it/s]

Error processing Processed Recordings\Setophaga_magnolia_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_828022_segment_28.wav: operands could not be broadcast together with shapes (500,) (160,) 


Processing files:  86%|████████▌ | 10419/12120 [1:46:27<10:19,  2.75it/s]

Error processing Processed Recordings\Setophaga_magnolia_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_828020_segment_36.wav: operands could not be broadcast together with shapes (1000,) (718,) 


Processing files:  87%|████████▋ | 10493/12120 [1:46:57<08:36,  3.15it/s]

Error processing Processed Recordings\Setophaga_castanea_Park_End_Dr_Montgomery_County_Ohio_477393_segment_5.wav: operands could not be broadcast together with shapes (1000,) (972,) 


Processing files:  89%|████████▊ | 10736/12120 [1:48:29<06:52,  3.36it/s]

Error processing Processed Recordings\Setophaga_pensylvanica_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827845_segment_10.wav: operands could not be broadcast together with shapes (1000,) (598,) 


Processing files:  89%|████████▉ | 10815/12120 [1:49:00<07:08,  3.05it/s]

Error processing Processed Recordings\Setophaga_pensylvanica_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_827845_segment_90.wav: operands could not be broadcast together with shapes (1000,) (761,) 


Processing files:  90%|█████████ | 10910/12120 [1:49:36<08:52,  2.27it/s]

Error processing Processed Recordings\Setophaga_pinus_Cuyahoga_Valley_National_Park_Wetmore_Trail_662930_segment_4.wav: operands could not be broadcast together with shapes (500,) (236,) 


Processing files:  91%|█████████ | 10978/12120 [1:50:00<07:58,  2.39it/s]

Error processing Processed Recordings\Setophaga_dominica_Pater_Wildlife_Area_Butler_County_Ohio_44434_segment_2.wav: operands could not be broadcast together with shapes (500,) (457,) 


Processing files:  91%|█████████▏| 11068/12120 [1:50:34<05:08,  3.40it/s]

Error processing Processed Recordings\Cardellina_canadensis_OSU's_Waterman_Farm_wood_lot_Columbus_Franklin_County_Ohio_179679_segment_4.wav: operands could not be broadcast together with shapes (500,) (389,) 


Processing files:  94%|█████████▍| 11436/12120 [1:56:13<12:08,  1.06s/it]

Error processing Processed Recordings\Pheucticus_ludovicianus_Oak_Openings_Metropark_Ohio_418070_segment_24.wav: operands could not be broadcast together with shapes (500,) (29,) 


Processing files:  95%|█████████▌| 11540/12120 [1:59:06<10:43,  1.11s/it]

Error processing Processed Recordings\Cardinalis_cardinalis_Miami_Township_near__Cleves_Hamilton_County_Ohio_713583_segment_6.wav: operands could not be broadcast together with shapes (1000,) (742,) 


Processing files:  97%|█████████▋| 11722/12120 [2:02:30<02:06,  3.15it/s]

Error processing Processed Recordings\Cardinalis_cardinalis_Tanglewood_Lane_Cincinnati_Ohio_302932_segment_137.wav: operands could not be broadcast together with shapes (1000,) (602,) 


Processing files:  99%|█████████▉| 12012/12120 [2:06:13<02:21,  1.31s/it]

Error processing Processed Recordings\Sonus_naturalis_Jerusalem_Township_near__Bono_Lucas_County_Ohio_784063_segment_8.wav: operands could not be broadcast together with shapes (1000,) (767,) 


Processing files: 100%|█████████▉| 12060/12120 [2:06:51<00:27,  2.19it/s]

Error processing Processed Recordings\Sonus_naturalis_Lawrence_Woods_SNP_417979_segment_25.wav: operands could not be broadcast together with shapes (1000,) (921,) 


Processing files: 100%|██████████| 12120/12120 [2:07:19<00:00,  1.59it/s]


Augmentation complete. 36069 new samples created.


### Extract features from the processed and augmented audio files.

In [40]:
augmented_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48189 entries, 0 to 48188
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   genus            48189 non-null  object 
 1   species          48189 non-null  object 
 2   latitude         48189 non-null  float64
 3   longitude        48189 non-null  float64
 4   quality          48189 non-null  object 
 5   file_name        48189 non-null  object 
 6   simplified_type  48189 non-null  object 
 7   season           48189 non-null  object 
 8   time_of_day      48189 non-null  object 
 9   length_seconds   48189 non-null  int64  
 10  processed_file   48189 non-null  object 
 11  augmentations    36069 non-null  object 
dtypes: float64(2), int64(1), object(9)
memory usage: 4.4+ MB


In [43]:
augmented_data = augmented_data.fillna("None")

In [67]:
def is_valid_audio(audio, sr, min_duration=0.1, silence_threshold=-60):
    """Check if the audio segment is valid (not too short and not silent)."""
    duration = librosa.get_duration(y=audio, sr=sr)
    if duration < min_duration:
        return False
    
    # Check if the audio is mostly silent
    db = librosa.amplitude_to_db(np.abs(audio), ref=np.max)
    if np.mean(db) < silence_threshold:
        return False
    
    return True


def extract_features(audio, sr):
    # Mel-spectrogram
    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    
    # MFCCs
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    
    # Spectral Centroid
    spectral_centroids = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
    
    # Chroma Features
    chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
    
    # Zero Crossing Rate
    zero_crossing_rate = librosa.feature.zero_crossing_rate(audio)[0]
    
    # Spectral Rolloff
    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)[0]
    
    return {
        'mel_spectrogram_db': mel_spec_db,
        'mfccs': mfccs,
        'spectral_centroids': spectral_centroids,
        'chroma': chroma,
        'zero_crossing_rate': zero_crossing_rate,
        'spectral_rolloff': spectral_rolloff
    }

def summarize_feature(feature):
    if feature.ndim == 1:
        return [np.mean(feature), np.std(feature), np.max(feature)]
    elif feature.ndim == 2:
        return np.hstack([
            np.mean(feature, axis=1),
            np.std(feature, axis=1),
            np.max(feature, axis=1)
        ])
    
def save_mel_spectrogram(mel_spec, output_dir, base_filename, sr):
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(mel_spec, sr=sr, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel-spectrogram')
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f"{base_filename}_mel_spectrogram.png"))
    plt.close()
    

In [68]:
def process_audio_file(file_path, output_dir):
    try:
        # Load audio file
        audio, sr = librosa.load(file_path, sr=None)
        
        # Check if audio segment is valid
        if not is_valid_audio(audio, sr):
            print(f"Warning: Audio file {file_path} is too short or silent. Skipping.")
            return None, None
        
        # Extract features
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            features = extract_features(audio, sr)
        
        # Summarize features
        feature_summary = {}
        for key, value in features.items():
            if key != 'mel_spectrogram_db':
                feature_summary[f"{key}_summary"] = summarize_feature(value)
        
        # Create feature vector
        feature_vector = np.hstack([
            feature_summary.get('mfccs_summary', np.array([])),
            feature_summary.get('spectral_centroids_summary', np.array([])),
            feature_summary.get('chroma_summary', np.array([])),
            feature_summary.get('zero_crossing_rate_summary', np.array([])),
            feature_summary.get('spectral_rolloff_summary', np.array([]))
        ])
        
        # Save mel-spectrogram as image
        base_filename = os.path.splitext(os.path.basename(file_path))[0]
        save_mel_spectrogram(features['mel_spectrogram_db'], output_dir, base_filename, sr)
        
        return feature_vector, features
    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")
        return None, None

def process_audio_files(df, base_dir, output_dir):
    feature_data = []
    skipped_files = []
    
    for _, row in tqdm(df.iterrows(), total=len(df)):
        possible_paths = [
            os.path.join(base_dir, 'Augmented Recordings', row['processed_file']),
            os.path.join(base_dir, 'Processed Recordings', row['processed_file'])
        ]
        
        file_path = next((path for path in possible_paths if os.path.exists(path)), None)
        
        if file_path is None:
            print(f"File not found: {row['processed_file']}")
            skipped_files.append(row['processed_file'])
            continue
        
        feature_vector, full_features = process_audio_file(file_path, output_dir)
        
        if feature_vector is not None and full_features is not None:
            feature_dict = {
                'processed_file': row['processed_file'],
                'feature_vector': feature_vector,
            }
            
            # Add full feature arrays
            for key, value in full_features.items():
                feature_dict[f"{key}_full"] = value
            
            feature_data.append(feature_dict)
        else:
            skipped_files.append(row['processed_file'])
    
    print(f"Total files skipped: {len(skipped_files)}")
    return feature_data, skipped_files

In [70]:
# Select a random file
random_file = random.choice(augmented_data['processed_file'])
possible_paths = [
    os.path.join('Augmented Recordings', random_file),
    os.path.join('Processed Recordings', random_file)
]
file_path = next((path for path in possible_paths if os.path.exists(path)), None)

if file_path is None:
    print(f"Error: File not found - {random_file}")
else:
    print(f"Testing feature extraction on file: {file_path}")

    # Load the audio file
    audio, sr = librosa.load(file_path, sr=None)

    # Extract features
    features = extract_features(audio, sr)

    # Print a summary of each feature
    for feature_name, feature_data in features.items():
        if feature_name == 'mel_spectrogram_db':
            print(f"{feature_name} shape: {feature_data.shape}")
        else:
            print(f"{feature_name} shape: {feature_data.shape}, mean: {np.mean(feature_data):.4f}, std: {np.std(feature_data):.4f}")

    # Summarize features
    feature_summary = {}
    for key, value in features.items():
        if key != 'mel_spectrogram_db':
            feature_summary[f"{key}_summary"] = summarize_feature(value)

    # Print summary of summarized features
    print("\nSummarized Features:")
    for key, value in feature_summary.items():
        print(f"{key} shape: {value.shape}, mean: {np.mean(value):.4f}, std: {np.std(value):.4f}")

    # Create feature vector
    feature_vector = np.hstack([
        feature_summary.get('mfccs_summary', np.array([])),
        feature_summary.get('spectral_centroids_summary', np.array([])),
        feature_summary.get('chroma_summary', np.array([])),
        feature_summary.get('zero_crossing_rate_summary', np.array([])),
        feature_summary.get('spectral_rolloff_summary', np.array([]))
    ])

    print(f"\nFinal feature vector shape: {feature_vector.shape}")

    # Save mel-spectrogram as image
    output_dir = 'Test'
    os.makedirs(output_dir, exist_ok=True)
    base_filename = os.path.splitext(os.path.basename(file_path))[0]
    save_mel_spectrogram(features['mel_spectrogram_db'], output_dir, base_filename, sr)

    print(f"Mel-spectrogram saved as: {base_filename}_mel_spectrogram.png in {output_dir}")

print("Feature extraction test complete.")

  audio, sr = librosa.load(file_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


FileNotFoundError: [Errno 2] No such file or directory: 'Processed Recordings\\Poecile_carolinensis_Lawrence_Woods_SNP_417998.wav'

In [71]:
# Usage

base_dir = os.getcwd()
output_dir = 'mel-spectrograms'

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

print(f"Current directory (base_dir): {base_dir}")
print(f"Output directory: {output_dir}")

feature_data, skipped_files = process_audio_files(augmented_data, base_dir, output_dir)

# Convert feature_data to DataFrame
feature_df = pd.DataFrame([{'processed_file': item['processed_file'], 'feature_vector': item['feature_vector']} for item in feature_data])

# Merge the new feature DataFrame with the existing final_data DataFrame
final_data = pd.merge(augmented_data, feature_df, on='processed_file', how='left')

# Drop rows corresponding to skipped files
final_data = final_data[~final_data['processed_file'].isin(skipped_files)]

print(final_data.info())

print(f"\nTotal files in augmented_data: {len(augmented_data)}")
print(f"Files successfully processed: {len(feature_df)}")
print(f"Files skipped (too short or silent): {len(skipped_files)}")
print(f"Files in final_data after dropping skipped files: {len(final_data)}")

Current directory (base_dir): c:\Users\16148\Desktop\Projects\bird_call_project
Output directory: mel-spectrograms


  0%|          | 26/48189 [00:09<4:54:29,  2.73it/s]



  0%|          | 111/48189 [01:26<14:01:05,  1.05s/it]



  0%|          | 120/48189 [01:37<19:03:42,  1.43s/it]



  0%|          | 147/48189 [02:08<16:32:27,  1.24s/it]



  0%|          | 167/48189 [02:32<15:15:47,  1.14s/it]



  0%|          | 188/48189 [02:56<15:25:05,  1.16s/it]



  1%|          | 246/48189 [03:26<4:25:56,  3.00it/s] 



  1%|          | 279/48189 [03:40<8:08:14,  1.64it/s]



  1%|          | 282/48189 [03:42<8:45:21,  1.52it/s]



  1%|          | 286/48189 [03:45<9:50:26,  1.35it/s]



  1%|          | 295/48189 [03:54<14:00:51,  1.05s/it]



  1%|          | 301/48189 [03:59<13:32:12,  1.02s/it]



  1%|          | 342/48189 [04:44<14:14:19,  1.07s/it]



  1%|          | 346/48189 [04:48<14:30:48,  1.09s/it]



  1%|          | 350/48189 [04:52<14:16:56,  1.07s/it]



  1%|          | 501/48189 [07:36<5:13:27,  2.54it/s] 



  1%|          | 556/48189 [08:00<11:26:12,  1.16it/s]


KeyboardInterrupt: 

In [None]:
final_data.info()

In [None]:
# Save DataFrame to CSV
final_data.to_csv('final_data.csv', index=False)

print("Processing complete. Summary data saved to 'final_data.csv'.")