<a href="https://colab.research.google.com/github/lightsixer/P2deeplearning/blob/dev/notebooks/P2_PreProcessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install noisereduce

Collecting noisereduce
  Downloading noisereduce-3.0.0-py3-none-any.whl (22 kB)
Installing collected packages: noisereduce
Successfully installed noisereduce-3.0.0


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import noisereduce as nr
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def reduce_noise(input_file):
    """Load an audio file and return its noise-reduced signal.

    The file is read with ``librosa.load(..., sr=None)`` and the resulting
    samples are passed through ``nr.reduce_noise``.

    Args:
        input_file: Path of the audio file to load.

    Returns:
        tuple: ``(denoised, sr)`` - the output of ``nr.reduce_noise`` and the
        sampling rate reported by ``librosa.load``.
    """
    signal, sample_rate = librosa.load(input_file, sr=None)
    denoised = nr.reduce_noise(y=signal, sr=sample_rate)
    return denoised, sample_rate

In [None]:
def detect_active_regions(y, sr, threshold=0.02, frame_length=2048, hop_length=512, min_region_duration=5):
    """Split the active part of a signal into consecutive time regions.

    A frame is "active" when the sum of its STFT magnitudes exceeds
    ``threshold``. Starting at the first active frame, a region is closed
    as soon as an active frame lies more than ``min_region_duration`` seconds
    after the region start; that frame also starts the next region. Gaps
    between active frames do not end a region, and trailing active frames that
    never reach the minimum duration are not emitted.

    Args:
        y: Audio samples.
        sr: Sampling rate of ``y``.
        threshold: Minimum summed STFT magnitude for a frame to be active.
        frame_length: FFT window size passed to ``librosa.stft`` as ``n_fft``.
        hop_length: Hop between STFT frames, in samples.
        min_region_duration: Minimum length of a region, in seconds.

    Returns:
        list: ``(start_time, end_time)`` tuples in seconds; empty when no
        frame is active or no region reaches the minimum duration.
    """
    magnitude = np.abs(librosa.stft(y, n_fft=frame_length, hop_length=hop_length))
    amplitude = np.sum(magnitude, axis=0)

    # Find active frames
    active_frames = np.where(amplitude > threshold)[0]
    if active_frames.size == 0:
        # Silent input: without this guard active_frames[0] raises IndexError
        # and callers never get the chance to handle "no active regions".
        return []

    # Region length threshold in frames; constant for the whole loop.
    min_region_frames = int(min_region_duration * sr / hop_length)

    # Cut the active span into regions longer than the minimum duration
    active_regions = []
    start_frame = active_frames[0]
    for frame in active_frames:
        if frame - start_frame > min_region_frames:
            active_regions.append((start_frame, frame))
            start_frame = frame

    # Convert active regions from frame indices to time
    return [
        (
            librosa.frames_to_time(region_start, sr=sr, hop_length=hop_length),
            librosa.frames_to_time(region_end, sr=sr, hop_length=hop_length),
        )
        for region_start, region_end in active_regions
    ]


In [None]:
def extract_active_segment(y, sr, active_times, segment_duration=5.0):
    """Return the sample range of a fixed-length segment at the first active time.

    Args:
        y: Audio samples.
        sr: Sampling rate of ``y``.
        active_times: Sequence (list or array) of active regions. Each entry
            may be a ``(start_time, end_time)`` pair, as produced by
            ``detect_active_regions``, or a bare start time in seconds.
        segment_duration: Desired segment length in seconds; the segment is
            clipped to the end of the audio.

    Returns:
        tuple: ``(start_sample, end_sample)``, or ``(None, None)`` when
        ``active_times`` is empty or ``None``.
    """
    # len() works for lists and arrays alike; ``.size`` only exists on arrays.
    if active_times is None or len(active_times) == 0:
        return None, None

    first_region = active_times[0]
    # A (start, end) pair contributes its start; a scalar is already a start time.
    if isinstance(first_region, (tuple, list)) or getattr(first_region, "ndim", 0) > 0:
        start_time = first_region[0]
    else:
        start_time = first_region

    total_duration = librosa.get_duration(y=y, sr=sr)
    end_time = min(start_time + segment_duration, total_duration)
    return librosa.time_to_samples(start_time, sr=sr), librosa.time_to_samples(end_time, sr=sr)

In [None]:
def generate_spectrogram(y, sr, save_path):
    """Render a dB-scaled mel spectrogram of ``y`` and save it to ``save_path``.

    The spectrogram uses 128 mel bands and is drawn on a 12x4 inch figure with
    a time x-axis, a mel y-axis and a dB colorbar. The figure is closed after
    saving.

    Args:
        y: Audio samples to visualise.
        sr: Sampling rate of ``y``.
        save_path: Destination of the image file.
    """
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    fig = plt.figure(figsize=(12, 4))
    librosa.display.specshow(mel_db, sr=sr, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+02.0f dB')
    fig.tight_layout()
    fig.savefig(save_path)
    # Release the figure; this function is called once per region of every file.
    plt.close(fig)

BRD: Enhanced the function below to crop each saved spectrogram image to an empirically observed pixel range.

Spectrogram file names are now also based on the original .ogg file name, and the images are written to a per-class directory structure.

In [None]:
from PIL import Image
from pathlib import Path

# Crop box, in pixels of the saved spectrogram image, handed to
# ``Image.crop`` as (LEFT, TOP, RIGHT, BOTTOM) inside process_audio.
LEFT, TOP = 80, 21        # upper-left corner
RIGHT, BOTTOM = 964, 343  # lower-right corner

def process_audio(input_file, output_spectrogram_dir, bird_class):
    """Denoise one audio file and save a cropped spectrogram per active region.

    Images are written to ``<output_spectrogram_dir>/<bird_class>/`` and named
    ``<input file stem>_<n>.png`` with ``n`` starting at 1. Each image is first
    rendered by ``generate_spectrogram`` and then overwritten in place with the
    (LEFT, TOP, RIGHT, BOTTOM) crop of itself.

    Args:
        input_file: Path of the audio file to process.
        output_spectrogram_dir: Root output directory for this data split.
        bird_class: Class label; used as the sub-directory name.

    Returns:
        None. Progress is reported with ``print``.
    """
    y, sr = reduce_noise(input_file)
    active_regions = detect_active_regions(y, sr)

    base_file_name = Path(input_file).stem
    class_dir = Path(output_spectrogram_dir) / bird_class
    # parents=True: the split directory itself may not exist yet on a first run.
    # exist_ok=True: several files share a class directory.
    class_dir.mkdir(parents=True, exist_ok=True)

    if not active_regions:
        print("No active regions detected.")
        return

    for i, (start_time, end_time) in enumerate(active_regions):
        start_sample = librosa.time_to_samples(start_time, sr=sr)
        end_sample = librosa.time_to_samples(end_time, sr=sr)

        # An empty slice cannot be rendered as a spectrogram.
        if end_sample <= start_sample:
            print(f"Skipping spectrogram {i+1} due to insufficient active audio.")
            continue

        output_spectrogram_path = str(class_dir / f"{base_file_name}_{i+1}.png")
        generate_spectrogram(y[start_sample:end_sample], sr, output_spectrogram_path)

        # Crop while the source image is open, then close it before the
        # cropped copy overwrites the same path.
        with Image.open(output_spectrogram_path) as img:
            crop_img = img.crop((LEFT, TOP, RIGHT, BOTTOM))
        crop_img.save(output_spectrogram_path, 'PNG')
        print(f"Spectrogram {i+1} saved as {output_spectrogram_path}")


Preprocessing using Tiru's parsing functions
----------------------------------------------------------------------------
First we load the random sample sets into dataframes...

In [None]:
import os
import pandas as pd

# Google Drive locations: metadata CSVs, source audio, and spectrogram output.
CSV_PATH = '/content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random_split_10'
AUDIO_PATH = '/content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/train_audio'
SPECTRO_PATH = '/content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data'

# Training split metadata
train_csv_path = os.path.join(CSV_PATH, 'random_train_metadata.csv')
train_md = pd.read_csv(train_csv_path)

# Test split metadata
test_csv_path = os.path.join(CSV_PATH, 'random_test_metadata.csv')
test_md = pd.read_csv(test_csv_path)

# Reserved split metadata
reserved_csv_path = os.path.join(CSV_PATH, 'random_reserved_metadata.csv')
reserved_md = pd.read_csv(reserved_csv_path)

# Show the training metadata as a sanity check.
display(train_md)

Function to iterate through the metadata dataframe rows for a subset, process each audio file, and generate spectrograms.

In [None]:
def pre_process(df_meta, output_path):
  """Run ``process_audio`` on every recording listed in a metadata frame.

  Args:
      df_meta: DataFrame with a ``primary_label`` column (class label) and a
          ``filename`` column (path relative to ``AUDIO_PATH``).
      output_path: Root output directory handed to ``process_audio``.
  """
  # Iterate by position rather than by index label, so frames with a
  # non-default index (filtered, sampled, re-indexed) work as well.
  for label, file_name in zip(df_meta['primary_label'], df_meta['filename']):
    print(label, file_name)
    file_path = os.path.join(AUDIO_PATH, file_name)
    print(file_path)
    process_audio(file_path, output_path, label)

# One spectrogram output root per metadata split, all under SPECTRO_PATH.
train_output_path, test_output_path, reserved_output_path = (
    os.path.join(SPECTRO_PATH, split_dir)
    for split_dir in ('train', 'test', 'reserved')
)

In [None]:
# Generate cropped spectrograms for every recording in the training metadata.
pre_process(train_md, train_output_path)

somgre1 somgre1/XC654730.ogg
/content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/train_audio/somgre1/XC654730.ogg
Spectrogram 1 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/train/somgre1/XC654730_1.png
Spectrogram 2 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/train/somgre1/XC654730_2.png
Spectrogram 3 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/train/somgre1/XC654730_3.png
Spectrogram 4 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/train/somgre1/XC654730_4.png
Spectrogram 5 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/train/somgre1/XC654730_5.png
Spectrogram 6 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/train/somgre1/XC654730_

In [None]:
# Generate cropped spectrograms for every recording in the test metadata.
pre_process(test_md, test_output_path)

somgre1 somgre1/XC411731.ogg
/content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/train_audio/somgre1/XC411731.ogg
Spectrogram 1 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/test/somgre1/XC411731_1.png
Spectrogram 2 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/test/somgre1/XC411731_2.png
Spectrogram 3 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/test/somgre1/XC411731_3.png
Spectrogram 4 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/test/somgre1/XC411731_4.png
Spectrogram 5 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/test/somgre1/XC411731_5.png
somgre1 somgre1/XC459326.ogg
/content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/train_audio/somgre1/XC459326.ogg
Spectrog

In [None]:
# Generate cropped spectrograms for every recording in the reserved metadata.
pre_process(reserved_md, reserved_output_path)

somgre1 somgre1/XC216433.ogg
/content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/train_audio/somgre1/XC216433.ogg
Spectrogram 1 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/reserved/somgre1/XC216433_1.png
Spectrogram 2 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/reserved/somgre1/XC216433_2.png
Spectrogram 3 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/reserved/somgre1/XC216433_3.png
Spectrogram 4 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/reserved/somgre1/XC216433_4.png
Spectrogram 5 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/reserved/somgre1/XC216433_5.png
Spectrogram 6 saved as /content/drive/MyDrive/P2_DeepLearning/birdclef-2023.zip (Unzipped Files)/random10_prep_data/reserved