In [1]:
#%pip install quilt3[pyarrow]==5.3.1
#%pip install librosa
#%pip install azure-storage-blob
#%pip install python-dotenv

In [2]:
import os
import quilt3
import pandas as pd
import numpy as np
from dotenv import load_dotenv

import librosa
import soundfile as sf
import functools

from azure.storage.blob import BlobServiceClient

import matplotlib.pyplot as plt

import tqdm as notebook_tqdm

In [3]:
def list_folders(directory):
    """Return the names of the immediate sub-directories of ``directory``.

    Only entry names (not full paths) are returned, in the order in which
    ``os.scandir`` yields them. Regular files are ignored.
    """
    with os.scandir(directory) as scan:
        return [item.name for item in scan if item.is_dir()]


def list_files(directory, extension):
    """Return the names of the regular files in ``directory`` ending in ``extension``.

    Only file names (not full paths) are returned, in ``os.listdir`` order.
    Sub-directories are skipped even when their name ends with ``extension``.
    """
    return [
        candidate
        for candidate in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, candidate))
        and candidate.endswith(extension)
    ]

In [7]:
# Root folder of the locally available dataset (relative path).
data_download_folder = "../data"

# Raw inputs: the annotation tables (.txt) and the audio recordings (.flac).
annotations_path = data_download_folder + "/raw/annotations"
audio_path = data_download_folder + "/raw/audio"

# Destination folder for the clips cut out of the raw recordings.
extracted_calls_path = data_download_folder + "/preprocessed"

In [5]:
# Make sure the output folder for the extracted clips exists before anything
# is written into it. exist_ok=True keeps the cell safe to re-run and avoids
# the check-then-create race of a separate os.path.exists() test.
os.makedirs(extracted_calls_path, exist_ok=True)

# Extract humpback whale vocalizations from raw audio files

In [9]:
# Inventory of the annotation tables and the audio recordings on disk.
annotation_filenames = list_files(annotations_path, ".txt")
audio_filenames = list_files(audio_path, ".flac")

# Preview one annotation table (the second entry of the listing): the file is
# tab-separated and is shown ordered by its "Selection" column.
print(annotation_filenames[1])
df = (
    pd.read_csv(f"{annotations_path}/{annotation_filenames[1]}", sep="\t")
    .sort_values(by="Selection")
)
df.head(10)

OS_10_03_2021_19_34_00_.Table.1.selections.txt


Unnamed: 0,Selection,Begin Time (s),End Time (s),Low Freq (Hz),High Freq (Hz),Call Type
0,1,1646.999571,1648.733984,628.263,1297.059,Ascending moan
1,2,1653.223452,1654.641001,749.862,1134.926,Moan
2,3,1659.862135,1660.595925,770.129,1033.594,Moan
3,4,1661.796673,1663.747887,283.732,709.329,Ascending moan
4,5,1678.344185,1680.262045,506.664,1013.327,Moan
5,6,1684.197828,1687.39982,303.998,1155.193,Descending moan
6,7,1689.378332,1689.628487,2229.32,2452.252,Chirp
7,8,1691.546347,1693.180698,709.329,1114.66,Ascending moan
8,9,1695.532161,1696.599492,162.132,466.131,Growl
9,10,1756.907157,1758.291352,729.596,993.061,Ascending moan


# Extract the background noise between annotated vocalizations

In [None]:
# Cut every stretch of audio that is NOT covered by an annotated selection
# into its own "no_vocalization" clip:
#   * the lead-in before the first selection,
#   * every gap between two consecutive selections,
#   * the tail after the last selection.
# The clip that ends where selection N begins is saved as "<recording>_<N>.wav";
# the tail is saved as "<recording>_end.wav".
call_type = 'no_vocalization'
noise_dir = f"{extracted_calls_path}/{call_type}"
os.makedirs(noise_dir, exist_ok=True)  # loop-invariant, so create it once

for audio_file_name in audio_filenames:
    x, sr = librosa.load(f"{audio_path}/{audio_file_name}", sr=None)  # sr=None keeps the original sample rate
    print(f"Processing {audio_path}/{audio_file_name}")
    annotation_file_name = audio_file_name.replace(".flac", ".Table.1.selections.txt")
    df = pd.read_csv(f"{annotations_path}/{annotation_file_name}", sep="\t")
    # Walk the selections chronologically so that consecutive rows bound a gap,
    # regardless of how the selection ids happen to be numbered.
    df = df.sort_values(["Begin Time (s)", "End Time (s)"])
    afn = audio_file_name.replace(".flac", "")

    # Each entry is (file-name suffix, start in seconds, end in seconds);
    # an end of None means "up to the end of the recording".
    gaps = []
    previous_end = 0.0  # reset per file so no state leaks from the previous recording
    for _, row in df.iterrows():
        gaps.append((row["Selection"], previous_end, row["Begin Time (s)"]))
        # max() keeps the cursor from moving backwards when selections overlap
        # or one selection is nested inside another.
        previous_end = max(previous_end, row["End Time (s)"])
    # Tail after the last selection. With an empty annotation table this is
    # the whole recording, since nothing in it is marked as a call.
    gaps.append(("end", previous_end, None))

    for suffix, start_time, end_time in gaps:
        # convert time to sample index
        start_sample = librosa.time_to_samples(start_time, sr=sr)
        if end_time is None:
            # assumes x is a 1-D mono signal (librosa.load default) - TODO confirm
            end_sample = len(x)
        else:
            end_sample = librosa.time_to_samples(end_time, sr=sr)

        # Back-to-back or overlapping selections leave no noise in between;
        # skip them instead of writing an empty wav file.
        if end_sample <= start_sample:
            continue

        # extract the sample and save it to a new file
        extracted_sample = x[start_sample:end_sample]
        path = f"{noise_dir}/{afn}_{suffix}.wav"
        sf.write(path, extracted_sample, sr)