# Embedding unlabeled audio for bird classification using Perch

## Set up environment

Here we create a coding environment for Perch, a bird clkassification model developed by google

https://github.com/google-research/perch  
https://www.kaggle.com/models/google/bird-vocalization-classifier/frameworks/tensorFlow2/variations/bird-vocalization-classifier/versions/4  
https://www.kaggle.com/code/tabassumnova/google-bird-vocalization-classifier-model-77-acc/notebook

To create the environment for this workflow follow the following steps:
1) Install Anaconda (https://www.anaconda.com/download/)
2) Create and activate environment, using powershell
```
conda create --name GooglePerch python=3.10
conda activate GooglePerch
```

3) Install base libraries
```
conda install -c conda-forge mamba
mamba install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 cudatoolkit-dev ipykernel ipython nbformat numpy pandas  cudatoolkit-dev matplotlib glob2
python -m pip install "tensorflow<2.11" "tensorflow-gpu<2.11" "tensorflow-io==0.27.0" tensorflow-hub librosa
```

4) Check tensorflow install
```
python -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))"
```

https://stackoverflow.com/questions/68614547/tensorflow-libdevice-not-found-why-is-it-not-found-in-the-searched-path

5) Remove and clean
```
conda deactivate
conda remove --name GooglePerch --all
conda build purge
conda clean -a -i -p -t -f -c -l
```

## Load packages

In [1]:
# Deep learning
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_io as tfio

# Set memory growing to true
gpus = tf.config.list_physical_devices('GPU')
if gpus:
      for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.list_logical_devices('GPU')

# Hide warning messages
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Disable eager execution 
#tf.compat.v1.disable_eager_execution()

# Data loading and manipulation
import pandas as pd
import numpy as np
import librosa
import csv
import json
from tqdm import tqdm

# File navigation
#import glob
#from pathlib import Path
#import io
import os

# Viewing
#from IPython.display import Audio

## Prepare model and lookup tables

In [2]:
# Directory
dir = "P:\\Projets\\Actif\\2023_ECCC4_Biodiv\\3-Analyses\\"

# Load model and lables locally. We are using version 4
model = hub.load(dir + "2-Analyses\\Perch\\")
labels_path = hub.resolve(dir + "2-Analyses\\Perch\\") + "assets\\label.csv"

# From the hub
#model = hub.load('https://kaggle.com/models/google/bird-vocalization-classifier/frameworks/tensorFlow2/variations/bird-vocalization-classifier/versions/4')
#labels_path = hub.resolve('https://kaggle.com/models/google/bird-vocalization-classifier/frameworks/tensorFlow2/variations/bird-vocalization-classifier/versions/4') + "/assets/label.csv"

# Grab list of birds the model has trained on
def class_names_from_csv(labels_path):
    """Returns list of class names corresponding to score vector."""
    with open(labels_path) as csv_file:
        csv_reader = csv.reader(csv_file)
        class_names = [row[0] for row in csv_reader]
        return class_names[1:]
model_classes=class_names_from_csv(labels_path)

# Save to disk?
#model_classesDF = pd.DataFrame(model_classes)
#model_classesDF.to_csv(dir + "1-Data\\Biodiversity\\Perchm_model_classes.csv", index=False)

In [3]:
# Load taxonomy data for filtering species codes
with open(dir + "2-Analyses\\Perch\\assets\\taxonomy_database.json", "r") as taxonomy:
    taxonomy = json.load(taxonomy)
taxonomy = taxonomy.get("mappings", None)
taxonomy = taxonomy.get("ebird2021_clements_to_species", None)

with open(dir + "2-Analyses\\Perch\\assets\\taxonomy_database.json", "r") as taxonomyextra:
    taxonomyextra = json.load(taxonomyextra)
taxonomyextra = taxonomyextra.get("mappings", None)
taxonomyextra = taxonomyextra.get("ebird2022_clements_to_species", None)

with open(dir + "2-Analyses\\Perch\\assets\\taxonomy_database.json", "r") as taxonomyextra2:
    taxonomyextra2 = json.load(taxonomyextra2)
taxonomyextra2 = taxonomyextra2.get("mappings", None)
taxonomyextra2 = taxonomyextra2.get("ioc_12_2_to_ebird2021", None)

with open(dir + "2-Analyses\\Perch\\assets\\taxonomy_database.json", "r") as taxonomyextra3:
    taxonomyextra3 = json.load(taxonomyextra3)
taxonomyextra3 = taxonomyextra3.get("mappings", None)
taxonomyextra3 = taxonomyextra3.get("xenocanto_11_2_to_ebird2022_species", None)

with open(dir + "2-Analyses\\Perch\\assets\\taxonomy_database.json", "r") as taxonomyextra4:
    taxonomyextra4 = json.load(taxonomyextra4)
taxonomyextra4 = taxonomyextra4.get("mappings", None)
taxonomyextra4 = taxonomyextra4.get("ebird2022_to_species", None)

with open(dir + "2-Analyses\\Perch\\assets\\taxonomy_database.json", "r") as taxonomyextra5:
    taxonomyextra5 = json.load(taxonomyextra5)
taxonomyextra5 = taxonomyextra5.get("mappings", None)
taxonomyextra5 = taxonomyextra5.get("xenocanto_to_ebird2021", None)

with open(dir + "2-Analyses\\Perch\\assets\\taxonomy_database.json", "r") as taxonomyextra6:
    taxonomyextra6 = json.load(taxonomyextra6)
taxonomyextra6 = taxonomyextra6.get("mappings", None)
taxonomyextra6 = taxonomyextra6.get("ebird2021_to_genus", None)

with open(dir + "2-Analyses\\Perch\\assets\\taxonomy_database.json", "r") as taxonomyextra7:
    taxonomyextra7 = json.load(taxonomyextra7)
taxonomyextra7 = taxonomyextra7.get("mappings", None)
taxonomyextra7 = taxonomyextra7.get("ebird2022_to_genus", None)

# Convert to pandas dataframe
taxonomy = pd.DataFrame(taxonomy)
taxonomy.reset_index(inplace=True)
taxonomy = taxonomy.iloc[:, :2]
taxonomy.rename(columns={"index": "Sp_name", "mapped_pairs": "Sp_code"}, inplace=True)

taxonomyextra = pd.DataFrame(taxonomyextra)
taxonomyextra.reset_index(inplace=True)
taxonomyextra = taxonomyextra.iloc[:, :2]
taxonomyextra.rename(columns={"index": "Sp_name", "mapped_pairs": "Sp_code"}, inplace=True)

taxonomyextra2 = pd.DataFrame(taxonomyextra2)
taxonomyextra2.reset_index(inplace=True)
taxonomyextra2 = taxonomyextra2.iloc[:, :2]
taxonomyextra2.rename(columns={"index": "Sp_name", "mapped_pairs": "Sp_code"}, inplace=True)

taxonomyextra3 = pd.DataFrame(taxonomyextra3)
taxonomyextra3.reset_index(inplace=True)
taxonomyextra3 = taxonomyextra3.iloc[:, :2]
taxonomyextra3.rename(columns={"index": "Sp_name", "mapped_pairs": "Sp_code"}, inplace=True)

taxonomyextra4 = pd.DataFrame(taxonomyextra4)
taxonomyextra4.reset_index(inplace=True)
taxonomyextra4 = taxonomyextra4.iloc[:, :2]
taxonomyextra4.rename(columns={"index": "Incorrect_code", "mapped_pairs": "Sp_code"}, inplace=True)

taxonomyextra5 = pd.DataFrame(taxonomyextra5)
taxonomyextra5.reset_index(inplace=True)
taxonomyextra5 = taxonomyextra5.iloc[:, :2]
taxonomyextra5.rename(columns={"index": "Sp_name", "mapped_pairs": "Sp_code"}, inplace=True)

taxonomyextra6 = pd.DataFrame(taxonomyextra6)
taxonomyextra6.reset_index(inplace=True)
taxonomyextra6 = taxonomyextra6.iloc[:, :2]
taxonomyextra6.rename(columns={"index": "Sp_code", "mapped_pairs": "Genus_name"}, inplace=True)

taxonomyextra7 = pd.DataFrame(taxonomyextra7)
taxonomyextra7.reset_index(inplace=True)
taxonomyextra7 = taxonomyextra7.iloc[:, :2]
taxonomyextra7.rename(columns={"index": "Sp_code", "mapped_pairs": "Genus_name"}, inplace=True)

# Merge data 
taxonomy_merged = pd.merge(taxonomy,taxonomyextra, on='Sp_code', how='outer', indicator=True)
taxonomy_merged = taxonomy_merged[taxonomy_merged['_merge'] == 'right_only']
taxonomy_merged.drop(columns=['Sp_name_x','_merge'], inplace=True)
taxonomy_merged.rename(columns={'Sp_name_y': 'Sp_name'}, inplace=True)
taxonomy = pd.concat([taxonomy, taxonomy_merged], ignore_index=True)

taxonomy_merged = pd.merge(taxonomy,taxonomyextra2, on='Sp_code', how='outer', indicator=True)
taxonomy_merged = taxonomy_merged[taxonomy_merged['_merge'] == 'right_only']
taxonomy_merged.drop(columns=['Sp_name_x','_merge'], inplace=True)
taxonomy_merged.rename(columns={'Sp_name_y': 'Sp_name'}, inplace=True)
taxonomy = pd.concat([taxonomy, taxonomy_merged], ignore_index=True)

taxonomy_merged = pd.merge(taxonomy,taxonomyextra3, on='Sp_code', how='outer', indicator=True)
taxonomy_merged = taxonomy_merged[taxonomy_merged['_merge'] == 'right_only']
taxonomy_merged.drop(columns=['Sp_name_x','_merge'], inplace=True)
taxonomy_merged.rename(columns={'Sp_name_y': 'Sp_name'}, inplace=True)
taxonomy = pd.concat([taxonomy, taxonomy_merged], ignore_index=True)

taxonomy_merged = pd.merge(taxonomy,taxonomyextra5, on='Sp_code', how='outer', indicator=True)
taxonomy_merged = taxonomy_merged[taxonomy_merged['_merge'] == 'right_only']
taxonomy_merged.drop(columns=['Sp_name_x','_merge'], inplace=True)
taxonomy_merged.rename(columns={'Sp_name_y': 'Sp_name'}, inplace=True)
taxonomy = pd.concat([taxonomy, taxonomy_merged], ignore_index=True)

taxonomy_merged = pd.merge(taxonomy,taxonomyextra6, on='Sp_code', how='outer', indicator=True)
taxonomy_merged = taxonomy_merged[taxonomy_merged['_merge'] == 'right_only']
taxonomy_merged.drop(columns=['Sp_name','_merge'], inplace=True)
taxonomy = pd.concat([taxonomy, taxonomy_merged], ignore_index=True)

taxonomy_merged = pd.merge(taxonomy,taxonomyextra7, on='Sp_code', how='outer', indicator=True)
taxonomy_merged = taxonomy_merged[taxonomy_merged['_merge'] == 'right_only']
taxonomy_merged.drop(columns=['Sp_name','Genus_name_x','_merge'], inplace=True)
taxonomy_merged.rename(columns={'Genus_name_y': 'Genus_name'}, inplace=True)
taxonomy = pd.concat([taxonomy, taxonomy_merged], ignore_index=True)

taxonomy_merged = pd.merge(taxonomy,taxonomyextra4, on='Sp_code', how='outer', indicator=True)
taxonomy_merged = taxonomy_merged[taxonomy_merged['_merge'] == 'both']
taxonomy_merged.drop(columns=['Sp_code','_merge'], inplace=True)
taxonomy_merged.rename(columns={'Incorrect_code': 'Sp_code'}, inplace=True)
taxonomy = pd.concat([taxonomy, taxonomy_merged], ignore_index=True)

# Remove duplicates
taxonomy = taxonomy.drop_duplicates(subset='Sp_code', keep='first')

# Save to disk?
#taxonomy.to_csv(dir + "1-Data\\Biodiversity\\taxonomy.csv", index=False)

# Clean environment
del taxonomyextra, taxonomyextra2, taxonomyextra3, taxonomyextra4
del taxonomyextra5, taxonomyextra6, taxonomyextra7, taxonomy_merged

In [4]:
# Load nesting birds of Quebec
nestingBirds=pd.read_csv(dir + "1-Data\\Biodiversity\\Nesting_Birds_Quebec.csv")
classes = sorted(nestingBirds['Sp_code'].dropna().unique())

In [5]:
# Identify if there are birds we are interested in, that the model has not been trained on
forced_defaults = 0
class_map = []
for c in classes:
    try:
        i = classes.index(c)
        class_map.append(i)
    except:
        class_map.append(0)
        forced_defaults += 1
if forced_defaults > 0:
    print(f"There are {forced_defaults} birds Perch has not been trained on.")

In [6]:
# Clean environment
del c, i, nestingBirds, forced_defaults, gpus, gpu

## Functions to process the data for predictions

In [7]:
# Functions

# Ensure audio is 5s long and in correct dimention
def frame_audio(
      audio_array: np.ndarray,
      window_size_s: float = 5.0,
      hop_size_s: float = 5.0,
      sample_rate = 32000,
      ) -> np.ndarray:    
    """Helper function for framing audio for inference"""
    if window_size_s is None or window_size_s < 0:
        return audio_array[np.newaxis, :]
    frame_length = int(window_size_s * sample_rate)
    hop_length = int(hop_size_s * sample_rate)
    framed_audio = tf.signal.frame(audio_array, frame_length, hop_length, pad_end=True)
    return framed_audio

# Ensure audio is at 32000 sampling rate
def ensure_sample_rate(
        waveform,
        original_sample_rate,
        desired_sample_rate=32000):
    """Resample waveform if required"""
    if original_sample_rate != desired_sample_rate:
        waveform = tfio.audio.resample(waveform, original_sample_rate, desired_sample_rate)
    return desired_sample_rate, waveform

# Predict on every 5s increment of audio file and return a dataframe with predictions
def predictions(
        file_path,          # Path to audio file
        message = False):    # Boolean to indicate printing progress messages
    """Loading audio 5s at a time for inference"""

    # Print message
    if message == True:
        print(f"Processing {file_path}")

    # Load the audio file
    y, sr = librosa.load(file_path)

    # Get the duration of the audio in seconds
    duration = librosa.get_duration(y=y, sr=sr)

    # Define the length of each increment (in seconds)
    increment_length = 5

    # Calculate the number of increments (including padding)
    num_increments = int(np.ceil(duration / increment_length))

    # Calculate the target duration after padding
    padded_duration = num_increments * increment_length

    # Pad the audio if necessary
    if padded_duration > duration:
        pad_length = int((padded_duration - duration) * sr)
        y = np.pad(y, (0, pad_length), mode='constant')

    # Update the duration after padding
    newduration = librosa.get_duration(y=y, sr=sr)

    # Create lists for storing results
    increment_numbers = []
    sp_codes = []
    sp_names = []
    probs = []
    file_names = []
    file_paths = []
    recording_durations = []
    number_5s_increments = []    

    # Loop over each increment
    for i in range(num_increments):
        # Calculate start and end time for the current increment
        start_time = i * increment_length
        end_time = min((i + 1) * increment_length, newduration)
        
        # Extract the audio for the current increment
        increment_audio = y[int(start_time * sr):int(end_time * sr)]

        # Prepare audio
        _ , increment_audio = ensure_sample_rate(increment_audio, sr)
        fixed_tm = frame_audio(increment_audio)

        # Make test predictions
        logits, _ = model.infer_tf(fixed_tm[:1])
        probabilities = tf.nn.softmax(logits)
        argmax = np.argmax(probabilities)
        SP_Code = model_classes[argmax]
        SP_Name = taxonomy[taxonomy['Sp_code'] == SP_Code]['Sp_name'].iloc[0]
        #if math.isnan(SP_Name):
        #    SP_Name = taxonomy[taxonomy['Sp_code'] == SP_Code]['Genus_name'].iloc[0]
        if isinstance(SP_Name, float):
            SP_Name = taxonomy[taxonomy['Sp_code'] == SP_Code]['Genus_name'].iloc[0]

        # Append values to respective lists
        increment_numbers.append(i + 1)
        sp_codes.append(SP_Code)
        sp_names.append(SP_Name)
        probs.append(probabilities[0].numpy().tolist()[argmax])
        file_names.append(os.path.splitext(os.path.basename(file_path))[0])
        file_paths.append(file_path)
        recording_durations.append(duration)
        number_5s_increments.append(num_increments)

        # Print message
        if message == True:
            print(f"Processing increment {i+1}/{num_increments}")
            print(f"The audio is from the species {SP_Name} with probability of {probabilities[0].numpy().tolist()[argmax]}")

    # Create a dictionary to hold column data
    data = {
        'file_name': file_names,
        'file_path': file_paths,
        'recording_duration': recording_durations,
        'number_5s_increments': number_5s_increments,
        'increment_number': increment_numbers,
        'Sp_code': sp_codes,
        'Sp_name': sp_names,
        'Prob': probs
    }

    # Create a DataFrame
    df = pd.DataFrame(data)

    # Return
    return df

# Loop over directory of audio and predict 
def batch_predictions(
        root_dir,           # Path to directory containing folders and audio files
        message = False):   # Boolean to indicate printing progress messages
    """Predicting on all audio files in directory"""
    
    # Create list for storing results
    results_df = []
    
    # Walk through all directories and subdirectories
    for dirpath, _, filenames in os.walk(root_dir):
        for filename in tqdm(filenames):

            # Check if the file is an audio file
            if filename.endswith('.mp3') or filename.endswith('.wav') or filename.endswith('.ogg') or filename.endswith('.WAV'):
                            
                # Get the full path of the audio file
                audio_file_path = os.path.join(dirpath, filename)

                # Process the audio file
                df = predictions(audio_file_path,message=message)

                # Add to list
                results_df.append(df)
            
    # Concatinate results
    combined_df = pd.concat(results_df, ignore_index=True)

    # Return
    return combined_df

## Predict on audio data

In [None]:
# Specify root directory for all files
#root_dir = dir + "1-Data\\Audiomoths2023"

# Predict and convert to data frame
#df = batch_predictions(root_dir,False)
#df.to_csv(dir + "1-Data/Audiomoths2023_Processed/Perch_output.csv", index=False)

In [36]:
# Specify root directory
#root_dir = dir + "1-Data\\Audiomoths2023\\BOU-2023-006"
test_dir = "C:\\Users\\Jurie\\Desktop\\Perch_TEST\\Audio_data\\New_folder"

# Preduct and convert to data frame
df = batch_predictions(test_dir,False)

# Post processing on dataframe
dff = df.query('Prob > 0.5')
#dff.to_csv(dir + "1-Data/Audiomoths2023_Processed/Perch_output_All_TEST.csv", index=False)
#dff = dff.drop_duplicates(subset='Sp_code', keep='first')
#dff.to_csv(dir + "1-Data/Audiomoths2023_Processed/Perch_output_Unique_TEST.csv", index=False)

# Time = 

100%|██████████| 1/1 [00:06<00:00,  6.99s/it]


In [37]:
dff.to_csv("C:\\Users\\Jurie\\Desktop\\Perch_TEST\\Perch_predictions_noise3.csv", index=False)
dff = dff.drop_duplicates(subset='Sp_code', keep='first')
dff.to_csv("C:\\Users\\Jurie\\Desktop\\Perch_TEST\\Perch_predictions_Unique_noise3.csv", index=False)

### TESTING

Compare to BirdNet

In [34]:
birdnet_df = pd.read_csv("C:\\Users\\Jurie\\Desktop\\Perch_TEST\\Birdnet_predictions_noise2.txt",sep='\t')
birdnet_df = birdnet_df.query('Confidence > 0.5')
birdnet_df = birdnet_df.drop_duplicates(subset='Species Code', keep='first')

In [38]:
perch_df =  pd.read_csv("C:\\Users\\Jurie\\Desktop\\Perch_TEST\\Perch_predictions_noise3.csv")
perch_df = perch_df.query('Prob > 0.5')
perch_df = perch_df.drop_duplicates(subset='Sp_code', keep='first')

In [9]:
# TEST single prediction
file_path = r"P:\Projets\Actif\2023_ECCC4_Biodiv\3-Analyses\2-Analyses\Perch\Test_Data\Canada_Goose_Example.wav"
df = predictions(file_path,message=True)

Processing P:\Projets\Actif\2023_ECCC4_Biodiv\3-Analyses\2-Analyses\Perch\Test_Data\Canada_Goose_Example.wav
Processing increment 1/4
The audio is from the species branta canadensis with probability of 0.9973734617233276
Processing increment 2/4
The audio is from the species branta canadensis with probability of 0.9910955429077148
Processing increment 3/4
The audio is from the species branta canadensis with probability of 0.9837427735328674
Processing increment 4/4
The audio is from the species branta canadensis with probability of 0.9992631077766418


In [8]:
# TEST batch predictions
root_dir = r"P:\Projets\Actif\2023_ECCC4_Biodiv\3-Analyses\2-Analyses\Perch\Test_Data"
df = batch_predictions(root_dir,False)

100%|██████████| 4/4 [00:13<00:00,  3.42s/it]


In [None]:
# DEVELOPMENT TESTING
file_path = r"P:\Projets\Actif\2023_ECCC4_Biodiv\3-Analyses\2-Analyses\Perch\Test_Data\Canada_Goose_Example.wav"
# Load the audio file
y, sr = librosa.load(file_path)

# Get the duration of the audio in seconds
duration = librosa.get_duration(y=y, sr=sr)

# Define the length of each increment (in seconds)
increment_length = 5

# Calculate the number of increments (including padding)
num_increments = int(np.ceil(duration / increment_length))

# Calculate the target duration after padding
padded_duration = num_increments * increment_length

# Pad the audio if necessary
if padded_duration > duration:
     pad_length = int((padded_duration - duration) * sr)
     y = np.pad(y, (0, pad_length), mode='constant')

# Update the duration after padding
newduration = librosa.get_duration(y=y, sr=sr)

# Create lists for storing results
increment_numbers = []
sp_codes = []
sp_names = []
probs = []
file_names = []
file_paths = []
recording_durations = []
number_5s_increments = []

# Calculate start and end time for the current increment
for i in range(num_increments):
     # Calculate start and end time for the current increment
     start_time = i * increment_length
     end_time = min((i + 1) * increment_length, newduration)
        
     # Extract the audio for the current increment
     increment_audio = y[int(start_time * sr):int(end_time * sr)]

     # Prepare audio
     _ , increment_audio = ensure_sample_rate(increment_audio, sr)
     fixed_tm = frame_audio(increment_audio)

     # Make test predictions
     logits, _ = model.infer_tf(fixed_tm[:1])
     probabilities = tf.nn.softmax(logits)
     argmax = np.argmax(probabilities)
     SP_Code = model_classes[argmax]
     SP_Name = taxonomy[taxonomy['Sp_code'] == SP_Code]['Sp_name'].iloc[0]

     # Append values to respective lists
     increment_numbers.append(i + 1)
     sp_codes.append(SP_Code)
     sp_names.append(SP_Name)
     probs.append(probabilities[0].numpy().tolist()[argmax])
     file_names.append(os.path.splitext(os.path.basename(file_path))[0])
     file_paths.append(file_path)
     recording_durations.append(duration)
     number_5s_increments.append(num_increments)

# Create a dictionary to hold column data
data = {
    'file_name': file_names,
    'file_path': file_paths,
    'recording_duration': recording_durations,
    'number_5s_increments': number_5s_increments,
    'increment_number': increment_numbers,
    'Sp_code': sp_codes,
    'Sp_name': sp_names,
    'Prob': probs
}

# Create a DataFrame
df = pd.DataFrame(data)

### TESTING