# Run BirdNet on Bucket files #

* Create a user managed vertex notebook on GCP
* This script doesnt use GPUs, so only select CPU resources
* Model is TFLite -- you can use the default env it gives you, which is Tensorflow
* Drag this script into the home directory that pops up when you open the notebook and run everything from there.

# Mount buckets

In [1]:
# Mount raw data bucket - this bucket contains all the wav files 
!mountpoint -q /home/jupyter/gcs_raw && echo "mounted" || mkdir -p gcs_raw; gcsfuse --implicit-dirs --rename-dir-limit=100 --disable-http2 --max-conns-per-host=100 "acoustic-data-raw" "/home/jupyter/gcs_raw"
# Mount outputs bucket - results csvs will be written to this bucket
!mountpoint -q /home/jupyter/gcs_outputs && echo "mounted" || mkdir -p gcs_outputs; gcsfuse --implicit-dirs --rename-dir-limit=100 --disable-http2 --max-conns-per-host=100 "acoustic-processing-outputs" "/home/jupyter/gcs_outputs"

2022/11/24 10:48:38.247257 Start gcsfuse/0.41.8 (Go version go1.18.4) for app "" using mount point: /home/jupyter/gcs_raw
2022/11/24 10:48:38.264325 Opening GCS connection...
2022/11/24 10:48:38.395506 Mounting file system "acoustic-data-raw"...
2022/11/24 10:48:38.419094 File system has been successfully mounted.
2022/11/24 10:48:38.551976 Start gcsfuse/0.41.8 (Go version go1.18.4) for app "" using mount point: /home/jupyter/gcs_outputs
2022/11/24 10:48:38.566143 Opening GCS connection...
2022/11/24 10:48:38.672340 Mounting file system "acoustic-processing-outputs"...
2022/11/24 10:48:38.705055 File system has been successfully mounted.


In [2]:
# If librosa doesn't import, pip install it
!pip show librosa && echo "librosa installed" || pip install librosa



# Clone repo

In [2]:
# This is the new one - more species, and other things also!!
![ -d "/home/jupyter/BirdNET-Analyzer" ] && echo "Scripts are downloaded" || !git clone https://github.com/kahst/BirdNET-Analyzer.git
!wget -O BirdNET-Analyzer/species_list.txt "https://www.dropbox.com/s/3ji3dzxs9gsa6t7/london_birdnet_a.txt?dl=0"

fatal: destination path 'BirdNET-Analyzer' already exists and is not an empty directory.
--2022-11-23 10:58:09--  https://www.dropbox.com/s/3ji3dzxs9gsa6t7/london_birdnet_a.txt?dl=0
Resolving www.dropbox.com (www.dropbox.com)... 162.125.2.18, 2620:100:6020:18::a27d:4012
Connecting to www.dropbox.com (www.dropbox.com)|162.125.2.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: /s/raw/3ji3dzxs9gsa6t7/london_birdnet_a.txt [following]
--2022-11-23 10:58:10--  https://www.dropbox.com/s/raw/3ji3dzxs9gsa6t7/london_birdnet_a.txt
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc79d509f3034583e0c46adc6d13.dl.dropboxusercontent.com/cd/0/inline/BxQUf8KRDLr5gdfGLQStBMFDCF4Byj4Bx3tL3q-IY_ZeUzUmCNnGT3M5SeWxq3eVLrAcfmwdIubQX754F4fKwgC0epYpwPvl6lnN0ppKyniMBROEL5D8F14hT2cft3sUNZijANKASQxUU2eczvysKSaqxB_e7rW2LM2sj-D9u2A_sA/file# [following]
--2022-11-23 10:58:10--  https://uc79d509f3034583e0c46adc6d13

## Set folders for input (i.e. sound folders to analyse) and output ##

In [5]:
# Change these folders
# Make sure output is in a gcs bucket
input_folder = "/home/jupyter/gcs_raw/trial_data_2021/bird-config/"
results_folder ="/home/jupyter/gcs_outputs/trial_data_2021/birdnet/"

# Run analyser #

In [None]:
cd BirdNET-Analyzer

/home/jupyter/BirdNET-Analyzer


In [6]:
!python analyze.py --i "$input_folder" --o "$results_folder" --lat 51.507359 --lon -0.136439 --week 11 --min_conf 0.8 --slist 'species_list.txt' --threads 8

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
Species list contains 109 species
Found 627 files to analyze
Analyzing /home/jupyter/gcs_raw/nr-acoustic-data/bird-config/M18_75_bird/20220330_183000.WAV
Analyzing /home/jupyter/gcs_raw/nr-acoustic-data/bird-config/M18_75_bird/20220409_183000.WAV
Analyzing /home/jupyter/gcs_raw/nr-acoustic-data/bird-config/M24_73/20220407_183000.WAV
Analyzing /home/jupyter/gcs_raw/nr-acoustic-data/bird-config/M29_107_bird/20220405_183000.WAV
Analyzing /home/jupyter/gcs_raw/nr-acoustic-data/bird-config/M2_30/20220327_183000.WAV
Analyzing /home/jupyter/gcs_raw/nr-acoustic-data/bird-config/M2_30/20220406_183000.WAV
Analyzing /home/jupyter/gcs_raw/nr-acoustic-data/bird-config/M2_30/20220416_183000.WAV
Analyzing /home/jupyter/gcs_raw/nr-acoustic-data/bird-config/M2_30/20220426_183000.WAV
Finished /home/jupyter/gcs_raw/nr-acoustic-data/bird-config/M18_75_bird/20220409_183000.WAV in 171.67 seconds
Analyzing /home/jupyter/gcs_raw/nr-acoustic-data/bird-con

# Concatenate results into one csv #

In [8]:
import glob as glob
import pandas as pd
import os
from datetime import date

In [53]:
d = date.today()
d = str(d)
results_list = glob.glob(results_folder +  '*/*.txt')
metadata = pd.read_csv('/home/jupyter/gcs_raw/nr-acoustic-data/metadata/NR_deployment_2022_ARUs.csv')

li = []

for filename in results_list:
    df = pd.read_csv(filename, sep='\t')
    df['file_name'] = os.path.splitext(os.path.basename(filename))[0]
    df['path'] = filename[:-3]+'WAV'
    df['datetime'] = [f[0:15] for f in df['file_name']]
    df['time'] = pd.to_datetime(df['datetime'], format='%Y%m%d_%H%M%S') +  pd.to_timedelta(df["Begin Time (s)"], unit='s')
    df['date'] = [f[0:7] for f in df['file_name']]
    df['ID'] = os.path.basename(os.path.dirname(filename))
    li.append(df)

frame = pd.concat(li, axis=0, ignore_index=True)
frame = pd.merge(frame, metadata, on='ID', how='inner')
frame.to_csv(results_folder + d + '_concatenated_results_birdnet.csv')