In [None]:
#BIRNET_SIMPLIFIED: authored by Sylvain Haupert and modified by Ren Kerkhofs

# Durations used throughout the notebook
SEGMENT_DURATION = 3   # seconds
AUDIO_DURATION = 60    # seconds

# Six-letter codes of the bird species present in the Risoux Forest
codes_jura = [
    'turphi', 'fricoe', 'perate', 'erirub', 'sylatr', 'colpal',
    'turmer', 'lopcri', 'pyrpyr', 'turvis', 'trotro', 'gargla',
    'regign', 'regreg', 'turtor', 'cerfam', 'phycol', 'poemon',
    'loxcur', 'prumod', 'drymar', 'siteur', 'denmaj', 'cyacae',
    'pictri', 'anttri', 'nuccar', 'corcor', 'parmaj', 'carspi',
    'coccoc', 'poepal', 'coccor', 'carcar', 'motalb', 'carcan',
    'cuccan', 'picvir', 'phooch', 'phopho', 'phybon', 'phytro',
    'sercit', 'carchl', 'embcit', 'tetbon', 'teturo', 'scorus',
    'glapas', 'aegfun', 'stralu', 'butbut', 'falper',
]

# The complete list is kept as the working set (no sub-selection is made here)
codes = codes_jura

import os 

#specify this for your drive, the SAVE_PATH is where the output will be saved. The package path I would keep because this is the folder we have been given that has the actual packages needed.
#from google.colab import drive
#drive.mount('/content/drive')
# DATA_PATH and SAVE_PATH are resolved to absolute paths immediately: the
# notebook later changes its working directory (os.chdir into the
# BirdNET-Analyzer checkout), and a relative path used after that point would
# no longer designate the folders created here.
DATA_PATH = os.path.abspath('./data/audiomoth_ash')    # location of data files
SAVE_PATH = os.path.abspath('./data/birdnet_savedir')  # location of save directory
# PACKAGE_PATH is deliberately left relative: it is interpolated unquoted into
# a shell command, where an absolute path containing spaces would break.
PACKAGE_PATH = '.venv/Lib' #location of packages needed to execute software

# create the folders and subfolders
# exist_ok=True accepts an already existing directory, but still raises
# FileExistsError when the path exists and is not a directory.
for _directory in (DATA_PATH, SAVE_PATH, PACKAGE_PATH):
    os.makedirs(_directory, exist_ok=True)

!pip install scikit-maad
!pip install ffmpeg
!pip install librosa 
!pip install gdown
!git clone https://github.com/kahst/BirdNET-Analyzer.git $PACKAGE_PATH/BirdNET-Analyzer

# general packages
import warnings
warnings.filterwarnings(action='ignore')
import os
import sys
import time
import math
from pathlib import Path        # to support linux and windows paths
import random                   # to provide random numbers
import re                       # to perform regular expression matching
import json                     # to read json file
from glob import glob           # to collect files in a directory and subdirectory

# basic packages
import numpy as np              # adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays
import pandas as pd             # library providing high-performance, easy-to-use data structures and data analysis tools (Dataframe)
import matplotlib.pyplot as plt # a comprehensive library for creating static, animated, and interactive visualizations
plt.rcParams.update({'figure.max_open_warning': 0})
plt.style.use("default")

# audio package
import librosa
import librosa.display

# scikit-learn package (machine learning)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Google Drive packages
import gdown

# Scikit-Maad (ecoacoustics functions) package
from maad import sound
from maad import util

###### BIRDNET #######
# go to the directory of the BirdNET-Analyzer checkout
# From this line on, every relative path is resolved against that directory
# (this includes DATA_PATH / SAVE_PATH whenever they are relative paths).
# NOTE(review): with a relative PACKAGE_PATH, running this line a second time
# in the same session may fail, because the path is then looked up from
# inside the checkout.
os.chdir(PACKAGE_PATH+'/BirdNET-Analyzer')
# import all functions to use BirdNET that are contained in analyze.py
# NOTE(review): presumably this import resolves because the current working
# directory is on the import path of the kernel -- confirm.
from analyze import *

#@title Global settings { run: "auto", vertical-output: true, form-width: "1000px", display-mode: "both" }
###### BirdNET parameters
# LAT, LON, WEEK, SENSITIVITY and MIN_CONFIDENCE are passed to analyze.py on
# the command line further down in this notebook.
# Set the GPS coordinates of the recording location
# NOTE(review): the previous comment said "France"; presumably this is the
# Risoux Forest recording site -- confirm.
# Recording location latitude. Set -1 to ignore.
LAT = 46.545496           #@param {type:"number"}
# Recording location longitude. Set -1 to ignore
LON = 6.093823            #@param {type:"number"}
# Week of the year when the recording was made. Values in [1, 48] (4 weeks per month). Set -1 to ignore.
# NOTE(review): the slider accepts values up to 52 although the documented
# range stops at 48 -- confirm which one is right.
WEEK = 41               #@param {type:"slider", min:-1, max:52, step:1}
# Overlap in seconds between extracted spectrograms. Values in [0.0, 2.9]. Defaults to 0.0.
# NOTE(review): OVLP is not passed to the analyze.py command used in this
# notebook, so changing it currently has no effect on the analysis.
OVLP = 0                #@param {type:"slider", min:0.0, max:2.9, step:0.1}
# Detection sensitivity; Higher values result in higher sensitivity. Values in [0.5, 1.5]. Defaults to 1.0.
SENSITIVITY = 1         #@param {type:"slider", min:0.5, max:1.5, step:0.1}
# Minimum confidence threshold. Values in [0.01, 0.99]. Defaults to 0.1.
MIN_CONFIDENCE = 0.75    #@param {type:"slider", min:0.01, max:0.99, step:0.01}

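# The results are written to "$SAVE_PATH/WP3_75": replace WP3_75 with whatever you want your output folder to be called (and use the same name when collecting the csv files afterwards)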
!python analyze.py --i "$DATA_PATH" --o "$SAVE_PATH/WP3_75" --rtype 'csv' --lat $LAT --lon $LON --week $WEEK --sensitivity $SENSITIVITY --min_conf $MIN_CONFIDENCE


# to manipulate date and time
from datetime import datetime

# grab all birdnet output files
# (sorted, so that the row order of the final dataframe does not depend on
# the order in which the file system happens to list the files)
filelist = sorted(glob(SAVE_PATH+'/WP3_75/**/*.csv', recursive = True))

# list of columns kept in the final dataframe
cols = ['filename', 'Start (s)','End (s)','Confidence', 'label' ]

# number of trailing characters removed from a result file name to obtain the
# name stored in the 'filename' column
# NOTE(review): 20 is presumably the length of the suffix BirdNET appends to
# the audio file name ('.BirdNET.results.csv') -- confirm.
_SUFFIX_LENGTH = 20


def _species_code(scientific_name):
    """Return the lower-case code built from a scientific name.

    The code is the first three letters of the first word followed by the
    first three letters of whatever comes after the first space, e.g.
    'Turdus merula' -> 'turmer'. A name without any space only contributes
    its first three letters instead of raising an IndexError.
    """
    genus, _, species = scientific_name.partition(" ")
    return genus[:3].lower() + species[:3].lower()


# create a dataframe with all annotation files in the directory
# the per-file dataframes are collected first and concatenated once at the
# end, which avoids copying the growing dataframe at every iteration
_frames = []

for file in filelist:
    # read the csv file associated with the audio file
    df_rois = pd.read_csv(file, sep=',')
    recording = Path(file).name[:-_SUFFIX_LENGTH]

    if not df_rois.empty:
        # there is at least one detection: add a column with the filename
        # and a column with the label
        df_rois['filename'] = recording
        df_rois['label'] = [_species_code(x) for x in df_rois['Scientific name']]
    else:
        # no detection: create a single placeholder row labelled 'none' that
        # spans the whole recording
        df_rois = pd.DataFrame(
            [{
                'filename': recording,
                'Start (s)': 0.0,
                'End (s)': float(AUDIO_DURATION),
                'Confidence': 1.0,
                'label': 'none',
            }],
            columns=cols,
        )

    # keep only the columns of interest, in a fixed order
    _frames.append(df_rois[cols])

# merge the annotations of all the files into one big dataframe; when no
# result file was found, fall back to an empty dataframe that still carries
# the expected columns
if _frames:
    df_raw_birdnet = pd.concat(_frames, axis=0, ignore_index=True)
else:
    df_raw_birdnet = pd.DataFrame(columns=cols)

# display the dataframe (notebook output)
df_raw_birdnet

# save the merged detections in the save directory
# (the previous target was a hard-coded Google Drive folder, which does not
# exist unless the drive is mounted; the file is written directly in
# SAVE_PATH, outside of the WP3_75 sub-folder scanned for BirdNET results)
df_raw_birdnet.to_csv(os.path.join(SAVE_PATH, 'WP3_75.csv'), index=False)

SyntaxError: invalid non-printable character U+00A0 (1177790039.py, line 8)