In [16]:
import pandas as pd
import numpy as np
import os
import gc
import tensorflow as tf
from tensorflow import keras
from classifier import Classifier
from functions import calibrate, adjust, threshold_filter

# PARAMS needed from recording metadata:
# lat: latitude
# lon: longitude
# day: day of the year 1-365

threshold = 0.3 # only save predictions with confidence higher than threshold

# some mockup-values for prediction calibration:
lat = 65.00 # latitude
lon = 24.00 # longitude
day = 150   # day of year 1-365

# load classification model
# TFLITE_THREADS can be as high as number of CPUs available, the rest of the parameters should not be changed
clsf = Classifier(path_to_model='model_v3/model_v3_5.keras', sr=48000, clip_dur=3.0, TFLITE_THREADS = 1, offset=0, dur=0) 

# load species list and post-processing tables for prediction calibration
sp_list=pd.read_csv("classes.csv")
migr_table = np.load('Pred_adjustment/migration_params.npy')
cal_table = np.load('Pred_adjustment/calibration_params.npy')

In [20]:
# define path to audio data
path = "data"

# analyze all files
files = os.listdir(path)

with open(path + '_results.txt', 'a') as f:
    f.write("site, file, species, prediction, detection_time \n")

n_files = len(files)
for j, fi in enumerate(files):
    try:
        print(f"Analyzing {fi} ({j+1}/{n_files})...")
        # predict for example clip
        pred, t = clsf.classify(path + '/' + fi, max_pred=False) #max_pred: only keep highest confidence detection for each species instead of saving all detections
        # calibrate prediction 
        for i in range(len(pred)):
            pred[i, :] = calibrate(pred[i, :], cal_table=cal_table)
        # ignore human and noise predictions
        pred[:,0:2] = 0 
        # filter predictions with a threshold 
        pred, c, t = threshold_filter(pred, t, threshold)
        # adjust prediction based on time of the year and latitude (only if record is from Finland and location and date are known)
        pred = adjust(pred, c, migr_table, lat, lon, day) 
        # filter and find species names from sp_list
        for i in range(len(pred)):
            if c[i] > 1: # ignore two first classes: noise and human
                with open(path + '_results.txt', 'a') as f:
                    f.write(path + ", " + fi + ", " + str(sp_list['common_name'].iloc[c[i]]) + ", " + str(pred[i]) + ", " + str(t[i]) + "\n")
        gc.collect() # clear memory
    except: 
        print(f"Error analyzing {fi}!")

print(" ")
print("All files analyzed")
print(f"Results saved to {path}_results.txt")

Analyzing 1.wav (1/5)...
Loading file data/1.wav...
Classifying recording...
Recording analyzed!
Analyzing 2.wav (2/5)...
Loading file data/2.wav...
Classifying recording...
Recording analyzed!
Analyzing 3.wav (3/5)...
Loading file data/3.wav...
Classifying recording...
Recording analyzed!
Analyzing 5.wav (4/5)...
Loading file data/5.wav...
Classifying recording...
Recording analyzed!
Analyzing 4.wav (5/5)...
Loading file data/4.wav...
Classifying recording...
Recording analyzed!
 
All files analyzed
Results saved to data_results.txt
