In [None]:
# Notebook configuration: tune these values before running the cells below.

# Frequency range of interest as [low, high] in Hz, both bounds inclusive (e.g. bats)
freqRangeOfInterest = [40000,60000]

# Threshold for flagging a file as interesting: a recording is collected when the
# largest FFT magnitude found inside freqRangeOfInterest is strictly greater than this value.
fileOfInterestThreshold = 200.0

# ** Set path to files folder here **
# This folder is handed to the file chooser below as its starting location.
path = '/Users/bill/Documents/AudioMoth/test'

# Browse for path: show a directories-only chooser that starts at `path`.
from ipyfilechooser import FileChooser
fdialog = FileChooser(
    path,
    title='<b>Browse to Recordings to Process</b>',
    show_hidden=False,
    select_default=True,
    use_dir_icons=True,
    show_only_dirs=True
)
# Render the chooser widget in the cell output; the next cell reads fdialog.selected.
display(fdialog)

In [None]:
import pandas as pd
import numpy as np
import os
import fnmatch
from tinytag import TinyTag
import re
import shutil

import matplotlib.pyplot as plt
%matplotlib inline

import scipy.signal as sp

from scipy.fft import *
from scipy import signal

# Don't show warning messages, so they do not clutter the notebook output
import warnings
warnings.filterwarnings('ignore')

# Path from file dialog - defaults to the one set above.
# fdialog.selected is None when nothing has been selected; os.listdir(None)
# would then silently list the current working directory, so keep the
# configured default `path` in that case.
path = fdialog.selected or path
print("Path is: ", path)


# Find the recordings (*.WAV) in that folder, sorted by file name
files = sorted(fnmatch.filter(os.listdir(path), "*.WAV"))
print(files)

import soundfile as sf

# Define helper functions

# Load a .wav file. 
# These are 24 bit files. The PySoundFile library is able to read 24 bit files.
# https://pysoundfile.readthedocs.io/en/0.9.0/
def get_wav_info(wav_file):
    """Read an audio file and return its samples together with its sample rate.

    Args:
        wav_file: Path of the file, passed straight to ``sf.read``.

    Returns:
        A ``(data, rate)`` tuple exactly as produced by ``sf.read``.
    """
    samples, sample_rate = sf.read(wav_file)
    return samples, sample_rate

#
# Plot spectrogram
#
# source: Andrew Ng Deep Learning Specialization, Course 5
def graph_spectrogram(data, rate):
    """Draw a dB-scaled spectrogram of a recording on a new pyplot figure.

    Only the first channel of a multi-channel recording is plotted.

    Args:
        data: Sample array, either 1-D (mono) or 2-D with one column per
            channel.
        rate: Sampling frequency, passed to ``plt.specgram`` as ``Fs``.

    Returns:
        The ``(pxx, freqs, bins, im)`` tuple returned by ``plt.specgram``.

    Raises:
        ValueError: If ``data`` is neither 1-D nor 2-D.
    """
    nfft = 2048    # Length of each window segment
    noverlap = 12  # Overlap between windows

    print("Rate:", rate)

    # data.ndim is the array rank, not the channel count: a 2-D array can
    # hold any number of channels, so read the count from its second axis.
    if data.ndim == 1:
        nchannels = 1
        samples = data
    elif data.ndim == 2:
        nchannels = data.shape[1]
        samples = data[:, 0]
    else:
        raise ValueError(
            "Expected 1-D or 2-D sample data, got %d dimensions" % data.ndim
        )
    print("Channels:", nchannels)

    plt.figure(figsize=(15.32, 7.49))
    return plt.specgram(
        samples,
        NFFT=nfft,
        Fs=rate,
        Fc=0,
        noverlap=noverlap,
        cmap=plt.cm.bone,
        sides='default',
        mode='default',
        scale='dB',
    )
        

#
# Plot and return freq max values using FFT
#
# TO DO : Check some bucket ranges of interest, e.g. BATs:
#
# 22Khz - 30 Khz
# 40Khz - 60 Khz
#
def get_plot_freqs(data, rate, freq_range=None):
    """Plot the FFT magnitude spectrum of a recording and return its peaks.

    Two bands are zeroed before the peaks are measured: 32.5-32.9 kHz (a
    noise spike seen in the recordings) and everything up to 1 kHz (a low
    frequency spike). The filtered spectrum is drawn with ``plt.plot`` on
    the current pyplot figure.

    Args:
        data: Sample array, 1-D (mono) or 2-D with one column per channel.
            For multi-channel data only the first channel is analysed.
        rate: Sampling frequency in Hz.
        freq_range: Optional ``[low, high]`` band in Hz (inclusive) to report
            the peak for. Defaults to the notebook-level
            ``freqRangeOfInterest``.

    Returns:
        ``(max_freq, max_freq_val, finterest_max_val)``: the frequency of the
        strongest remaining bin, its magnitude, and the largest magnitude
        inside ``freq_range`` (0 when no bin falls in that band).

    Raises:
        ValueError: If ``data`` contains no samples.
    """
    if freq_range is None:
        freq_range = freqRangeOfInterest

    samples = np.asarray(data)
    # np.fft.rfft transforms along the last axis, which for (frames, channels)
    # data is the channel axis; analyse the first channel instead.
    if samples.ndim > 1:
        samples = samples[:, 0]

    n_samples = len(samples)
    if n_samples == 0:
        raise ValueError("Cannot compute a spectrum for an empty recording")

    yf = np.fft.rfft(samples)
    xf = np.fft.rfftfreq(n_samples, d=1./rate)

    # Filter out the 32kHz noise signal spike which shows up in the data.
    # Assigning through an empty boolean mask is a no-op, so no size check
    # is needed.
    yf[(xf >= 32500.0) & (xf <= 32900.0)] = 0.0

    # Filter out low frequency spike
    yf[(xf >= 0.0) & (xf <= 1000.0)] = 0.0

    magnitude = np.abs(yf)

    # Get max in the range of interest (bat detection)
    finterest_max_val = 0
    in_range = (xf >= freq_range[0]) & (xf <= freq_range[1])
    if in_range.any():
        finterest_max_val = np.max(magnitude[in_range])

    # Now get the strongest bin of the filtered spectrum
    idx = np.argmax(magnitude)
    max_freq = xf[idx]
    max_freq_val = magnitude[idx]

    # Plot the frequencies with the filtered bands attenuated
    plt.plot(xf, magnitude)

    # Return the frequency and max values
    return max_freq, max_freq_val, finterest_max_val

#
# Get the comment tag and recorded time
#
def get_comment_and_recorded_time(wav_file):
    """Read a file's comment tag and extract the recording time from it.

    Args:
        wav_file: Path of the audio file whose tags are read with
            ``TinyTag.get``.

    Returns:
        ``(comment, recorded_time)``: the raw comment tag (may be ``None``
        when the file carries no comment) and the text found between
        ``'Recorded at '`` and ``' by AudioMoth'`` in it, or ``''`` when the
        comment is missing or does not match.
    """
    # Read the tags of the file that was passed in, not of a notebook-level
    # variable, so the helper works for any path it is given.
    tag = TinyTag.get(wav_file)
    comment = tag.comment

    recorded_time = ''
    # re.search raises TypeError on None, so only search a non-empty comment.
    if comment:
        m = re.search('Recorded at (.+?) by AudioMoth', comment)
        if m:
            recorded_time = m.group(1)
    return comment, recorded_time

#
# Processing here...
#
from pydub import AudioSegment
import IPython
import time

# Iterate over all the WAV files found in the directory. For each file: print its
# details, save a frequency chart and a spectrogram next to it, and move the file
# plus both images into the 'interesting' sub-folder when it exceeds the threshold.
# NOTE(review): `index` from enumerate() is not used in the loop body.
for index, singfile in enumerate(files):
    
    # Create the candidates dir if it does not exist yet. Interesting files are moved into here.
    # The check runs on every iteration, but the folder is only created the first time.
    candidate_dir = os.path.join(path, 'interesting')
    if not os.path.isdir(candidate_dir):
        print('Creating candidate dir: ', candidate_dir)
        os.mkdir(candidate_dir)
         
    # Full path to the file
    file_full_path = os.path.join(path, singfile)
    
    # Spectrogram and frequency chart file paths (saved next to the recording)
    spectrogram_path = file_full_path + '_spec.jpg'
    freq_chart_path = file_full_path + '_freq.jpg'
    
    # Get comment and recorded time
    comment, recorded_time = get_comment_and_recorded_time(file_full_path)

    # Print some useful info about the files...
    print('--------------------------------------------------------------------------------------------------')
    print('File: ',file_full_path)
    print('Spectrogram:', spectrogram_path)
    print('Details: ', comment)
    
    # Get the WAV file data and sample rate
    data, rate = get_wav_info(file_full_path)
    
    print('Sample rate: ', rate)
    
    # Get the frequency data and plot it. get_plot_freqs draws onto the current
    # pyplot figure, so the title and labels below decorate that plot before it is saved.
    max_freq, max_freq_val, finterest_max_val = get_plot_freqs(data, rate)
    
    plt.title("Frequency chart - " + singfile + " " + recorded_time)
    plt.xlabel('Freq (hz)')
    plt.ylabel('Value')
    plt.savefig(freq_chart_path, dpi=300, bbox_inches='tight')
    plt.show()
    
    print('Max freq Khz:', max_freq/1000)
    print('Max freq value: ', max_freq_val)
    print('Freq of interest range (Khz): ', freqRangeOfInterest[0]/1000, freqRangeOfInterest[1]/1000)
    print('Max value in range of interest: ', finterest_max_val)
    
    # Plot the spectrogram. graph_spectrogram opens its own figure; nothing it returns is used here.
    graph_spectrogram(data, rate)
    
    plt.title("Spectrogram - " + singfile + " " + recorded_time)
    plt.xlabel('Time (s)')
    plt.ylabel('Freq (hz)')
    plt.savefig(spectrogram_path, dpi=300, bbox_inches='tight')
    plt.show()
    
    # A file is interesting when its peak magnitude in the range of interest is strictly
    # above the threshold; the recording and both saved images are moved together.
    if finterest_max_val > fileOfInterestThreshold:
        print('File: '+singfile+' looks interesting!! Moving to candidates dir.')
        shutil.move(file_full_path, os.path.join(candidate_dir, singfile))
        shutil.move(spectrogram_path, os.path.join(candidate_dir, singfile)+'_spec.jpg')
        shutil.move(freq_chart_path, os.path.join(candidate_dir, singfile)+'_freq.jpg')
        
print("** Finished **")
        
        