In [1]:
# created by L. Oliver Sabor
# lukesabor@gmail.com
# last updated May 19 2021

In [2]:
import csv
import matplotlib.pyplot as plt
from scipy import signal
from scipy.io import wavfile
import numpy as np
import pandas as pd
import glob as glob
import os
from IPython.display import HTML, Javascript, display
import time
import random

In [4]:
# Input/output locations. All paths are relative to the notebook's working directory.
file_prefix = 'soundfiles/'                          # root of the .wav recordings
result_file = 'results/'                             # in-progress tables are written here
complete_folder = '../Dropbox/Bird_results_final/'   # finished tables are written here
complete_suffix = '.Table.1.selections.txt'          # appended to every table file name

# Mirror every second-level entry under file_prefix into both output roots so
# later to_csv calls have a directory to write into. Entries whose relative
# path starts with 'old_' are skipped.
locs = [x[len(file_prefix):] for x in glob.glob(file_prefix+'*/*')]
for loc in locs:
    if loc[0:4] == 'old_':
        continue
    for root in (complete_folder, result_file):
        target = root+loc
        if not os.path.exists(target):
            os.makedirs(target)

# Recordings to process: paths relative to file_prefix with the '.wav'
# extension removed, again dropping anything that starts with 'old_'.
rawfiles = [f[len(file_prefix):-4] for f in glob.glob(file_prefix+'/*/*/*.wav')]
files = [f for f in rawfiles if f[0:4] != 'old_']

In [5]:
files

['May_19\\NEW_HOME_NEW_FILES\\501669_20200518_050000',
 'May_19\\NEW_HOME_NEW_FILES\\501669_20200518_070000']

In [6]:
def restart_run_all():
    """Restart the kernel and re-run every cell by injecting JavaScript.

    The injected script calls ``IPython.notebook.kernel.restart()`` and, two
    seconds later, ``IPython.notebook.execute_all_cells()``. It therefore
    relies on the front end exposing a global ``IPython.notebook`` object.
    """
    restart_script = '''
            <script>
                code_show = false;
                function restart_run_all(){
                    IPython.notebook.kernel.restart();
                    setTimeout(function(){
                        IPython.notebook.execute_all_cells();
                    }, 2000)
                }
                restart_run_all()
            </script>
        '''
    display(HTML(restart_script))

def is_local_high(spec,freq,timestep,threshold):
    """Return True when the spectrogram is loud around ``freq`` at ``timestep``.

    ``spec[0]`` is indexed as ``[row][column]``. Seven rows are sampled,
    ``int(.6*(freq+d))`` for d in -15..15 in steps of 5, and for each row the
    two columns ``timestep-1`` and ``timestep`` are averaged. The mean of those
    seven averages must be strictly greater than ``threshold``.

    ``timestep`` is clamped to a minimum of 2 before use.
    The factor .6 maps ``freq`` to a row index; it presumably matches the
    padded FFT length used when the spectrogram is built in run_file - verify
    if those settings change.
    """
    column = max(2,timestep)
    band_means = []
    for offset in (-15,-10,-5,0,5,10,15):
        row = int(.6*(freq+offset))
        pair = [spec[0][row][column-1], spec[0][row][column]]
        band_means.append(np.mean(pair))
    return bool(np.mean(band_means) > threshold)

def get_hoot_centers(potential_hoots):
    """Collapse runs of nearby timesteps into one centre timestep per run.

    Consecutive entries belong to the same run while the difference to the
    previous entry is 0, 1 or 2. Each run is reduced to ``int(mean(run))``
    (truncated toward zero).

    Parameters
    ----------
    potential_hoots : list of int
        Timesteps flagged as loud, in the order they should be grouped.
        The list is NOT modified (earlier versions appended a sentinel 0 to
        the caller's list, which also failed to flush a run ending at step 0).

    Returns
    -------
    list of int
        One centre per run, in input order; empty for empty input.
    """
    hoot_centers = []
    run = []
    for step in potential_hoots:
        # A gap outside 0..2 closes the current run.
        if run and step - run[-1] not in range(0,3):
            hoot_centers.append(int(np.mean(run)))
            run = []
        run.append(step)
    # Flush the trailing run explicitly instead of relying on a sentinel value.
    if run:
        hoot_centers.append(int(np.mean(run)))
    return hoot_centers


def separate_hoot_events(hoot_centers,spec,length):
    """Group hoot centres that follow each other closely into events.

    Two consecutive centres belong to the same event when their difference
    lies in ``range(0, int(spacing * len(spec[2]) / length))`` with
    ``spacing = 1``; i.e. the gap is smaller than the number of spectrogram
    columns per unit of ``length``.

    Parameters
    ----------
    hoot_centers : list of int
        Centre timesteps in the order they should be grouped. The list is NOT
        modified (earlier versions appended a sentinel 0 to the caller's list).
    spec : sequence
        Only ``len(spec[2])`` is used (the number of spectrogram columns).
    length : number
        Duration covered by ``spec``; must be non-zero when there is at least
        one centre.

    Returns
    -------
    list of list of int
        One list of centres per event; empty for empty input.
    """
    spacing = 1
    if len(hoot_centers) == 0:
        return []

    # Loop-invariant: maximum gap (exclusive) between centres of one event.
    window = range(0,int(spacing*(len(spec[2])/length)))

    hoot_events = []
    current = [hoot_centers[0]]
    for center in hoot_centers[1:]:
        if center - current[-1] in window:
            current.append(center)
        else:
            hoot_events.append(current)
            current = [center]
    # Flush the trailing event explicitly instead of relying on a sentinel value.
    hoot_events.append(current)
    return hoot_events

def remove_noise(hoot_events):
    """Keep only events with at least four hoots, capped at the first six.

    Events with fewer than 4 entries are dropped. Events with more than 6
    entries are truncated to their first 6; shorter ones are passed through
    as the same object.
    """
    return [
        event if len(event) <= 6 else event[0:6]
        for event in hoot_events
        if len(event) >= 4
    ]

def get_event_time_bounds(hoot_events,spec,freq,min_threshold):
    """Estimate the begin/end timestep of every event.

    For each event the row ``int(.6*freq)`` of ``spec[0]`` is scanned backward
    from the first hoot and forward from the last hoot until the amplitude is
    no longer greater than ``min_threshold``. Each scan result is then averaged
    with a fixed buffer (10 steps before the first hoot, 13 after the last) and
    can never extend beyond that buffer:

        begin = (first-10 + max(first-10, scan_back)) // 2 - 1
        end   = (last+13  + min(last+13,  scan_fwd )) // 2 + 1

    Parameters
    ----------
    hoot_events : list of list of int
        Non-empty lists of hoot timesteps, one per event.
    spec : sequence
        ``spec[0][row][column]`` must return an amplitude.
    freq : number
        Centre frequency; converted to a row with ``int(.6*freq)``.
    min_threshold : number
        Amplitude at or below which a scan stops.

    Returns
    -------
    list of [int, int]
        ``[begin, end]`` timestep pair per event.

    Notes
    -----
    Only ``IndexError`` is treated as "ran off the spectrogram" (amplitude 0,
    with a printed notice). Any other exception now propagates instead of
    being silently interpreted as silence.
    NOTE(review): with list/array rows a backward scan that passes column 0
    continues with negative indices, which wrap to the end of the row. The
    result is still limited by the 10-step buffer, and can be negative for
    events near the start.
    """
    beginning_buffer = 10
    end_buffer = 13
    f = int(.6*freq)

    def _scan(start, step):
        # Walk from `start` in direction `step` until the row is quiet.
        s = start
        amp = spec[0][f][s]
        while amp > min_threshold:
            s += step
            try:
                amp = spec[0][f][s]
            except IndexError:
                amp = 0
                print(s,'out of range')
        return s

    event_time_bounds = []
    for event in hoot_events:
        set_beg = event[0]-beginning_buffer
        quiet_before = _scan(event[0], -1)
        begin = (set_beg + max(set_beg,quiet_before))//2 - 1

        set_end = event[-1]+end_buffer
        quiet_after = _scan(event[-1], 1)
        end = (set_end + min(set_end,quiet_after))//2 + 1

        event_time_bounds.append([begin, end])
    return event_time_bounds

def get_event_freq_bounds(hoot_events,spec,freq,min_threshold):
    """Estimate a [low, high] frequency pair for every event.

    Every hoot timestep of an event, plus the same timesteps shifted by
    -1, -2, -3, +1 and +2, is examined. At each timestep the spectrogram column
    is scanned upward and downward from row ``int(.6*freq)`` until the
    amplitude is no longer greater than ``min_threshold``; the stopping row is
    converted back with ``int(row/.6)``.

    The high bound is the maximum of the upward results below 240 and one
    random candidate ``205 + int(1.7*Poisson(5))``; if that maximum is below
    215 it is averaged with 230. The low bound is the minimum of the downward
    results above 70 and one random candidate ``80 - int(1.5*Poisson(5))``; if
    that minimum is above 75 it is averaged with 75.

    Parameters
    ----------
    hoot_events : list of list of int
        Hoot timesteps per event (each event must be a list).
    spec : sequence
        ``spec[0][row][column]`` must return an amplitude.
    freq : number
        Centre frequency; converted to a row with ``int(.6*freq)``.
    min_threshold : number
        Amplitude at or below which a scan stops.

    Returns
    -------
    list of [int, int]
        ``[low, high]`` per event.

    Notes
    -----
    The result is not deterministic: two ``np.random.poisson(5)`` draws are
    made per event (high bound first, then low bound).
    Only ``IndexError`` is treated as "ran off the spectrogram"; other
    exceptions propagate. The printed notice reports the row index that was
    actually out of range.
    """
    f = int(.6*freq)

    def _edge(timestep, step):
        # Walk rows from the centre in direction `step` until the column is quiet.
        current_f = f
        amp = spec[0][current_f][timestep]
        while amp > min_threshold:
            current_f += step
            try:
                amp = spec[0][current_f][timestep]
            except IndexError:
                amp = 0
                print(current_f,'out of range')
        return int(current_f/.6)

    event_freq_bounds = []
    for event in hoot_events:
        # Original timesteps first, then the shifted copies in this exact order.
        timesteps = event + [x+shift for shift in (-1,-2,-3,1,2) for x in event]

        maxes = []
        mins = []
        for timestep in timesteps:
            maxes.append(_edge(timestep, 1))
            mins.append(_edge(timestep, -1))

        maxes = [x for x in maxes if x < 240]
        maxes.append(205+int(1.7*np.random.poisson(5)))
        mx = max(maxes)
        if mx < 215:
            mx = int((mx+230)/2)

        mins = [x for x in mins if x > 70]
        mins.append(80-int(1.5*np.random.poisson(5)))
        mn = min(mins)
        if mn > 75:
            mn = int((mn+75)/2)

        event_freq_bounds.append([mn,mx])
    return event_freq_bounds

def compile_times_freqs(event_times,event_freqs,length,spec,section_start_time):
    """Pair each event's time bounds (converted to seconds) with its frequency bounds.

    A timestep ``t`` becomes ``round(t / (len(spec[2]) / length) + section_start_time, 3)``,
    i.e. it is divided by the number of spectrogram columns per unit of
    ``length`` and offset by the start of the section.

    Returns a list with one ``[[time, ...], event_freqs[i]]`` entry per element
    of ``event_times``.
    """
    return [
        [
            [round(step/(len(spec[2])/length)+section_start_time,3) for step in bounds],
            event_freqs[idx],
        ]
        for idx, bounds in enumerate(event_times)
    ]







def run_file(prefix,file_name,sensitivity):
    """Detect hoot events in one recording and write them as a tab-separated table.

    The recording ``prefix + file_name + '.wav'`` is processed in 300-second
    sections. For each section a spectrogram is computed, loud timesteps around
    145 Hz are collected, grouped into events, and each event's time and
    frequency bounds are appended to the table.

    Files written (module-level ``result_file``, ``complete_folder`` and
    ``complete_suffix`` are read for the paths):

    * ``result_file + file_name + suffix``: starts as a single placeholder row
      of -1 values and is overwritten after each section once the table holds
      more than one event.
    * ``complete_folder + file_name + suffix``: written once at the end. It
      holds the events if there are more than two, otherwise a single
      placeholder row of -1 values.
    * ``log.txt``: last processed second, rewritten after each section and
      removed at the end.

    Parameters
    ----------
    prefix : str
        Directory prefix of the recording.
    file_name : str
        Recording path relative to ``prefix`` without the ``.wav`` extension.
    sensitivity : str
        ``'normal'`` uses thresholds 20 / 5 and no file-name suffix; any other
        value uses thresholds 8 / 3 and the suffix ``'_sensitive'``.

    Returns
    -------
    ``()`` when the completed table already exists and is readable, otherwise
    ``None``.

    Side effects
    ------------
    After a recording has been processed, ``restart_run_all()`` is called and
    the function sleeps for 10 seconds.

    Changes from the earlier version: the section duration uses the file's
    real sample rate instead of 16000; event times use the actual section
    duration, which matters for the final, shorter section; rows are collected
    in a list instead of the removed ``DataFrame.append``; the completion check
    happens before the audio is loaded; the spectrogram figure is closed after
    each section; only ``Exception`` subclasses are swallowed by the
    completion check.
    """
    if sensitivity == 'normal':
        suffix = ''
        sensitive = False
    else:
        suffix = '_sensitive'
        sensitive = True

    file_suffix = suffix + complete_suffix
    result_path = result_file+file_name+file_suffix
    complete_path = complete_folder+file_name+file_suffix

    # A readable table in complete_folder marks this recording as done. Check
    # it before loading the audio so finished recordings are skipped cheaply.
    try:
        pd.read_csv(complete_path)
    except Exception:
        pass
    else:
        print(file_name+file_suffix,'already complete')
        return()

    sr, sample = wavfile.read(prefix+file_name+'.wav')

    section_length = 300

    file_duration = len(sample)
    seg_count = file_duration//(section_length*sr)+1

    columns = ['View','Channel','Begin Time (s)','End Time (s)',
               'Low Freq (Hz)','High Freq (Hz)','Noise']

    # Resuming from log.txt was disabled in the earlier version (it always
    # fell through to "start a new file"), so every run starts from scratch.
    empty_line = {
            'View'           : ['Spectrogram 1'],
            'Channel'        : [1],
            'Begin Time (s)' : [-1],
            'End Time (s)'   : [-1],
            'Low Freq (Hz)'  : [-1],
            'High Freq (Hz)' : [-1]
        }
    ddf = pd.DataFrame.from_dict(empty_line)
    ddf.to_csv(result_path, sep="\t",quoting=csv.QUOTE_NONE,index=False)
    most_recent = -1
    print('started new file',file_name+file_suffix)

    next_section = int(most_recent//section_length)+1

    # Section-independent settings.
    nft = int(sr*.06)
    nlp = int(sr*.025)
    cmap = plt.get_cmap('nipy_spectral')
    freq = 145
    threshold = 8 if sensitive else 20 # 2, 8 normal
    min_threshold = 3 if sensitive else 5 # 0.15, 3 normal

    rows = []
    df = pd.DataFrame(rows, columns=columns)

    for i in range(next_section,seg_count):

        start = i*(sr*section_length)
        section_start = i*(section_length)
        end = start+(sr*section_length)

        s = sample[start:end]
        # Real duration of this section in seconds; shorter than
        # section_length for the final section.
        length = len(s)/sr

        spec = plt.specgram(s,NFFT=nft, Fs=sr,noverlap=nlp,cmap=cmap,pad_to=10*nft)

        potential_hoots = []
        for timestep in range(5,len(spec[2])-5):
            if is_local_high(spec,freq,timestep,threshold):
                potential_hoots.append(timestep)

        hoot_centers = get_hoot_centers(potential_hoots)
        hoot_events = separate_hoot_events(hoot_centers,spec,length)
        hoot_events = remove_noise(hoot_events)
        event_times = get_event_time_bounds(hoot_events,spec,freq,min_threshold)
        event_freqs = get_event_freq_bounds(hoot_events,spec,freq,min_threshold)
        events = compile_times_freqs(event_times,event_freqs,length,spec,section_start)
        del spec
        # plt.specgram also draws onto the current figure, which keeps the
        # image data alive; close it so sections do not accumulate in memory.
        plt.close()

        for e in events:
            rows.append({
                'View'           : 'Spectrogram 1',
                'Channel'        : 1,
                'Begin Time (s)' : e[0][0],
                'End Time (s)'   : e[0][1],
                'Low Freq (Hz)'  : e[1][0],
                'High Freq (Hz)' : e[1][1],
                'Noise'          : ''
            })
        df = pd.DataFrame(rows, columns=columns)

        # NOTE(review): this tests the cumulative table, not the current
        # section, so the message below can be misleading - confirm intent.
        if len(df) > 1:
            df.to_csv(result_path,sep="\t",quoting=csv.QUOTE_NONE,index=False)
        else:
            print('segment',i*section_length,'to',(i+1)*section_length-1,'was empty')
        most_recent = (i+1)*section_length-1
        with open('log.txt','w') as log:
            log.write(str(most_recent))
        print('processed up to',most_recent,'/',seg_count*section_length)

    os.remove('log.txt')

    # NOTE(review): tables with one or two events are replaced by the
    # placeholder row here - confirm that dropping them is intended.
    if len(df)>2:
        df.to_csv(complete_path, sep="\t",quoting=csv.QUOTE_NONE,index=False)
    else:
        print('empty df')
        new_line = {
                'View'           : ['Spectrogram 1'],
                'Channel'        : [1],
                'Begin Time (s)' : [-1],
                'End Time (s)'   : [-1],
                'Low Freq (Hz)'  : [-1],
                'High Freq (Hz)' : [-1],
                'Noise'          : ['']
            }
        ndf = pd.DataFrame.from_dict(new_line)
        ndf.to_csv(complete_path, sep="\t",quoting=csv.QUOTE_NONE,index=False)

    # Restart the kernel and re-run the notebook; the sleep gives the restart
    # time to take effect before this cell continues.
    restart_run_all()
    time.sleep(10)

In [7]:
# Process every recording at both sensitivities, 'normal' first.
# run_file returns immediately for recordings whose completed table already
# exists, and restarts the kernel (re-running this notebook) after it finishes
# processing one.
for file_name in files:
    for sensitivity in ('normal', 'sensitive'):
        run_file(file_prefix,file_name,sensitivity)

print('all files complete')

May_19\NEW_HOME_NEW_FILES\501669_20200518_050000.Table.1.selections.txt already complete
May_19\NEW_HOME_NEW_FILES\501669_20200518_050000_sensitive.Table.1.selections.txt already complete
May_19\NEW_HOME_NEW_FILES\501669_20200518_070000.Table.1.selections.txt already complete
May_19\NEW_HOME_NEW_FILES\501669_20200518_070000_sensitive.Table.1.selections.txt already complete
all files complete
