## Import packages

In [1]:
%matplotlib widget

In [2]:
import os
import glob
import numpy  as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import ipywidgets as widgets
import wave

### Choose file to inspect

In [3]:
directory='/Users/saroltagabulya/git/Orca/'
wavs=glob.glob('wav_files/*.wav')
file=[]

dropdown=widgets.Dropdown(
    options=wavs,
    description='Wav files:',
    disabled=False,
)

def dropdown_eventhandler(change):
    file.clear()
    file.append(change.new)
    print(change.new)
dropdown.observe(dropdown_eventhandler, names='value')
display(dropdown)

Dropdown(description='Wav files:', options=('wav_files/2015-11-10--15-25.wav', 'wav_files/2017-02-04--10-25-15…

wav_files/2017-02-04--10-25-15--00-05-25--C.wav


### Read metadata


In [4]:
# Set sample rate
try:
    wave_file=wave.open(file[0], "rb")
    sample_rate = wave_file.getframerate()
except:
    sample_rate=int(input('Please check sampling rate manually in the metadata file and set below in Hz! \n'))
    
# Set reference value
ref_value=int(input('Please check reference value in the files README and set below! \n'))

# Calibration value
cal_value=int(input('Please check calibration value in the files README and set below!'  +
                    '\n what does the fullscale voltage correspond to? \n '))

# Set researcher
researcher= input('Please indicate the researcher who performs the crop by initials [SG, JR] \n ')

Please check sampling rate manually in the metadata file and set below in Hz! 
100000
Please check reference value in the files README and set below! 
1
Please check calibration value in the files README and set below!
 what does the fullscale voltage correspond to? 
 1
Please indicate the researcher who performs the crop by initials [SG, JR] 
 SG


### Read in wav file

In [5]:
# Read in wav
filename = directory + file[0]
y_all, sr=librosa.load(filename, sr=sample_rate, mono=False)
y=y_all[0]
print('Number of data points: {}, fs: {} Hz, resulting file length: {} min.'.format(np.shape(y)[0], sr, np.shape(y)[0]/sr/60))

Number of data points: 32500326, fs: 100000 Hz, resulting file length: 5.416721 min.


## Display wav file

In [6]:
# Display wav
plt.figure(figsize=(20, 5))
librosa.display.waveshow(y, sr)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …



### Calculate spectogram

In [7]:
def convert_wav_to_spect(y, ref_value):
    # Convert to spectogram 
    Y = librosa.stft(y) 
    #Ydb = librosa.amplitude_to_db(abs(Y), ref=1)
    Ydb = librosa.amplitude_to_db(abs(Y), ref=ref_value) # Try if it works leaving away abs!
                                               # Check whether reference should go in as amplitude or power
    return Ydb

### Event handling function

In [8]:
def onclick_for_s(s):
    def onclick(event):
        if event.key=='b':
            row.clear()
            events.append(event)
            row.append(event.xdata + (s)*10*60)
            plt.gca().set_title('onset: {}'.format(event.xdata))

        elif event.key=='e':
            events.append(event)
            row.append(event.xdata + (s)*10*60)
            plt.gca().set_title('offset: {}'.format(event.xdata))

            if len(row)==2:
                row.append(filename)
                time_stamps.loc[len(time_stamps)] = row
                plt.gca().set_title('Timestamps appended. \n onset: {} \n offset {}'.format(onset, offset))
                row.clear()

        elif event.key == 's':
            # save croppings
            print('saved dataframe')
            time_stamps.to_csv('time_stamps.csv')

        else:
            plt.gca().set_title('Could not interpret user input, please make a new selection:\n b: define onset timestamps \n e: define offset timestamp and add to df \n s: save df to csv file'.format(onset, offset))
    return onclick

### Initializing data structures

In [9]:
time_stamps=pd.DataFrame(columns=['onset', 'offset', 'filename'])
events = []
row=[]

### Crop Ydb into smaller segments

In [10]:
y_list=[]
ideal_length=600*sample_rate
segment_boundaries=list(range(0, np.shape(y)[0], ideal_length))

if np.shape(y)[0]>ideal_length:
    a=round(np.shape(y)[0]/ideal_length)
    
    for i in list(range(a+1)):
        onset=segment_boundaries[i]
        try:
            offset=segment_boundaries[i+1]
        except: 
            offset=np.shape(y)[0]
        y_list.append(y[onset:offset])
        print(onset, offset)
else:
    onset=0
    offset=np.shape(y)[0] +1
    print(onset, offset)
    y_list.append(y[onset:offset])

0 32500327


### Plotting and connecting eventhandler to plot

In [11]:
for s in list(range(len(y_list))):
    segment=y_list[s]
    spect=convert_wav_to_spect(segment, ref_value)
    spect=spect[0:250, :]
    # Plot spectrogram
    plt.figure(figsize=(200, 5))
    # tell mpl_connect we want to pass a 'button_press_event' into onclick when the event is detected
    plt.gcf().canvas.mpl_connect('key_press_event', onclick_for_s(s))
    librosa.display.specshow(spect, cmap='seismic', sr=sample_rate, x_axis='time', y_axis='hz')
    plt.xticks(np.arange(0, len(spect)+1, 10))
    plt.colorbar()
    print(s)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

0


In [12]:
time_stamps

Unnamed: 0,onset,offset,filename
0,173.870337,186.285687,/Users/saroltagabulya/git/Orca/wav_files/2017-...


### Make sure to save time_stamps as csv by clicking s on graph!