## Import packages

In [1]:
%matplotlib widget

In [2]:
import os
import glob
import numpy  as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import ipywidgets as widgets
import wave

### Choose file to inspect

In [3]:
directory='/Users/saroltagabulya/git/Orca/'
wavs=glob.glob('wav_files/*.wav')
file=[]

dropdown=widgets.Dropdown(
    options=wavs,
    description='Wav files:',
    disabled=False,
)

def dropdown_eventhandler(change):
    file.clear()
    file.append(change.new)
    print(change.new)
dropdown.observe(dropdown_eventhandler, names='value')
display(dropdown)

Dropdown(description='Wav files:', options=('wav_files/2015-11-10--15-25.wav', 'wav_files/2017-02-04--10-14-06…

wav_files/2020-12-15--23-21-29--00-40-00--A.wav


### Read metadata


In [4]:
# Set sample rate
try:
    wave_file=wave.open(file[0], "rb")
    sample_rate = wave_file.getframerate()
except:
    sample_rate=int(input('Please check sampling rate manually in the metadata file and set below in Hz! \n'))
    
# Set reference value
ref_value=int(input('Please check reference value in the files README and set below in uPa! \n'))

# Calibration value
cal_value=int(input('Please check calibration value in the files README and set below! \n what does the fullscale voltage correspond to in Pa? \n '))


# Set researcher
researcher= input('Please indicate the researcher who performs the crop by initials [SG, JR] \n ')

Please check sampling rate manually in the metadata file and set below in Hz! 
50000
Please check reference value in the files README and set below in uPa! 
1
Please check calibration value in the files README and set below! 
 what does the fullscale voltage correspond to in Pa? 
 1
Please indicate the researcher who performs the crop by initials [SG, JR] 
 SG


### Read in wav file

In [5]:
# Read in wav
filename = directory + file[0]
y, sr = librosa.load(filename, sr=sample_rate)
print('Number of data points: {}, fs: {} Hz, resulting file length: {} min.'.format(np.shape(y)[0], sr, np.shape(y)[0]/sr/60))

Number of data points: 120017051, fs: 50000 Hz, resulting file length: 40.00568366666666 min.


## Display wav file

In [6]:
# Display wav
plt.figure(figsize=(20, 5))
librosa.display.waveshow(y, sr)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Calculate spectogram

In [8]:
# Convert to spectogram 
Y = librosa.stft(y) 
#Ydb = librosa.amplitude_to_db(abs(Y), ref=1)
Ydb = librosa.amplitude_to_db(abs(Y), ref=ref_value) # Try if it works leaving away abs!
                        # Check whether reference should go in as amplitude or power!

In [14]:
# Convert to spectogram 
#Y = librosa.stft(y) # Fourrier spectrum - should have amplitude and phase components for each bins
#Y_power=abs(Y)**2 # To get powers we need to square the Fourrier spectrum and to get rid phase component we take the absolute value
#This should result in the power spectrum (Pa**2)

#To convert it to power density spectrum (Pa**2/Hz) we divide it by the width of the frequency bins

# We usually don't plot in Pa**2/Hz. Instead we want a logarithmic scale. Divide it by the reference value. 
# dB - for power values (for amplitude value is is squared!)


# 



#Ydb = librosa.amplitude_to_db(abs(Y))


## Ultimately, we want a power density spectrum! 

#Units(dBa relative to 1 uPa**2/Hz)

### Event handling function

In [9]:
def onclick(event):
    if event.key=='b':
        row.clear()
        events.append(event)
        row.append(event.xdata)
        plt.gca().set_title('onset: {}'.format(event.xdata))
        
    elif event.key=='e':
        events.append(event)
        row.append(event.xdata)
        plt.gca().set_title('offset: {}'.format(event.xdata))
        
        if len(row)==2:
            row.append(filename)
            time_stamps.loc[len(time_stamps)] = row
            plt.gca().set_title('Timestamps appended. \n onset: {} \n offset {}'.format(onset, offset))
            row.clear()
            
    elif event.key == 's':
        # save croppings
        print('saved dataframe')
        time_stamps.to_csv('time_stamps.csv')
        
    else:
        plt.gca().set_title('Could not interpret user input, please make a new selection:\n b: define onset timestamps \n e: define offset timestamp and add to df \n s: save df to csv file'.format(onset, offset))

### Initializing data structures

In [10]:
time_stamps=pd.DataFrame(columns=['onset', 'offset', 'filename'])
events = []
row=[]

### Crop Ydb into smaller segments

In [35]:
Ydb_list=[]
ideal_length=50000
segment_boundaries=list(range(0, np.shape(Ydb)[1], ideal_length))

if np.shape(Ydb)[1]>ideal_length:
    a=round(np.shape(Ydb)[1]/ideal_length)
    
    for i in list(range(a)):
        onset=segment_boundaries[i]
        try:
            offset=segment_boundaries[i+1]
        except: 
            offset=np.shape(Ydb)[1]
        Ydb_list.append(Ydb[onset:offset])
        print(onset, offset)

0 50000
50000 100000
100000 150000
150000 200000
200000 234409


In [38]:
for s in list(range(len(Ydb_list))):
    segment=Ydb_list[s]
    # Plot spectrogram
    plt.figure(figsize=(200, 5))
    # tell mpl_connect we want to pass a 'button_press_event' into onclick when the event is detected
    plt.gcf().canvas.mpl_connect('key_press_event', onclick)
    librosa.display.specshow(segment, cmap='seismic', sr=sr, x_axis='time', y_axis='hz')
    plt.xticks(np.arange(0, len(segment)+1, 10))
    plt.colorbar()
    print(s)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

0


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

IndexError: cannot do a non-empty take from an empty axes.

### Plotting and connecting eventhandler to plot

In [19]:
# Plot spectrogram
plt.figure(figsize=(200, 5))
# tell mpl_connect we want to pass a 'button_press_event' into onclick when the event is detected
plt.gcf().canvas.mpl_connect('key_press_event', onclick)
librosa.display.specshow(Ydb, cmap='seismic', sr=sr, x_axis='time', y_axis='hz')
plt.xticks(np.arange(0, len(Ydb)+1, 10))
plt.colorbar()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.colorbar.Colorbar at 0x12c06d370>

In [22]:
time_stamps

Unnamed: 0,onset,offset,filename
0,842.03765,844.909211,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
1,849.358617,851.250553,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
2,856.73196,858.586944,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
3,870.880829,872.443733,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
4,879.143507,881.162606,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
5,926.733573,929.410736,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
6,952.981367,955.613625,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
7,971.88578,974.929328,/Users/saroltagabulya/git/Orca/2017-02-04--10-...


### Make sure to save time_stamps as csv by clicking s on graph!