## Import packages

In [1]:
%matplotlib widget

In [2]:
import os
import glob
import numpy  as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import ipywidgets as widgets
import wave

### Choose file to inspect

In [3]:
# Folder that holds the recordings. NOTE: this is a machine-specific absolute
# path; it must keep its trailing separator because the loading cell builds the
# full path as `directory + file[0]`.
directory='/Users/saroltagabulya/git/Orca/'

# List the recordings found in `directory` itself (not in whatever the current
# working directory happens to be) and keep bare file names, so that
# `directory + name` always points at the listed file.
wavs=sorted(os.path.basename(path) for path in glob.glob(os.path.join(directory, '*.wav')))

dropdown=widgets.Dropdown(
    options=wavs,
    description='Wav files:',
    disabled=False,
)

# One-element list holding the current selection. The observer only fires when
# the value changes, so seed it with the entry the dropdown shows initially;
# otherwise that first file could never be picked.
file=[dropdown.value] if wavs else []

def dropdown_eventhandler(change):
    """Make the newly selected wav file the only entry of `file` and echo it."""
    selected = change.new
    # Slice assignment replaces the contents in place, so other references to
    # the same list object see the new selection.
    file[:] = [selected]
    print(selected)
# Call the handler for changes of the dropdown's `value` trait, then render the widget.
dropdown.observe(dropdown_eventhandler, names='value')
display(dropdown)

Dropdown(description='Wav files:', options=('2015-11-10--15-25.wav', '2017-02-04--10-14-06--00-16-35--BC.wav',…

2017-02-04--10-14-06--00-16-35--BC.wav


### Read metadata


In [5]:
# Read the sample rate from the WAV header of the selected recording.
FALLBACK_SAMPLE_RATE = 100000  # Hz; used when the header cannot be read
try:
    # Open the same path that the loading cell uses (`directory` + file name)
    # and let the context manager close the handle again.
    with wave.open(os.path.join(directory, file[0]), "rb") as wave_file:
        sample_rate = wave_file.getframerate()
except (wave.Error, EOFError, OSError, IndexError) as err:
    # Only the failures this lookup can produce are caught (unreadable or
    # unsupported header, missing file, nothing selected). The reason is
    # reported instead of being swallowed silently.
    print('Please check sampling rate manually and set below!')
    print('Could not read the WAV header: {!r}'.format(err))
    sample_rate = FALLBACK_SAMPLE_RATE

### Read in wav file

In [6]:
# Load the selected recording at the chosen sample rate and report its size.
filename = directory + file[0]
y, sr = librosa.load(filename, sr=sample_rate)
n_samples = y.shape[0]
print('Number of data points: {}, fs: {} Hz, resulting file length: {} min.'.format(n_samples, sr, n_samples/sr/60))

Number of data points: 99500996, fs: 100000 Hz, resulting file length: 16.583499333333332 min.


## Display wav file

In [7]:
# Plot the time-domain waveform of the whole recording.
plt.figure(figsize=(20, 5))
# `sr` must be passed by keyword: recent librosa releases declare it
# keyword-only and reject a positional sample rate.
librosa.display.waveshow(y, sr=sr)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Calculate spectrogram

In [8]:
# Convert to a dB-scaled magnitude spectrogram.
Y = librosa.stft(y)  # complex short-time Fourier transform
# amplitude_to_db works on magnitudes. Passing the complex STFT directly only
# works because librosa takes the magnitude itself, after warning that the
# phase information is discarded, so take the absolute value explicitly.
# `ref` is an amplitude here (not a power); ref=1 leaves the scale unshifted.
Ydb = librosa.amplitude_to_db(np.abs(Y), ref=1)

In [53]:
# Power spectrogram from the STFT.
Y = librosa.stft(y)  # Fourier spectrum - should have amplitude and phase components for each bin
# Taking the absolute value gets rid of the phase component; squaring turns the
# amplitudes into powers. This should result in the power spectrum (Pa**2).
Y_power = np.abs(Y) ** 2

# Notes on the remaining steps:
# - To convert it to a power density spectrum (Pa**2/Hz), divide it by the width
#   of the frequency bins.
# - We usually don't plot in Pa**2/Hz. Instead we want a logarithmic scale:
#   divide by the reference value and express the result in dB
#   (dB is for power values; for amplitude values it is squared!).
# - Earlier attempt, kept for reference:
#   Ydb = librosa.amplitude_to_db(abs(Y))


## Ultimately, we want a power density spectrum! 

#Units(dBa relative to 1 uPa**2/Hz)

# Power spectrum vs amplitude
2017: signal is +-1 Volt data (what comes out of the amplifier)
2020: Calibrate: the unit for sound pressure is the pascal; the reference level is 20 µPa in air and 1 µPa in water.

Power(density)= amplitude**2
nfft= 

Power spectrum (P**2)

reference value 

2015-11-10--15-25.wav: 96kHz, 24bit 




In [10]:
# Inspect the dB-scaled spectrogram (the notebook shows an abbreviated array repr).
Ydb

array([[-27.377338 , -27.377338 , -27.377338 , ..., -27.377338 ,
        -27.377338 , -24.385162 ],
       [-27.377338 , -27.377338 , -27.377338 , ..., -27.377338 ,
        -27.377338 , -20.75101  ],
       [-27.377338 , -27.377338 , -27.377338 , ...,  -7.7992635,
         -9.980928 , -13.668762 ],
       ...,
       [-27.377338 , -27.377338 , -27.377338 , ..., -27.377338 ,
        -27.377338 , -27.377338 ],
       [-27.377338 , -27.377338 , -27.377338 , ..., -27.377338 ,
        -27.377338 , -27.377338 ],
       [-27.377338 , -27.377338 , -27.377338 , ..., -27.377338 ,
        -27.377338 , -27.377338 ]], dtype=float32)

### Plot spectrogram

In [8]:
# Normalize the spectrogram so that its smallest value becomes 0.
# Done directly in NumPy: wrapping the array in a DataFrame twice just to take a
# minimum and subtract a scalar is slow and memory-hungry for a long recording.
min_value=np.nanmin(Ydb)  # skips NaNs, like the pandas min it replaces
Ydb_norm=Ydb-min_value

In [12]:
# Whitening: remove the average background level of each buffer by subtracting
# every column's mean from that column (one column per STFT frame, assuming
# librosa's usual frequency-by-frame layout).
# TODO (original note): per frequency bin, use the median / 10th-percentile
# level instead, as a frequency gain correction.
# Vectorized with broadcasting: the previous row-by-row DataFrame.apply produced
# the same values (up to floating-point rounding) far more slowly.
mean_values=pd.Series(np.nanmean(Ydb_norm, axis=0))  # kept as a Series, as before
Ydb_norm_w=Ydb_norm-mean_values.to_numpy()

In [13]:
# Plot the normalized spectrogram.
plt.figure(figsize=(200, 5))
librosa.display.specshow(Ydb_norm, cmap='seismic', sr=sr, x_axis='time', y_axis='hz')
# The x axis is in seconds, so place a tick every 10 s across the recording's
# duration. len(Ydb_norm) is the number of rows of the spectrogram, not a time,
# so the tick range is derived from the number of frames instead (default hop
# length, matching the stft/specshow defaults used here).
duration_s=librosa.frames_to_time(Ydb_norm.shape[1], sr=sr)
plt.xticks(np.arange(0, duration_s+1, 10))
plt.colorbar()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.colorbar.Colorbar at 0x1344562e0>

In [14]:
# Close the current matplotlib figure.
plt.close()

### Event handling function

In [11]:
def onclick(event):
    """Handle key presses on the spectrogram to collect onset/offset timestamps.

    Keys:
        b: start a new annotation; the cursor's x position becomes the onset.
        e: the cursor's x position becomes the offset; together with the
           pending onset it is appended to `time_stamps` along with `filename`.
        s: write `time_stamps` to 'time_stamps.csv'.
    Any other key shows a usage hint in the plot title.

    Reads and updates the notebook-level `row`, `events` and `time_stamps`,
    and reads `filename`.
    """
    ax = plt.gca()
    key = event.key

    if key in ('b', 'e'):
        events.append(event)
        if event.xdata is None:
            # Matplotlib reports xdata=None when the cursor is outside the axes;
            # storing that would produce a timestamp row without a usable time.
            ax.set_title('Cursor is outside the plot, please point at the spectrogram and try again.')
            return

    if key == 'b':
        row.clear()
        row.append(event.xdata)
        ax.set_title('onset: {}'.format(event.xdata))

    elif key == 'e':
        if len(row) != 1:
            # No pending onset: do not keep this offset around, otherwise the
            # next 'e' would pair two offsets into one row.
            row.clear()
            ax.set_title('No onset defined yet, please press b first.')
            return
        row.append(event.xdata)
        # Take the values from `row`; the names onset/offset were previously
        # undefined here, which raised a NameError after the row was stored.
        onset, offset = row
        row.append(filename)
        time_stamps.loc[len(time_stamps)] = row
        ax.set_title('Timestamps appended. \n onset: {} \n offset {}'.format(onset, offset))
        row.clear()

    elif key == 's':
        # Save the collected timestamps, then confirm.
        time_stamps.to_csv('time_stamps.csv')
        print('saved dataframe')

    else:
        ax.set_title('Could not interpret user input, please make a new selection:\n b: define onset timestamps \n e: define offset timestamp and add to df \n s: save df to csv file')

### Initializing data structures

In [12]:
# Notebook-level state shared with the key-press handler.
row=list()     # onset/offset values of the annotation currently being built
events=list()  # matplotlib events recorded by the handler
time_stamps=pd.DataFrame(columns=['onset', 'offset', 'filename'])  # finished annotations

### Plotting and connecting eventhandler to plot

In [13]:
# Plot the spectrogram and make it respond to key presses.
plt.figure(figsize=(200, 5))
# Tell mpl_connect to pass every 'key_press_event' on this figure to onclick.
plt.gcf().canvas.mpl_connect('key_press_event', onclick)
librosa.display.specshow(Ydb, cmap='seismic', sr=sr, x_axis='time', y_axis='hz')
# The x axis is in seconds, so place a tick every 10 s across the recording's
# duration. len(Ydb) is the number of rows of the spectrogram, not a time, so
# the tick range is derived from the number of frames instead (default hop
# length, matching the stft/specshow defaults used here).
duration_s=librosa.frames_to_time(Ydb.shape[1], sr=sr)
plt.xticks(np.arange(0, duration_s+1, 10))
plt.colorbar()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.colorbar.Colorbar at 0x136279940>

In [22]:
# Show the timestamps collected so far.
time_stamps

Unnamed: 0,onset,offset,filename
0,842.03765,844.909211,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
1,849.358617,851.250553,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
2,856.73196,858.586944,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
3,870.880829,872.443733,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
4,879.143507,881.162606,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
5,926.733573,929.410736,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
6,952.981367,955.613625,/Users/saroltagabulya/git/Orca/2017-02-04--10-...
7,971.88578,974.929328,/Users/saroltagabulya/git/Orca/2017-02-04--10-...


### Make sure to save time_stamps as csv by pressing `s` while the plot has focus!