### Import packages

In [20]:
import os
import glob
import numpy  as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import ipywidgets as widgets
import soundfile as sf
import yaml
import math

### Choose file to inspect

In [2]:
directory='/Users/saroltagabulya/git/Orca/'
wavs=glob.glob('timestamp_csv/*.csv')
file=[]

dropdown=widgets.Dropdown(
    options=wavs,
    description='CSV files:',
    disabled=False,
)

def dropdown_eventhandler(change):
    file.clear()
    file.append(change.new)
    print(change.new)
dropdown.observe(dropdown_eventhandler, names='value')
display(dropdown)

Dropdown(description='CSV files:', options=('timestamp_csv/time_stamps_2020-12-15--23-21-29--00-40-00--A.csv',…

timestamp_csv/time_stamps_2017-02-04--10-14-06--00-16-35--BC_2.csv


### Read csv file as pandas

In [3]:
filename = directory + file[0]
time_stamps=pd.read_csv(filename, index_col=0)

# Also extract recording name for later use
recording_name=file[0][26:-4]

In [4]:
recording_name

'2017-02-04--10-14-06--00-16-35--BC_2'

### Read metadata

In [5]:
# Set sample rate
try:
    wave_file=wave.open(file[0], "rb")
    sample_rate = wave_file.getframerate()
except:
    sample_rate=int(input('Please check sampling rate manually in the metadata file and set below in Hz! \n'))
    
# Set reference value
ref_value=int(input('Please check reference value in the files README and set below in uPa! \n'))

# Calibration value
cal_value=int(input('Please check calibration value in the files README and set below! \n what does the fullscale voltage correspond to in Pa? \n '))


# Set researcher
researcher= input('Please indicate the researcher who performs the crop by initials [SG, JR] \n ')

Please check sampling rate manually in the metadata file and set below in Hz! 
100000
Please check reference value in the files README and set below in uPa! 
1
Please check calibration value in the files README and set below! 
 what does the fullscale voltage correspond to in Pa? 
 1
Please indicate the researcher who performs the crop by initials [SG, JR] 
 SG


### Read in wav

In [10]:
recording_name=recording_name[:-2]

y, sr=librosa.load('wav_files/' +recording_name + '.wav', sr=sample_rate)

In [21]:
round(time_stamps.onset)


0    842.0
1    849.0
2    857.0
3    871.0
4    879.0
5    927.0
6    953.0
7    972.0
Name: onset, dtype: float64

In [23]:

time_stamps.onset.apply(lambda x: math.floor(x))

0    842
1    849
2    856
3    870
4    879
5    926
6    952
7    971
Name: onset, dtype: int64

### Onsets & offsets in csv are in seconds => Convert to samples by multiplying them with the fs

second * fs = samples


In [31]:
time_stamps.onset=time_stamps.onset.apply(lambda x: math.floor(x)) # Round down
time_stamps.offset=time_stamps.offset.apply(lambda x: math.ceil(x)) # Round up and + 1 

time_stamps['onset_samples']=time_stamps.onset.apply(lambda x: x*sr) 
time_stamps['offset_samples']=time_stamps.offset.apply(lambda x: x*sr + 1)

### Function for cropping wav and saving

In [13]:
try:
    os.mkdir('./call_segments/' + recording_name)
    
except Exception as e:
    print(e)

[Errno 17] File exists: './call_segments/2017-02-04--10-14-06--00-16-35--BC'


In [23]:
def crop_call(row, y, sr, recording_name):
    call_filename= recording_name + '_' + str(row.onset_samples) + '_' + str(row.offset_samples) + '.wav'
    call=y[row.onset_samples: row.offset_samples]  # Interval closed at the beginning and open at the end
    call_path='./call_segments/' + recording_name +'/' +  call_filename
    sf.write(call_path, call, sr) 
    return call_path

In [24]:
time_stamps['call_wav']=time_stamps.apply(lambda x: crop_call(x, y, sr, recording_name), axis=1)

### Prepare a metadata dataframe

In [34]:
def prep_metadata(time_stamps, sample_rate, ref_value, cal_value, researcher):
    meta_df=pd.DataFrame()
    
    meta_df['segment_file']=time_stamps.apply(lambda x: x.call_wav.split('/')[-1], axis=1)
    meta_df['wav_origin']=time_stamps.apply(lambda x: x.call_wav.split('/')[-1].split('_')[0] + '.wav', axis=1)
    meta_df['onset_seconds']=time_stamps.onset
    meta_df['offset_seconds']=time_stamps.offset
    meta_df['duration_seconds']=time_stamps.offset-time_stamps.onset 
    meta_df['onset_samples']=time_stamps.onset_samples
    meta_df['offset_samples']=time_stamps.offset_samples
    meta_df['duration_samples']=time_stamps.offset_samples-time_stamps.onset_samples
    meta_df['sample_rate']=pd.Series([sample_rate]*len(time_stamps))
    meta_df['ref_value']=pd.Series([ref_value]*len(time_stamps))
    meta_df['cal_value']=pd.Series([cal_value]*len(time_stamps))
    meta_df['researcher']=pd.Series(['SG']*len(time_stamps))
    
    return meta_df

In [35]:
researcher='SG'
meta_df=prep_metadata(time_stamps, sample_rate, ref_value, cal_value, researcher)

### Create metadata files

In [36]:
def write_metadata(row, output_dir):
    
    #Prepare filename
    metafilename=row.wav_origin[:-4] + '/'+ row.segment_file[:-3]  + 'yml'
    
    #Convert row to dictionary
    d = {k: (v if type(v).__module__ != 'numpy' else v.item()) for k,v in row.to_dict().items()}
    
    
    with open(output_dir + metafilename, 'w') as outfile:
        yaml.dump(d, outfile)

In [37]:
output_dir='./call_segments/' 
meta_df.apply(lambda x: write_metadata(x, output_dir), axis =1)

0      None
1      None
2      None
3      None
4      None
       ... 
119    None
120    None
121    None
122    None
123    None
Length: 124, dtype: object

### Save csv under new name

In [40]:
time_stamps.to_csv('call_data/call_data_' + recording_name + '.csv')

### Read in metadata

In [None]:
metafile=''

with open(metafile) as file:
    # The FullLoader parameter handles the conversion from YAML
    # scalar values to Python the dictionary format
    metadata = yaml.load(file, Loader=yaml.FullLoader)
    
print(metadata)