To train a network on the mix of FM and CF calls, I have to generate spectrograms of the audio snippets. The spectrograms also need to be normalised to have values between 0 and 1, which helps the network's parameter estimation run well. 

In [1]:
from __future__ import division
from keras.utils import to_categorical
import matplotlib.pyplot as plt
plt.rcParams['agg.path.chunksize'] = 10000
import numpy as np 
import pandas as pd
import scipy.signal as signal 
import soundfile as sf

Using TensorFlow backend.


In [2]:
%matplotlib notebook

In [3]:
# Load the annotation table: each row names a recording file, a start/end time,
# a channel number and the call / group-size labels for that snippet.
# The file name is relative, so it is resolved against the kernel's working directory.
audio_labels = pd.read_csv('audio_labels.csv')

In [4]:
# lookup table from the text group-size labels to their integer codes
text_to_int = {'none': 0, 'single': 1, 'multi': 2}


def convert_txt_to_int(X):
    '''Return the integer code for the group-size label of one table row.

    Parameters
    ----------
    X : dict-like (e.g. one row of audio_labels)
        Must have a 'groupsize_label' entry that is a key of text_to_int.

    Returns
    -------
    int
        The matching value from text_to_int.

    Raises
    ------
    KeyError
        If the label is not one of the keys of text_to_int.
    '''
    return text_to_int[X['groupsize_label']]

# add the integer-coded group size as a new column (axis=1: the function gets one row at a time)
audio_labels['groupsize_intlabel'] = audio_labels.apply(convert_txt_to_int, axis=1)

In [5]:

# preview the first rows of the label table
audio_labels.head()

Unnamed: 0,date_recorded,file_name,time_start,time_end,duration,channel_num,groupsize_label,Ferrum_call,BlEuMi_call,Myotis_call,groupsize_intlabel
0,2018-08-16,T0000370.WAV,45.0,45.2,0.2,1,single,0,1,0,1
1,2018-08-16,T0000370.WAV,52.0,52.2,0.2,2,single,0,1,1,1
2,2018-08-16,T0000370.WAV,54.5,54.7,0.2,2,none,0,0,0,0
3,2018-08-16,T0000370.WAV,59.6,59.8,0.2,2,single,0,1,0,1
4,2018-08-16,T0000371.WAV,9.1,9.3,0.2,2,none,0,0,0,0


In [6]:
# preview the last rows of the label table
audio_labels.tail()

Unnamed: 0,date_recorded,file_name,time_start,time_end,duration,channel_num,groupsize_label,Ferrum_call,BlEuMi_call,Myotis_call,groupsize_intlabel
212,2018-08-18,T0000890.WAV,48.95,49.15,0.2,1,multi,0,1,1,2
213,2018-08-18,T0000895.WAV,18.6,18.8,0.2,2,single,0,1,0,1
214,2018-08-18,T0000896.WAV,5.2,5.4,0.2,0,single,1,0,0,1
215,2018-08-18,T0000896.WAV,5.8,6.0,0.2,0,single,1,0,1,1
216,2018-08-18,T0000896.WAV,37.15,37.35,0.2,1,single,0,1,0,1


In [7]:
# list the distinct recording dates; each of them is used as a key into folder_paths when loading audio
audio_labels['date_recorded'].unique()

array(['2018-08-16', '2018-08-18', '2018-08-19'], dtype=object)

In [8]:
# Map each recording date (values of the 'date_recorded' column) to the folder
# holding that day's audio files.
# NOTE(review): these are absolute, machine-specific paths, so the notebook only
# runs where this volume is mounted at the same location.
# The 2018-08-19 entry points at a 'ch1/' subfolder, unlike the other two dates.
folder_paths = {'2018-08-16':'/media/tbeleyur/THEJASVI_DATA_BACKUP_3/fieldwork_2018_002/horseshoe_bat/audio/2018-08-16/',
                '2018-08-18':'/media/tbeleyur/THEJASVI_DATA_BACKUP_3/fieldwork_2018_002/horseshoe_bat/audio/2018-08-18/',
                 '2018-08-19':'/media/tbeleyur/THEJASVI_DATA_BACKUP_3/fieldwork_2018_002/horseshoe_bat/audio/2018-08-19/ch1/'
               }



In [9]:
def get_audio_from_dfrow(dfrow, fs = 250000):
    '''Read the 0.2 s audio snippet described by one row of the label table.

    Parameters
    ----------
    dfrow : dict-like (e.g. one row of audio_labels)
        Needs the entries 'date_recorded' (a key of folder_paths),
        'file_name', 'time_start' and 'channel_num'.
        'time_start' is multiplied by the sampling rate to get a sample
        index, so it is taken to be in seconds.
        'time_end' is not read: the snippet length is fixed at 0.2 s.
    fs : int, optional
        Sampling rate the recording is expected to have, in Hz.
        Defaults to 250000. It is only used as a sanity check; the snippet
        boundaries are always computed from the rate stored in the file,
        so a file with a different rate is still cut at the right times.

    Returns
    -------
    audio_snippet : 1D np.array
        Samples of channel 'channel_num' within the snippet.
    fs : int
        Sampling rate reported by the audio file.

    Raises
    ------
    KeyError
        If 'date_recorded' has no entry in folder_paths.
    IndexError
        If 'channel_num' is not a valid channel of the file.
    '''
    import warnings

    file_path = folder_paths[dfrow['date_recorded']]+dfrow['file_name']

    # use the file's own sampling rate for the index arithmetic, and tell the
    # user when it is not the rate they expected
    file_fs = sf.info(file_path).samplerate
    if file_fs != fs:
        warnings.warn('{0} has a sampling rate of {1} Hz, expected {2} Hz'.format(
                      file_path, file_fs, fs))

    start_ind = int(np.around(dfrow['time_start']*file_fs))
    end_ind = int(np.around((dfrow['time_start']+0.2)*file_fs))

    # stop is exclusive, so end_ind+1 keeps the sample at end_ind in the snippet.
    # always_2d keeps the (samples, channels) layout even for single-channel
    # files, so the channel indexing below works for them too.
    audio, file_fs = sf.read(file_path, start=start_ind, stop=end_ind+1,
                             always_2d=True)
    chnum = dfrow['channel_num']
    audio_snippet = audio[:,chnum]
    return(audio_snippet, file_fs)
    

In [10]:
def make_normalised_spectrogram(dfrow):
    '''Make a dB spectrogram of one labelled snippet, scaled to lie in [0, 1].

    Parameters
    ----------
    dfrow : dict-like (e.g. one row of audio_labels)
        Passed on unchanged to get_audio_from_dfrow to load the audio.

    Returns
    -------
    S : np.array with shape (1, n_frequencies, n_timebins)
        Min-max normalised dB spectrogram: the quietest bin is 0 and the
        loudest bin is 1. A spectrogram with no dynamic range at all
        (e.g. pure digital silence) is returned as all zeros.
    '''
    au, fs = get_audio_from_dfrow(dfrow)
    f, t, S = signal.spectrogram(au, fs=fs, nperseg=1024, noverlap=512)

    # floor the values before the log so that an exactly-zero bin gives a
    # finite dB value instead of -inf (which would turn the output into nan/inf)
    S = 20*np.log10(np.maximum(np.abs(S), np.finfo(float).tiny))

    # Min-max normalise so values lie between 0-1.
    # Dividing by the maximum alone does not do this for dB values: they are
    # usually negative, which makes every ratio >= 1 and flips the ordering.
    # (The scale factor in front of log10 cancels out in this normalisation.)
    S_min, S_max = np.min(S), np.max(S)
    dynamic_range = S_max - S_min
    if dynamic_range > 0:
        S = (S - S_min)/dynamic_range
    else:
        S = np.zeros_like(S)

    # reshape so the array has a 1 dim 'thickness'
    S = S.reshape(1, S.shape[0], S.shape[1])
    return(S)

In [11]:
def get_specgram_label(dfrow):
    '''Collect the four label entries of one table row into a 1D array.

    Parameters
    ----------
    dfrow : dict-like (e.g. one row of audio_labels)
        Needs the entries 'Ferrum_call', 'BlEuMi_call', 'Myotis_call'
        and 'groupsize_intlabel'.

    Returns
    -------
    np.array with 4 elements, in the order listed above.
    '''
    label_columns = ('Ferrum_call', 'BlEuMi_call',
                     'Myotis_call', 'groupsize_intlabel')
    return np.array([dfrow[column] for column in label_columns])
                

In [12]:
def make_specgram_and_label(dfrow):
    '''Return the (spectrogram, label) pair for one row of the label table.

    The spectrogram comes from make_normalised_spectrogram and the label
    array from get_specgram_label; both are given the same row.
    '''
    return (make_normalised_spectrogram(dfrow), get_specgram_label(dfrow))

In [13]:

# quick check on the first labelled snippet: the printed shape is what the
# pre-allocated array for all spectrograms is sized to later on
s= make_normalised_spectrogram(audio_labels.iloc[0,:])
print(s.shape)


(1, 513, 96)


#### Let's generate the spectrograms and labels for all of the labelled data now:

In [14]:
# Pre-allocate the outputs: one 513 x 96 spectrogram and one 4-element label
# vector per row of audio_labels.
# 513 x 96 is the spectrogram shape printed for the first row above, and 4 is
# the number of entries returned by get_specgram_label.
# NOTE(review): this assumes every snippet yields the same spectrogram shape.
all_specgrams = np.zeros((audio_labels.shape[0],513,96))
all_labels = np.zeros((audio_labels.shape[0], 4))

In [21]:
# Fill the pre-allocated arrays, one labelled snippet per slot.
# enumerate() supplies the slot number: the index label that iterrows() yields
# is only equal to the row position while the table has its default 0..N-1
# index, which stops being true after any filtering, sorting or re-indexing.
for i, (_, row) in enumerate(audio_labels.iterrows()):
    s, label = make_specgram_and_label(row)
    # s carries a leading length-1 axis; take the 2D spectrogram explicitly
    all_specgrams[i,:,:] = s[0]
    all_labels[i,:] = label