# Exploring a database of wildlife recordings

This database contains a series of 1-minute recordings, each containing at least one call of a known wildlife species. These calls are identified in the file `train_tp.csv` with start and end times and frequency bands.

Each recording may also contain calls of other species. The file `train_fp.csv` lists events that an automated algorithm identified wrongly (false positives).

Here we explore the times and frequency bands of each species and pinpoint them in a representation of the recording. 

In [None]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)



We'll use librosa to read audio and perform some analysis

In [None]:
import librosa as lr
import librosa.display as lrd
import os
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns

Let's read both tables to identify true and false positives on the spectrum

In [None]:
# Annotation tables: `tpdf` holds the true-positive events, `fpdf` the false positives.
tp_csv_path = '/kaggle/input/rfcx-species-audio-detection/train_tp.csv'
fp_csv_path = '/kaggle/input/rfcx-species-audio-detection/train_fp.csv'

tpdf = pd.read_csv(tp_csv_path)
fpdf = pd.read_csv(fp_csv_path)


## Number of samples per call type and species

In [None]:
# Derived per-event metadata: length in time and width of the frequency band.
tpdf['duration'] = tpdf['t_max'] - tpdf['t_min']
tpdf['bandwidth'] = tpdf['f_max'] - tpdf['f_min']

# Number of annotated events for every (species, song type) pair.
# Combinations that never occur come out as NaN and are shown as 0.
event_counts = tpdf.pivot_table(values='duration',
                                index='species_id',
                                columns='songtype_id',
                                aggfunc='count')
event_counts.fillna(0)

## Metadata per species

Notice how the duration and frequency bands are highly characteristic of each species. 

The trouble will be to establish the event boundaries in new sound files

### Call duration


In [None]:
# Call duration per species, one box per song type.
fig, ax = plt.subplots(figsize=(10, 4))
sns.boxplot(x='species_id', y='duration', hue='songtype_id', data=tpdf, ax=ax)

### Lower frequency of call band

In [None]:
# Lower edge of the call's frequency band per species, one box per song type.
fig, ax = plt.subplots(figsize=(10, 4))
sns.boxplot(x='species_id', y='f_min', hue='songtype_id', data=tpdf, ax=ax)

### Upper frequency of call band

In [None]:
# Upper edge of the call's frequency band per species, one box per song type.
fig, ax = plt.subplots(figsize=(10, 4))
sns.boxplot(x='species_id', y='f_max', hue='songtype_id', data=tpdf, ax=ax)

### Bandwidth

In [None]:
# Width of the call's frequency band (f_max - f_min) per species, one box per song type.
fig, ax = plt.subplots(figsize=(10, 4))
sns.boxplot(x='species_id', y='bandwidth', hue='songtype_id', data=tpdf, ax=ax)

## Sample audio

Now let's select a sample audio file and mark the events 

We'll use the mel spectrogram to visualize the audio data. It's a spectrogram whose frequency bands are spaced on the mel scale, which is approximately logarithmic.

In [None]:
# Draw one true-positive event at random; `row` stays a one-row DataFrame,
# so later cells read its fields through `row.iloc[0]`.
row = tpdf.sample(n=1)

# Folder holding the training recordings, one .flac file per recording_id.
base_dir = '/kaggle/input/rfcx-species-audio-detection/train/'

# NOTE: no `sr` argument is given, so librosa's default sample rate applies.
sample_path = os.path.join(base_dir, row.iloc[0]['recording_id'] + '.flac')
w, sr = lr.load(sample_path)

In [None]:
# Mel-spectrogram settings, reused by the plotting cells below.
fmax_mel = 11000    # passed as `fmax`: upper frequency limit of the mel bands
hop_length = 256    # samples between successive frames
n_wind = 1024       # passed as `n_fft`: analysis window length in samples

# The signal and sample rate are passed by keyword: recent librosa releases
# made them keyword-only, so the positional form raises a TypeError there.
ms = lr.feature.melspectrogram(y=w, sr=sr, n_fft=n_wind,
                               hop_length=hop_length, fmax=fmax_mel)

In [None]:
fig, ax = plt.subplots(1)

# Convert to dB because the range is too wide.
# The input is floored at a small positive value first, so bins with exactly
# zero energy give a finite level instead of -inf (which would turn the
# median / percentile statistics computed later into NaN).
dbs = 20 * np.log10(np.maximum(ms, 1e-10))

# This shows the MEL spectrogram
img = lrd.specshow(dbs, sr=sr, fmax=fmax_mel,
                   y_axis="mel", x_axis="time",
                   hop_length=hop_length,
                   cmap='gray', ax=ax)

# Overlay every annotated event of this recording as a translucent box
# spanning its time interval and frequency band:
# green for true positives, red for false positives.
rec_id = row.iloc[0].recording_id
for table, label, colour in ((tpdf, 'TRUE', 'g'), (fpdf, 'FALSE', 'r')):
    recrows = table[table.recording_id == rec_id]
    print(f'Number of {label} positives: {len(recrows)}')
    for ir, rrow in recrows.iterrows():
        rect = patches.Rectangle((rrow.t_min, rrow.f_min),
                                 rrow.t_max - rrow.t_min, rrow.f_max - rrow.f_min,
                                 linewidth=1, edgecolor=colour, facecolor=colour,
                                 alpha=.2)
        ax.add_patch(rect)


Notice how some of the true events are hard to distinguish from the background noise

## Extract a normalised spectrum

In [None]:
# Robust statistics of each mel band over time:
# the median is the centre, the inter-quartile range (75th - 25th percentile) the spread.
avs = np.median(dbs, axis=1)
q25, q75 = np.percentile(dbs, [25, 75], axis=1)
dvs = q75 - q25

# A band whose level never changes has a zero spread; dividing by it would
# give NaN/inf, so such bands are divided by 1 instead.
safe_dvs = np.where(dvs > 0, dvs, 1)

# Broadcasting the per-band vectors over the time axis replaces the explicit np.tile.
zs = (dbs - avs[:, np.newaxis]) / safe_dvs[:, np.newaxis]
img = lrd.specshow(zs, sr=sr, fmax=fmax_mel,
                   y_axis="mel", x_axis="time",
                   hop_length=hop_length,
                   cmap='gray')


In [None]:
# Thresholded copy of the normalised spectrum: every value below 1 becomes 0.
zzs = np.where(zs < 1, 0, zs)

fig, ax = plt.subplots(figsize=(12, 4))

img = lrd.specshow(zzs, x_axis="time", y_axis="mel",
                   sr=sr, hop_length=hop_length, fmax=fmax_mel,
                   cmap='gray', ax=ax)


def _shade_events(events, colour, opacity):
    """Add one translucent time/frequency box to `ax` for each row of `events`."""
    for _, ev in events.iterrows():
        box = patches.Rectangle((ev.t_min, ev.f_min),
                                ev.t_max - ev.t_min, ev.f_max - ev.f_min,
                                linewidth=1, edgecolor=colour, facecolor=colour,
                                alpha=opacity)
        ax.add_patch(box)


this_recording = row.iloc[0].recording_id

# True positives of this recording, in green.
trecrows = tpdf[tpdf.recording_id == this_recording]
print(f'Number of TRUE positives: {len(trecrows)}')
_shade_events(trecrows, 'g', .5)

# False positives of this recording, in red.
frecrows = fpdf[fpdf.recording_id == this_recording]
print(f'Number of FALSE positives: {len(frecrows)}')
_shade_events(frecrows, 'r', .2)


In [None]:
# Time of every spectrogram frame, derived from the frame index and hop length.
# The previous `len(w-n_wind)` subtracted n_wind from each sample and then took
# the length, i.e. it was just len(w), so the window length never entered the
# time axis. frames_to_time gives the frame times directly
# (assuming librosa's default centred framing -- TODO confirm).
t = lr.frames_to_time(np.arange(dbs.shape[1]), sr=sr, hop_length=hop_length)

# One close-up figure per annotated event: only the frames inside the event's
# time interval are shown, and its frequency band is shaded
# (green for true positives, red for false positives).
for events, colour in ((trecrows, 'g'), (frecrows, 'r')):
    for ir, rrow in events.iterrows():
        idx = (t > rrow.t_min) & (t < rrow.t_max)
        fig, ax = plt.subplots(1)
        img = lrd.specshow(zzs[:, idx], sr=sr, fmax=fmax_mel,
                           y_axis="mel", x_axis="time",
                           hop_length=hop_length,
                           cmap='gray', ax=ax)
        ax.axhspan(rrow.f_min, rrow.f_max, color=colour, alpha=.4)


## Compare a few samples of the same species

In [None]:
# All true-positive events with the same species and song type as the sampled event.
srows = tpdf[(tpdf.species_id == row.iloc[0].species_id)
             & (tpdf.songtype_id == row.iloc[0].songtype_id)]

# Preview up to 10 of them. The size is capped because DataFrame.sample raises
# a ValueError when asked for more rows than exist (sampling without replacement).
srows.sample(min(10, len(srows)))

In [None]:
def get_ev_zspec(row):
    """Return the thresholded, median-centred mel spectrogram of one event.

    The recording named by ``row['recording_id']`` is loaded from ``base_dir``
    and converted to a mel spectrogram in dB using the notebook-level settings
    ``n_wind``, ``hop_length`` and ``fmax_mel``. Each mel band is centred on its
    median over time, and every cell that lies less than one inter-quartile
    range above that median is set to 0.

    Parameters
    ----------
    row : table row with ``recording_id``, ``t_min`` and ``t_max`` fields
        The annotated event to extract.

    Returns
    -------
    numpy.ndarray
        Array with one row per mel band, restricted to the frames whose time
        lies strictly between ``row.t_min`` and ``row.t_max``.
    """
    w, sr = lr.load(os.path.join(base_dir, row['recording_id'] + '.flac'))
    # Signal and sample rate go by keyword: recent librosa releases reject
    # them as positional arguments.
    ms = lr.feature.melspectrogram(y=w, sr=sr, n_fft=n_wind,
                                   hop_length=hop_length, fmax=fmax_mel)
    # Floor the input so zero-energy bins give a finite level instead of -inf.
    dbs = 20 * np.log10(np.maximum(ms, 1e-10))

    # Per-band centre (median) and spread (inter-quartile range) over time.
    avs = np.median(dbs, axis=1)
    q25, q75 = np.percentile(dbs, [25, 75], axis=1)
    dvs = q75 - q25

    # Broadcasting the per-band vectors replaces the explicit np.tile.
    zs = dbs - avs[:, np.newaxis]
    zs[zs < dvs[:, np.newaxis]] = 0

    # Frame times from the hop length. The earlier `len(w-n_wind)` was just
    # len(w), because the subtraction acted on the samples, not the length.
    t = lr.frames_to_time(np.arange(dbs.shape[1]), sr=sr, hop_length=hop_length)
    idx = (t > row.t_min) & (t < row.t_max)
    return zs[:, idx]


In [None]:
nsam = 6     # maximum number of events to show
ncols = 3    # panels per row

# Never ask for more samples than there are matching events:
# DataFrame.sample raises a ValueError in that case.
nshow = min(nsam, len(srows))
nrows = max(1, int(np.ceil(nshow / ncols)))

# squeeze=False always returns a 2-D array of axes, so flatten() works
# whatever the grid size is.
fig, ax = plt.subplots(nrows, ncols, sharex=True, sharey=True, squeeze=False)
axf = ax.flatten()

# One panel per sampled event, with its annotated frequency band shaded.
# `sr` is the sample rate of the recording loaded in an earlier cell.
for ii, (ir, srw) in enumerate(srows.sample(nshow).iterrows()):
    zss = get_ev_zspec(srw)
    img = lrd.specshow(zss, sr=sr, fmax=fmax_mel,
                       y_axis="mel", x_axis="time",
                       hop_length=hop_length,
                       cmap='gray', ax=axf[ii])
    axf[ii].axhspan(srw.f_min, srw.f_max, color='g', alpha=.4)

# Hide the panels that received no event so they do not show as empty frames.
for spare in axf[nshow:]:
    spare.set_visible(False)

    