Collects true-positive and false-positive audio data for a given species for side-by-side comparison of spectrograms, sound, and waveform. The false-positive data is limited to the same length as the true-positive data to make the comparison more useful.

Normalization is enabled to make the spectral patterns clearer.

In [None]:
# Species to analyse; this value is passed as the species id to
# hearAudioOfSpecies for both the true-positive and false-positive sets.
speciesOfInterest = 22

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import soundfile as sf
import scipy.signal as signal
import matplotlib.pyplot as plt 
import matplotlib.patches as patches
import seaborn as sns

from IPython.display import Audio

def getFPAudioWithSpeciesId(speciesId):
    """Return the false-positive annotations recorded for one species.

    Loads train_fp.csv from the competition input directory and hands the
    resulting table to getAudioWithSpeciesId, whose result is returned as-is.
    """
    fpAnnotations = pd.read_csv('../input/rfcx-species-audio-detection/train_fp.csv')
    matchingRows = getAudioWithSpeciesId(speciesId, fpAnnotations)
    return matchingRows
    
def getTPAudioWithSpeciesId(speciesId):
    """Return the true-positive annotations recorded for one species.

    Loads train_tp.csv from the competition input directory and hands the
    resulting table to getAudioWithSpeciesId, whose result is returned as-is.
    """
    tpAnnotations = pd.read_csv('../input/rfcx-species-audio-detection/train_tp.csv')
    matchingRows = getAudioWithSpeciesId(speciesId, tpAnnotations)
    return matchingRows

def getAudioWithSpeciesId(speciesId, trainSet):
    """Collect the annotation fields of every row labelled with a species.

    Args:
        speciesId: Value compared for equality against the ``species_id``
            column.
        trainSet: DataFrame providing the columns ``species_id``,
            ``recording_id``, ``t_min``, ``t_max``, ``f_min`` and ``f_max``.

    Returns:
        A tuple of five parallel lists, in row order: recording ids,
        ``t_min`` values, ``t_max`` values, ``f_min`` values and ``f_max``
        values. All five lists are empty when no row matches.
    """
    # Select rows with a boolean mask rather than by a running counter: a
    # counter is only a valid index label when the frame's index happens to
    # be 0..n-1, which is not true for filtered or re-indexed frames.
    matches = trainSet[trainSet['species_id'] == speciesId]
    columns = ('recording_id', 't_min', 't_max', 'f_min', 'f_max')
    return tuple(matches[column].tolist() for column in columns)

def normalize(data):
    """Scale samples so that the largest magnitude equals the float32 maximum.

    Args:
        data: Array-like of numeric samples (any shape).

    Returns:
        A new float64 ``numpy.ndarray`` with the same shape as ``data``.
        Empty input and all-zero input are returned unscaled.
    """
    samples = np.asarray(data, dtype=np.float64)
    if samples.size == 0:
        # Nothing to scale; max() of an empty sequence would raise.
        return samples.copy()

    # Use the absolute peak so a signal whose largest excursion is negative
    # is scaled correctly and never has its polarity flipped.
    peak = np.max(np.abs(samples))
    if peak == 0:
        # Silent input: scaling would divide by zero and produce NaNs.
        return samples.copy()

    maxVal = float(np.finfo(np.float32).max)  # float32-max target is arbitrary
    return samples * (maxVal / peak)

def hearAudioOfSpecies(speciesId, truePositive):
    """Concatenate the normalized annotated segments for one species.

    Args:
        speciesId: Species id forwarded to the annotation lookup.
        truePositive: When truthy the true-positive annotations are used,
            otherwise the false-positive ones.

    Returns:
        A 7-tuple ``(allAudioData, sampleRate, numSources, minFreqs,
        maxFreqs, startTimes, endTimes)`` where

        * ``allAudioData`` is a list holding every segment back to back,
        * ``sampleRate`` is always 48000,
        * ``numSources`` is the number of annotations used,
        * ``minFreqs`` / ``maxFreqs`` are the annotation frequency bounds,
        * ``startTimes`` holds 0 followed by the end of every segment and
          ``endTimes`` holds the end of every segment, so
          ``startTimes[i]`` / ``endTimes[i]`` bracket segment ``i`` in the
          concatenated audio. Both are in seconds at ``sampleRate``, so
          they line up with the time axis of a spectrogram drawn with
          ``Fs=sampleRate``.
    """
    loadAnnotations = getTPAudioWithSpeciesId if truePositive else getFPAudioWithSpeciesId
    audioIdList, minTimeList, maxTimeList, speciesMinFreqs, speciesMaxFreqs = loadAnnotations(speciesId)

    # NOTE: the rate reported by each file is used only to slice that file;
    # the rate handed back to the caller is this fixed value.
    defaultSampleRate = 48000
    segments = []
    startTimes = [0]
    endTimes = []
    totalSamples = 0

    for audioId, tMin, tMax in zip(audioIdList, minTimeList, maxTimeList):
        path = '../input/rfcx-species-audio-detection/train/' + str(audioId) + '.flac'
        data, samplerate = sf.read(path)
        relevantData = data[int(tMin * samplerate):int(tMax * samplerate)]
        # A zero-length annotation has no peak to normalize against; keep it
        # as an empty segment so the per-annotation lists stay aligned.
        if len(relevantData):
            relevantData = normalize(relevantData)
        segments.append(relevantData)
        totalSamples += len(relevantData)
        # Record boundaries in seconds, not samples: callers use them as x
        # coordinates on a spectrogram whose time axis is in seconds.
        boundary = totalSamples / defaultSampleRate
        startTimes.append(boundary)
        endTimes.append(boundary)

    # One concatenation instead of extending a Python list sample by sample.
    allAudioData = np.concatenate(segments).tolist() if segments else []
    return allAudioData, defaultSampleRate, len(audioIdList), speciesMinFreqs, speciesMaxFreqs, startTimes, endTimes


# Gather the concatenated clips and annotation metadata for the chosen
# species, once from the true-positive set and once from the false-positive set.
(dataTp, samplerateTp, numSourcesTp,
 minFreqTp, maxFreqTp,
 startTimesTp, endTimesTp) = hearAudioOfSpecies(speciesOfInterest, True)
(dataFp, samplerateFp, numSourcesFp,
 minFreqFp, maxFreqFp,
 startTimesFp, endTimesFp) = hearAudioOfSpecies(speciesOfInterest, False)

dataTp = np.array(dataTp)
# Trim the false-positive audio to at most the true-positive length so the
# two signals can be compared side by side.
tpLength = len(dataTp)
dataFp = np.array(dataFp[:tpLength])

print(f'True Positive data taken from {numSourcesTp} recordings')
print(f'False Positive data taken from {numSourcesFp} recordings')

In [None]:
# Playback widget for the concatenated true-positive clips.
Audio(data=dataTp, rate=samplerateTp)

In [None]:
# Playback widget for the (length-limited) false-positive clips.
Audio(data=dataFp, rate=samplerateFp)

In [None]:
def makePatch(ax, data, minFreq, maxFreq, minTime, maxTime, clr):
    """Shade a time/frequency box on the given axes.

    The rectangle spans ``minTime``..``maxTime`` on the x axis and
    ``minFreq``..``maxFreq`` on the y axis, filled with colour ``clr`` at
    20% opacity. ``data`` is accepted but not used.
    """
    lowerLeft = (minTime, minFreq)
    boxWidth = maxTime - minTime
    boxHeight = maxFreq - minFreq
    highlight = patches.Rectangle(
        lowerLeft,
        boxWidth,
        boxHeight,
        linewidth=1,
        facecolor=clr,
        alpha=.2,
    )
    ax.add_patch(highlight)

**True Positive Spectrogram**

In [None]:
# Spectrogram of the true-positive audio, with one red box per annotation
# marking its segment and frequency band; the figure is saved as a PNG.
fig, ax = plt.subplots(nrows=1, figsize=(20, 5))
Pxx, freqs, bins, im = plt.specgram(dataTp, Fs=samplerateTp)

tpBoxes = zip(minFreqTp, maxFreqTp, startTimesTp, endTimesTp)
for lowFreq, highFreq, segStart, segEnd in tpBoxes:
    makePatch(ax, Pxx, lowFreq, highFreq, segStart, segEnd, 'r')

plt.savefig('spectrum-tp.png', dpi=600, format='png')

**False Positive Spectrogram**

In [None]:
# Spectrogram of the false-positive audio, with one red box per annotation
# marking its segment and frequency band; the figure is saved as a PNG.
fig, ax = plt.subplots(nrows=1, figsize=(20, 5))
Pxx, freqs, bins, im = plt.specgram(dataFp, Fs=samplerateFp)

fpBoxes = zip(minFreqFp, maxFreqFp, startTimesFp, endTimesFp)
for lowFreq, highFreq, segStart, segEnd in fpBoxes:
    makePatch(ax, Pxx, lowFreq, highFreq, segStart, segEnd, 'r')

plt.savefig('spectrum-fp.png', dpi=600, format='png')