In [1]:
from scipy.io import wavfile
import os
import numpy as np
import pandas as pd

SETUP

In [2]:
# Variables you may change. Note: number of spikes hard-limited to 32. Also limited by sample rate, anyway (see Nyquist's theorem).
numberSpikes = 20 # number of integer multiples of the fundamental frequency to record (including 0, i.e. 1/2 the FF).
FFTSize = 8192 # have to use a multiple of 2 for optimal speed.

#open directory with the samples in it.
os.chdir('./Training_data/')

# next, make a NP array of every wav file in the directory
SampleList = np.array(os.listdir())
SampleList = SampleList[ np.char.endswith(SampleList, '.wav') ] #ignore everything that's not a .wav file.

#make a fresh blank series.
SampleSpectra = pd.DataFrame(columns = np.arange(numberSpikes))

CORE LOOP

In [3]:
for SampleName in SampleList:
    SampleRate, Audiodata = wavfile.read(SampleName)
    # spectrum
    from scipy.fft import fft # fourier transform
    m = len(Audiodata) 
    #AudioFreq = fft(Audiodata,n=FFTSize,axis=0)
    AudioFreq = fft(Audiodata[:,1]*np.hanning(np.shape(Audiodata)[0]),n=FFTSize,axis=0) #with a Hanning window
    AudioFreq = AudioFreq[0:int(np.ceil((np.shape(AudioFreq)[0])/2.0))] #Left half of the spectrum
    MagFreq = np.abs(AudioFreq) # Magnitude
    MagFreq = MagFreq / float(m)
    # power spectrum
    MagFreq = MagFreq**2
    if m % 2 > 0: # ffte odd 
        MagFreq[1:len(MagFreq)] = MagFreq[1:len(MagFreq)] * 2
    else:# fft even
        MagFreq[1:len(MagFreq) -1] = MagFreq[1:len(MagFreq) - 1] * 2 
    # HPS (Harmonic Product Spectrum) Algorithm to determine fundamental frequency
    HPSSize = int(np.ceil(len(MagFreq))/3.0 + 1)
    MagFreq2 = MagFreq.copy().reshape(int(np.ceil(len(MagFreq))/2.0),2).mean(1)[0:HPSSize]
    MagFreq3 = np.concatenate([MagFreq.copy(),np.zeros(3 - int(len(MagFreq)) % 3)]).reshape(int(np.ceil(len(MagFreq))/3.0 + 1),3).mean(1)[0:HPSSize]
    HPS = MagFreq[0:HPSSize]*MagFreq2*MagFreq3
    freqAxis = np.arange(0,int(np.ceil((m+1)/2.0)), int(np.ceil((m+1)/2.0))*2/FFTSize) * (SampleRate / m);
    freqAxisDownscaled = np.arange(0,int(np.ceil((m+1)/2.0)), int(np.ceil((m+1)/2.0))*6/FFTSize) * (SampleRate / m);
    FFLocHPS = np.argmax(HPS[freqAxisDownscaled >= 50]) + int(len(HPS[freqAxisDownscaled < 50])) # don't look below 50 Hz for it, due to noise.
    FFLocation = FFLocHPS*3 - 1 # FFLocHPS uses an index that is scaled down by 1/3. Have to reverse that.
    # Have to ensure the fundamental frequency is accurate. Take loudest spike, get its approximate ratio, round to nearest int.
    MaxLoc = np.argmax(MagFreq[freqAxis >= 50]) + int(len(MagFreq[freqAxis < 50])) # don't look below 50 Hz for it, due to noise.
    MaxRatio = np.max([np.round(freqAxis[MaxLoc]/freqAxis[FFLocation]), 1]) 
    FFreq = freqAxis[MaxLoc]/MaxRatio
    MagFreqLog = 10*np.log10(MagFreq)
    FFreqLoc = np.argmin(np.abs(freqAxis - FFreq))
    # If the frequency on either side of the "spike" is actually louder, make that the new spike.
    FFreqLoc = FFreqLoc - ((MagFreqLog <= np.roll(MagFreqLog,1)) & (np.roll(MagFreqLog,-1) <= np.roll(MagFreqLog,1))) #if the left entry is >= the entry and the right entry, then make the left entry the new spike location
    FFreqLoc = FFreqLoc + ((MagFreqLog <= np.roll(MagFreqLog,-1)) & (np.roll(MagFreqLog,1) <= np.roll(MagFreqLog,-1)))  #if the right entry is >= the entry and the left entry, then make the right entry the new spike location
    Multiples = [FFreq*i for i in np.arange(0,numberSpikes)]
    Multiples[0] = FFreq/2 # It turns out, telling the YM2612 to use the "0th" multiple means 1/2 the FF.
    MultiplesLoc = [np.argmin(np.abs(freqAxis - i)) for i in Multiples]
    # Next, make an integer ratio-indexed list of the amplitudes!
    MultiplesMag = MagFreq[MultiplesLoc]
    MultiplesdB = MagFreqLog[MultiplesLoc]
    # Write these magnitudes/dBs to the Pandas array! (Index = SampleName)
    #tempIndex = pd.MultiIndex.from_product([[SampleName],['Pwr','dB']])
    #SampleRow = pd.DataFrame(np.vstack([MultiplesMag, MultiplesdB]), index = tempIndex, columns = np.arange(0,numberSpikes))
    #SampleSpectra = pd.concat([SampleSpectra, SampleRow])
    SampleRow = pd.DataFrame([MultiplesdB], index = [SampleName], columns = np.arange(0,numberSpikes))
    SampleSpectra = pd.concat([SampleSpectra, SampleRow])

# Had to choose between Pwr and dB measures. Can easily replace MultiplesdB with MultiplesMag.

CSV FILE OUTPUT

In [4]:
# To avoid a column label conflict with the reduced audio dataset.
MultipleLabels = ['HalfFF','1FF','2FF','3FF','4FF','5FF','6FF','7FF','8FF','9FF','10FF','11FF','12FF','13FF','14FF','15FF','16FF','17FF','18FF','19FF','20FF','21FF','22FF','23FF','24FF','25FF','26FF','27FF','28FF','29FF','30FF','31FF',]
for i in np.arange(0,numberSpikes):
    SampleSpectra = SampleSpectra.rename(columns={i: MultipleLabels[i]})

SampleSpectra.to_csv('000SampleSpectra.csv')


print('The first few samples\' spectra for your viewing pleasure:')
print(SampleSpectra.head())

The first few samples' spectra for your viewing pleasure:
                              HalfFF        1FF        2FF        3FF  \
01 - Circle of Life_0.wav  27.345405  30.462234 -19.134841 -25.059827   
01 - Circle of Life_1.wav -10.175424  20.070666  10.026839  11.563651   
01 - Circle of Life_2.wav -57.000795 -39.625398 -61.329705 -53.079755   
01 - Circle of Life_3.wav -13.490103  31.465241 -11.517152  -8.075195   
01 - Circle of Life_4.wav -13.608143  26.469035  17.800095   6.472096   

                                 4FF        5FF        6FF        7FF  \
01 - Circle of Life_0.wav -27.301782 -28.422371 -29.965447 -31.425729   
01 - Circle of Life_1.wav   5.924881  -0.834964  -4.774746  -6.389716   
01 - Circle of Life_2.wav -53.079755 -53.079755 -53.079755 -53.079755   
01 - Circle of Life_3.wav -13.541514 -23.127384 -28.912905 -30.748610   
01 - Circle of Life_4.wav   9.908368  -0.080632 -15.172361  -4.412612   

                                 8FF        9FF       10FF      