In [6]:
import numpy as np
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
import scipy.signal
from scipy import stats
from scipy.io import wavfile
import math
from sklearn.metrics import pairwise_distances


# Edges of the frequency bands the signal is split into by __prepare_filterbanks
# (presumably in Hz -- TODO confirm against the intended units).
bandLimits = [0, 200, 400, 800, 1600, 3200, 6400]    
# Number of pulses placed in each comb filter built by detect_tempo; also
# scales the length of the excerpt analysed by detect_tempo_metre.
combFilterPulses = 8
# Tempo search range handed to detect_tempo, which clamps it to [60, 240].
minBpm = 60
maxBpm = 240

# When resampleSignal is True, detect_tempo_metre resamples the analysed
# excerpt to 1/resampleRatio of its length and divides the sample rate accordingly.
resampleSignal = True
resampleRatio = 4

# NOTE(review): not referenced by any executable code visible in this notebook.
hop_length = 512
frame_length = 1024



In [7]:
# Legacy module-level registry, kept only so existing references keep working.
# detect_metre no longer appends to it: doing so on every call made the list
# grow without bound and rebuilt every template once per previous call.
__methods = []

def detect_metre(signal, tempo: int, bandlimits, maxFreq, npulses):
    """Pick the metre whose pulse template correlates best with the signal.

    Each template builder is called with ``(tempo, maxFreq, npulses)`` and
    returns a ``(label, template)`` pair. For every label the template is
    correlated with each band of ``signal`` and the spectral energy of the
    correlation is accumulated over the bands; the label with the largest
    total wins.

    Parameters
    ----------
    signal : indexable of 1-D arrays
        ``signal[band]`` is read for ``band`` in ``range(len(bandlimits))``.
    tempo : int
        Tempo handed to the template builders.
    bandlimits : sequence
        Only its length is used (number of bands to analyse).
    maxFreq : number
        Passed to the template builders as their sampling frequency.
    npulses : int
        Passed through to the template builders.

    Returns
    -------
    str or None
        Label of the best-scoring metre, or ``None`` when no template
        produced an energy greater than zero (e.g. a silent signal).
    """
    nbands = len(bandlimits)

    # Fixed evaluation order; on an exact tie the earlier entry wins because
    # the comparison below is strict.
    builders = (__five_forth, __four_forth, __six_eigth, __three_forth)

    metres = {}
    for build in builders:
        metre, template = build(tempo, maxFreq, npulses)
        metres[metre] = template

    song_metre = None
    maxe = 0
    todo = len(metres)
    for done, (metrum, template) in enumerate(metres.items(), start=1):
        percent_done = 100 * done / todo
        print("%.2f" % percent_done, "%")

        e = 0
        for band in range(nbands):
            # np.correlate is the function the deprecated scipy.correlate
            # alias pointed to (default mode 'valid').
            filt = np.correlate(signal[band], template)
            e = e + np.sum(np.abs(np.fft.fft(filt)) ** 2)

        if e > maxe:
            song_metre = metrum
            maxe = e

    return song_metre
    
def __four_forth(song_tempo: int, sampling_frequency: int, npulses: int):
    """Build the pulse template labelled ``4\\4`` (one literal backslash).

    The template is a zero array spanning four beat periods
    (``60 / song_tempo`` seconds each) with pulses of height 1/2 placed one
    and three beat periods from the start. ``npulses`` is accepted for
    signature parity with the other builders and is not used.

    Returns the ``(label, template)`` pair.
    """
    beat_seconds = 60 / song_tempo
    template = np.zeros(int(4 * sampling_frequency * beat_seconds))

    # Whole number of samples in one beat period.
    beat = int(np.floor(beat_seconds * sampling_frequency))
    template[[beat, 3 * beat]] = 0.5

    return "4\\4", template

def __three_forth(song_tempo: int, sampling_frequency: int, npulses: int):
    """Build the pulse template labelled ``3\\4`` (one literal backslash).

    The template is a zero array spanning six beat periods
    (``60 / song_tempo`` seconds each) with pulses of height 1/2 placed two
    and five beat periods from the start. ``npulses`` is accepted for
    signature parity with the other builders and is not used.

    Returns the ``(label, template)`` pair.
    """
    beat_seconds = 60 / song_tempo
    template = np.zeros(int(6 * sampling_frequency * beat_seconds))

    # Whole number of samples in one beat period.
    beat = int(np.floor(beat_seconds * sampling_frequency))
    template[[2 * beat, 5 * beat]] = 0.5

    return "3\\4", template

def __five_forth(song_tempo: int, sampling_frequency: int, npulses: int):
    """Build the pulse template labelled ``5\\4`` (one literal backslash).

    The template is a zero array spanning five beat periods
    (``60 / song_tempo`` seconds each) with pulses of height 1/3 placed one,
    three and four beat periods from the start. ``npulses`` is accepted for
    signature parity with the other builders and is not used.

    Returns the ``(label, template)`` pair.
    """
    beat_seconds = 60 / song_tempo
    template = np.zeros(int(5 * sampling_frequency * beat_seconds))

    # Whole number of samples in one beat period.
    beat = int(np.floor(beat_seconds * sampling_frequency))
    template[[beat, 3 * beat, 4 * beat]] = 1 / 3

    return "5\\4", template

def __six_eigth(song_tempo: int, sampling_frequency: int, npulses: int):
    """Build the pulse template labelled ``6\\8`` (one literal backslash).

    The template is a zero array spanning three beat periods
    (``60 / song_tempo`` seconds each). The step used here is half a beat
    period; pulses of height 1/2 sit at the very start and three half-beat
    steps later. ``npulses`` is accepted for signature parity with the other
    builders and is not used.

    Returns the ``(label, template)`` pair.
    """
    beat_seconds = 60 / song_tempo
    template = np.zeros(int(3 * sampling_frequency * beat_seconds))

    # Whole number of samples in half a beat period.
    half_beat = int(np.floor((beat_seconds * sampling_frequency) / 2))
    template[[0, 3 * half_beat]] = 0.5

    return "6\\8", template

In [8]:
def detect_tempo(signal, accuracy: int, minBpm: int, maxBpm: int, bandsLimits, sr,
                 combFilterPulses):
    """Estimate the tempo by scoring comb filters against the band signals.

    For every candidate in ``range(minBpm, maxBpm, accuracy)`` a comb filter
    with ``combFilterPulses`` unit pulses spaced ``floor(60 / bpm * sr)``
    samples apart is built, and the energy of its spectrum multiplied with
    each band spectrum is summed over all bands. The candidate with the
    largest energy is returned.

    Parameters
    ----------
    signal : indexable of equal-length 1-D arrays
        ``signal[band]`` is read for ``band`` in ``range(len(bandsLimits))``.
    accuracy : int
        Step between candidate tempi.
    minBpm, maxBpm : int
        Search range; clamped to at least 60 and at most 240. ``maxBpm``
        itself is not tested (half-open range).
    bandsLimits : sequence
        Only its length is used (number of bands).
    sr : number
        Sample rate of ``signal``.
    combFilterPulses : int
        Number of pulses in each comb filter.

    Returns
    -------
    int
        Best-scoring candidate. Falls back to the clamped ``minBpm`` when no
        candidate scores above zero (silent input or an empty range) instead
        of failing with an unbound local.

    Raises
    ------
    IndexError
        If a comb pulse would fall beyond the end of the signal.
    """
    n = len(signal[0])
    bands_amount = len(bandsLimits)

    minBpm = max(minBpm, 60)
    maxBpm = min(maxBpm, 240)

    # sum_b sum_k |F[k] * X_b[k]|^2 == sum_k |F[k]|^2 * (sum_b |X_b[k]|^2),
    # so the band power can be accumulated once, outside the candidate loop.
    band_power = np.zeros(n)
    for band in range(bands_amount):
        band_power += np.abs(np.fft.fft(signal[band])) ** 2

    songBpm = minBpm
    maxEnergy = 0
    for bpm in range(minBpm, maxBpm, accuracy):
        filter_step = int(np.floor(60 / bpm * sr))
        percent_done = 100 * (bpm - minBpm) / (maxBpm - minBpm)
        print("%.2f" % percent_done, "%")

        # Comb filter: unit pulses at 1, step + 1, 2 * step + 1, ...
        fil = np.zeros(n)
        for a in range(combFilterPulses):
            fil[a * filter_step + 1] = 1

        comb_power = np.abs(np.fft.fft(fil)) ** 2
        this_bpm_energy = float(np.dot(comb_power, band_power))

        if this_bpm_energy > maxEnergy:
            songBpm = bpm
            maxEnergy = this_bpm_energy

    return songBpm

In [13]:
def detect_tempo_metre(song):
    """Estimate the tempo and metre of the audio file ``song``.

    The file is loaded with ``librosa.load``. An excerpt of up to
    ``4 * combFilterPulses * sr`` samples is cut around the middle of the
    track and trimmed by ``__center_sample_to_beat`` to
    ``combFilterPulses * sr`` samples. When ``resampleSignal`` is set, the
    excerpt is resampled to ``1 / resampleRatio`` of its length and the
    sample rate is scaled to match. The excerpt then goes through the filter
    bank, the Hann smoothing and the differentiator before tempo detection
    (a coarse pass in steps of 5, then a pass in steps of 1 over
    ``[tempo - 5, tempo + 5)``) and metre detection.

    Returns the ``(tempo, metre)`` pair.
    """
    signal, sr = librosa.load(song)

    beat_window = combFilterPulses * sr
    excerpt_len = beat_window * 4
    total = signal.size

    # Excerpt centred on the middle of the track, clipped to the track bounds.
    first = max(int(np.floor(total / 2 - excerpt_len / 2)), 0)
    last = min(int(np.floor(total / 2 + excerpt_len / 2)), total)

    centred = __center_sample_to_beat(signal[first:last], beat_window)

    if resampleSignal:
        centred = scipy.signal.resample(centred, int(len(centred) / resampleRatio))
        sr /= resampleRatio

    print('Preparing filterbank for song...')
    filterBanks = __prepare_filterbanks(centred, bandLimits, sr)

    print('Hanning song...')
    hanningWindow = __hann(filterBanks, 0.2, bandLimits, sr)

    print('Differentiating song ...')
    diffrected = __diffrect(hanningWindow, len(bandLimits))

    print("Detecting song's tempo ...")
    print('First attempt...')
    songTempo = detect_tempo(diffrected, 5, minBpm, maxBpm, bandLimits, sr,
                             combFilterPulses)

    print("Detecting song's tempo ...")
    print('Second attempt...')
    songTempo = detect_tempo(diffrected, 1, songTempo - 5, songTempo + 5,
                             bandLimits, sr, combFilterPulses)

    print("Detecting song's metre...")
    metre = detect_metre(diffrected, songTempo, bandLimits, sr, combFilterPulses)
    return songTempo, metre

def __center_sample_to_beat(signal, required_length):
    """Cut ``required_length`` samples starting at the first strong peak.

    The start is the first sample whose magnitude exceeds 90 % of the
    signal's peak magnitude (index 0 when none does). If fewer than
    ``required_length`` samples remain after that point, the window is moved
    back so that it ends at the end of the signal.

    Parameters
    ----------
    signal : 1-D numpy array
    required_length : number
        Desired window length in samples; truncated to an integer.

    Returns
    -------
    numpy array
        A slice of ``signal`` with ``required_length`` samples, or the whole
        signal when it is shorter than that. An empty signal is returned
        unchanged.
    """
    n = len(signal)
    if n == 0:
        return signal

    required_length = int(required_length)

    magnitude = np.abs(signal)
    threshold = magnitude.max() * 0.9
    hits = np.flatnonzero(magnitude > threshold)
    index = int(hits[0]) if hits.size else 0

    lastindex = min(index + required_length, n)
    if lastindex - index < required_length:
        # Slide the window back. Clamp at 0: a negative start would be read
        # as "count from the end" and return only the tail of a short signal.
        index = max(lastindex - required_length, 0)

    return signal[index:lastindex]

def __prepare_filterbanks(signal, bandlimits, samplingFrequency):
    """Split ``signal`` into frequency bands in the DFT domain.

    The DFT of ``signal`` is taken and, for every band, the bins inside the
    band are copied into an otherwise zero spectrum together with their
    negative-frequency mirrors, so each band spectrum stays
    conjugate-symmetric for real input. The DC bin is removed from band 0.

    Band ``i`` covers bins ``[bl[i], br[i])`` with
    ``bl = floor(bandlimits[i] / samplingFrequency * n / 2) + 1`` and
    ``br = floor(bandlimits[i + 1] / samplingFrequency * n / 2)``. The first
    band starts at bin 0 and the last band ends at ``floor(n / 2)``.

    NOTE(review): with this formula a limit ``f`` lands on bin
    ``f * n / (2 * samplingFrequency)`` -- confirm whether
    ``samplingFrequency`` is meant to be the sample rate or the highest
    represented frequency.

    Parameters
    ----------
    signal : 1-D array
    bandlimits : sequence of numbers
        Lower edge of every band.
    samplingFrequency : number

    Returns
    -------
    numpy array, shape ``(len(bandlimits), len(signal))``, dtype complex
        One spectrum per band.
    """
    dft = np.fft.fft(signal)
    n = len(dft)
    nbands = len(bandlimits)
    bl = np.zeros(nbands, int)
    br = np.zeros(nbands, int)

    for band in range(nbands - 1):
        bl[band] = np.floor(bandlimits[band] / samplingFrequency * n / 2) + 1
        br[band] = np.floor(bandlimits[band + 1] / samplingFrequency * n / 2)

    bl[0] = 0
    bl[nbands - 1] = np.floor(bandlimits[nbands - 1] / samplingFrequency * n / 2) + 1
    br[nbands - 1] = np.floor(n / 2)

    # Keep the edges inside the spectrum so the slices below never wrap.
    bl = np.clip(bl, 0, n)
    br = np.clip(br, 0, n)

    output = np.zeros([nbands, n], dtype=complex)

    for band in range(nbands):
        lo, hi = int(bl[band]), int(br[band])
        if hi <= lo:
            continue
        output[band, lo:hi] = dft[lo:hi]
        # Mirror of bin k is bin n - k; bin 0 (DC) has no mirror.
        mirror_lo = n - hi + 1
        mirror_hi = n - max(lo, 1) + 1
        output[band, mirror_lo:mirror_hi] = dft[mirror_lo:mirror_hi]

    # Drop the DC component (band 0, bin 0).
    output[0, 0] = 0
    return output

def __hann(signal, winLength, bandslimits, samplingFrequency):
    """Full-wave rectify every band and smooth it with a half-Hann window.

    Each band spectrum in ``signal`` is taken back to the time domain, its
    real part is rectified (absolute value), and the result is circularly
    convolved with the window by multiplying the two spectra.

    The window has ``winLength * 2 * samplingFrequency`` taps (``hannlen``):
    tap ``a`` is ``cos(a * pi / hannlen / 2) ** 2`` for ``a >= 1`` and tap 0
    is left at zero. Taps beyond the signal length are dropped.

    Parameters
    ----------
    signal : indexable of equal-length 1-D spectra
        ``signal[band]`` is read for ``band`` in ``range(len(bandslimits))``.
    winLength : number
    bandslimits : sequence
        Only its length is used (number of bands).
    samplingFrequency : number

    Returns
    -------
    numpy array, shape ``(len(bandslimits), n)``, dtype complex
        Smoothed envelopes; the imaginary part is always zero.
    """
    n = len(signal[0])
    nbands = len(bandslimits)
    hannlen = winLength * 2 * samplingFrequency

    hann = np.zeros(n)
    taps = np.arange(1, min(int(hannlen), n))
    hann[taps] = np.cos(taps * np.pi / hannlen / 2) ** 2
    # Identical for every band, so transform the window only once.
    hann_dft = np.fft.fft(hann)

    output = np.zeros([nbands, n], dtype=complex)
    for band in range(nbands):
        rectified = np.abs(np.real(np.fft.ifft(signal[band])))
        output[band] = np.real(np.fft.ifft(np.fft.fft(rectified) * hann_dft))

    return output

def __diffrect(signal, nbands=6):
    """Differentiate each band and keep only the positive slopes.

    For every band, ``output[band, j]`` is ``signal[band, j] -
    signal[band, j - 1]`` when that difference is positive and 0 otherwise.
    Differences are evaluated from ``j = 5`` on; the first five samples stay
    zero. Only the real part of ``signal`` is used, which avoids the implicit
    complex-to-real cast when the input is complex.

    Parameters
    ----------
    signal : indexable of equal-length 1-D arrays
        ``signal[band]`` is read for ``band`` in ``range(nbands)``.
    nbands : int
        Number of bands to process.

    Returns
    -------
    numpy array, shape ``(nbands, n)``, dtype float
    """
    n = len(signal[0])
    output = np.zeros([nbands, n])

    for band in range(nbands):
        envelope = np.real(signal[band])
        # diff of envelope[4:] yields the differences for j = 5 .. n - 1
        slope = np.diff(envelope[4:])
        output[band, 5:] = np.where(slope > 0, slope, 0.0)

    return output

In [12]:
# Clip to analyse; the path is relative to the notebook's working directory.
song = "Data/genres_original/blues/blues.00055.wav"
signal, sr = librosa.load(song, duration=20)
# A bare expression is only rendered when it is the last line of a cell, so
# the audio player has to be displayed explicitly here.
ipd.display(ipd.Audio(song, rate=sr))
Tempo, meter = detect_tempo_metre(song)
print('meter is', meter)
print('Tempo is', Tempo)

  Y[sl] = X[sl]
  Y[sl] = X[sl]
  Y[sl] += X[sl]  # add the component of X at N/2


Preparing filterbank for song...
Hanning song...
Differentiating song ...




Detecting song's tempo ...
First attempt...
0.00 %
2.78 %
5.56 %
8.33 %
11.11 %
13.89 %
16.67 %
19.44 %
22.22 %
25.00 %
27.78 %
30.56 %
33.33 %
36.11 %
38.89 %
41.67 %
44.44 %
47.22 %
50.00 %
52.78 %
55.56 %
58.33 %
61.11 %
63.89 %
66.67 %
69.44 %
72.22 %
75.00 %
77.78 %
80.56 %
83.33 %
86.11 %
88.89 %
91.67 %
94.44 %
97.22 %
Detecting song's tempo ...
Second attempt...
0.00 %
10.00 %
20.00 %
30.00 %
40.00 %
50.00 %
60.00 %
70.00 %
80.00 %
90.00 %
Detecting song's metre...
25.00 %
50.00 %
75.00 %
100.00 %
meter is 4\4
Tempo is 133
