In [20]:
import numpy, scipy, matplotlib.pyplot as plt, sklearn, librosa, urllib, IPython.display
import numpy as np
import librosa.display
import signal

from scipy.signal import butter, lfilter, freqz

import glob
from pathlib import Path, PurePath
import essentia
import essentia.standard as es
import essentia.streaming as ess
from essentia.standard import *

import time

# Collect every MP3 and then every WAV file found anywhere below the 'music'
# directory; the MP3 matches come first, followed by the WAV matches.
filelist = []
for pattern in ('**/*.mp3', '**/*.wav'):
    filelist.extend(Path('music').glob(pattern))

# Sample rate handed to the loaders in the feature functions below
fs = 44100
# Default example track used by the single-file cells
path = 'music/THRONES1.mp3'

def compute_bpm_hist(path):
    """Estimate the tempo of an audio file and build its BPM histogram.

    The file is decoded to mono at the module-level sample rate ``fs``, fed to
    ``RhythmExtractor2013`` (multifeature method), and the beat intervals it
    reports are passed on to ``BpmHistogramDescriptors``.

    Args:
        path: Audio file name, forwarded to ``MonoLoader``.

    Returns:
        A ``(bpm, histogram)`` pair: the first output of the rhythm extractor
        and the last output of the histogram descriptor algorithm.
    """
    samples = MonoLoader(filename=path, sampleRate=fs)()

    extract_rhythm = RhythmExtractor2013(method="multifeature")
    # Of the five outputs only the tempo and the beat intervals are used
    bpm, _, _, _, intervals = extract_rhythm(samples)

    # The six leading outputs (bpm, weight and spread of two histogram peaks)
    # are not needed by callers; only the histogram itself is returned
    _, _, _, _, _, _, histogram = BpmHistogramDescriptors()(intervals)
    return bpm, histogram

def compute_mfcc(path):
    """Summarise the MFCCs of an audio file as a mean vector and a covariance.

    Runs Essentia's ``MusicExtractor`` on the file with the MFCC statistics
    restricted to 'mean' and 'cov'. Only the upper triangle of the covariance
    matrix (diagonal included) is kept, to shorten the feature vector.

    Args:
        path: Audio file name, passed straight to ``MusicExtractor``.

    Returns:
        A ``(mean, cov)`` pair where ``mean`` is
        ``features['lowlevel.mfcc.mean']`` and ``cov`` is a 1-D array holding
        the upper-triangular entries of ``features['lowlevel.mfcc.cov']`` in
        row-major order.
    """
    # MusicExtractor is given the file name and does its own decoding, so the
    # file is not loaded separately here (the result would go unused).
    extractor = es.MusicExtractor(analysisSampleRate=44100,
                                  mfccStats=['mean', 'cov'])
    features, _ = extractor(path)

    mfcc_cov = features['lowlevel.mfcc.cov']
    # Indices of the upper triangle of a (size x size) matrix
    upper = np.triu_indices(mfcc_cov.shape[0])
    return features['lowlevel.mfcc.mean'], mfcc_cov[upper]

def _keep_strong_chroma_bins(chroma):
    """Zero weak frames and weak bins of a (frames x bins) chroma matrix in place."""
    frame_energy = chroma.sum(axis=1)
    # A frame only counts when its summed energy exceeds the average frame energy
    loud = frame_energy > frame_energy.mean()
    # The strongest bin(s) of each frame are forced to 1, then everything
    # below 0.8 is dropped; quiet frames are cleared entirely.
    strongest = chroma == chroma.max(axis=1, keepdims=True)
    chroma[strongest] = 1
    chroma[chroma < 0.8] = 0
    chroma[~loud] = 0
    return chroma


def _dominant_bin_per_beat(chroma, beat_frames):
    """Return, for every beat, the chroma bin with the largest sum since the previous beat."""
    notes = []
    # Start at frame 0 so the first beat receives the frames that precede it
    segment_start = 0
    for segment_end in beat_frames:
        # Summing along axis 0 also copes with an empty segment (all-zero
        # profile, which resolves to bin 0).
        profile = chroma[segment_start:segment_end].sum(axis=0)
        # argmax picks the first bin on ties; plain ints keep str(notes) tidy
        notes.append(int(np.argmax(profile)))
        segment_start = segment_end
    return notes


def compute_chroma_notes(path):
    """Estimate the key of an audio file and one dominant chroma bin per beat.

    A streaming Essentia network computes an HPCP (chroma) vector per analysis
    frame and a key estimate from a second, 36-bin HPCP. The per-frame chroma
    is reduced to its strong bins, and for each beat found by
    ``RhythmExtractor2013`` the bin with the largest summed value since the
    previous beat is reported.

    Args:
        path: Audio file name, forwarded to both ``MonoLoader`` instances.

    Returns:
        A ``(key, scale, notes)`` tuple. ``key`` and ``scale`` are the values
        the ``Key`` algorithm stored in the pool; ``notes`` is a list of ints,
        one per detected beat, each an index into the HPCP vector (12 bins
        with the default ``HPCP()`` settings used here).
    """
    frameSize = 4096
    hopSize = 2048

    # Standard-mode decode, used only for beat tracking further down
    audio = MonoLoader(filename=path, sampleRate=fs)()

    # Streaming network: loader -> frames -> window -> spectrum -> peaks -> HPCP.
    # NOTE: this loader's sample rate must equal `fs`, otherwise the beat to
    # frame conversion below no longer lines up with the HPCP frames.
    loader = ess.MonoLoader(filename=path, sampleRate=44100)
    framecutter = ess.FrameCutter(frameSize=frameSize, hopSize=hopSize, silentFrames='noise')
    windowing = ess.Windowing(type='blackmanharris62')
    spectrum = ess.Spectrum()
    spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                      magnitudeThreshold=0.00001,
                                      minFrequency=20,
                                      maxFrequency=3500,
                                      maxPeaks=60)
    # Default HPCP feeds the note extraction; a higher-resolution HPCP with
    # custom parameters feeds the key estimation.
    hpcp = ess.HPCP()
    hpcp_key = ess.HPCP(size=36,  # higher resolution for key estimation
                        referenceFrequency=440,  # reference (tuning) frequency of 440
                        bandPreset=False,
                        minFrequency=20,
                        maxFrequency=3500,
                        weightType='cosine',
                        nonLinear=False,
                        windowSize=1.)
    key = ess.Key(profileType='edma',  # profile for electronic music
                  numHarmonics=4,
                  pcpSize=36,
                  slope=0.6,
                  usePolyphony=True,
                  useThreeChords=True)
    pool = essentia.Pool()

    # Connect streaming algorithms
    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    spectralpeaks.magnitudes >> hpcp_key.magnitudes
    spectralpeaks.frequencies >> hpcp_key.frequencies
    hpcp_key.hpcp >> key.pcp
    hpcp.hpcp >> (pool, 'tonal.hpcp')
    key.key >> (pool, 'tonal.key_key')
    key.scale >> (pool, 'tonal.key_scale')
    key.strength >> (pool, 'tonal.key_strength')

    # Run streaming network
    essentia.run(loader)

    # One row per analysis frame, one column per HPCP bin; work on a copy
    chroma = _keep_strong_chroma_bins(np.array(pool['tonal.hpcp']))

    # Beat positions, converted to analysis frame indices
    rhythm_extractor = RhythmExtractor2013(method="multifeature")
    _, beats, _, _, _ = rhythm_extractor(audio)
    beats_frames = ((beats * fs) / hopSize).astype(int)

    notes = _dominant_bin_per_beat(chroma, beats_frames)
    return pool['tonal.key_key'], pool['tonal.key_scale'], notes

def compute_chroma_aligned(path):
    """Estimate the key of an audio file and return its beat-averaged chroma.

    A streaming Essentia network produces one HPCP vector per analysis frame
    plus a key estimate from a second, 36-bin HPCP. Every run of frames that
    ends at a detected beat is then replaced by the average of those frames.
    Frames after the last beat are left as computed.

    Args:
        path: Audio file name, forwarded to both ``MonoLoader`` instances.

    Returns:
        A ``(key, scale, chroma)`` tuple. ``key`` and ``scale`` are the values
        the ``Key`` algorithm stored in the pool; ``chroma`` is the transposed
        pool HPCP matrix after beat averaging.
    """
    frame_len = 4096
    hop_len = 2048

    # Standard-mode decode, used only by the beat tracker below
    samples = MonoLoader(filename=path, sampleRate=fs)()

    # Streaming-mode algorithms (the loader resamples to 44100 if required)
    source = ess.MonoLoader(filename=path, sampleRate=44100)
    cutter = ess.FrameCutter(frameSize=frame_len, hopSize=hop_len, silentFrames='noise')
    window = ess.Windowing(type='blackmanharris62')
    magnitude_spectrum = ess.Spectrum()
    peak_picker = ess.SpectralPeaks(orderBy='magnitude',
                                    magnitudeThreshold=0.00001,
                                    minFrequency=20,
                                    maxFrequency=3500,
                                    maxPeaks=60)
    # Default HPCP is what gets returned; the 36-bin variant only feeds Key
    chroma_default = ess.HPCP()
    chroma_for_key = ess.HPCP(size=36,
                              referenceFrequency=440,
                              bandPreset=False,
                              minFrequency=20,
                              maxFrequency=3500,
                              weightType='cosine',
                              nonLinear=False,
                              windowSize=1.)
    key_estimator = ess.Key(profileType='edma',
                            numHarmonics=4,
                            pcpSize=36,
                            slope=0.6,
                            usePolyphony=True,
                            useThreeChords=True)
    results = essentia.Pool()

    # Wire the network together
    source.audio >> cutter.signal
    cutter.frame >> window.frame >> magnitude_spectrum.frame
    magnitude_spectrum.spectrum >> peak_picker.spectrum
    peak_picker.magnitudes >> chroma_default.magnitudes
    peak_picker.frequencies >> chroma_default.frequencies
    peak_picker.magnitudes >> chroma_for_key.magnitudes
    peak_picker.frequencies >> chroma_for_key.frequencies
    chroma_for_key.hpcp >> key_estimator.pcp
    chroma_default.hpcp >> (results, 'tonal.hpcp')
    key_estimator.key >> (results, 'tonal.key_key')
    key_estimator.scale >> (results, 'tonal.key_scale')
    key_estimator.strength >> (results, 'tonal.key_strength')

    essentia.run(source)

    # Rows are analysis frames, columns are HPCP bins
    frames = results['tonal.hpcp']

    # Beat times converted to analysis frame indices
    beat_tracker = RhythmExtractor2013(method="multifeature")
    _, beat_times, _, _, _ = beat_tracker(samples)
    beat_frames = ((beat_times * fs) / hop_len).astype(int)

    # Replace each inter-beat run of frames with the average of that run
    segment_start = 0
    for segment_end in beat_frames:
        span = segment_end - segment_start
        frames[segment_start:segment_end] = sum(frames[segment_start:segment_end]) / span
        segment_start = segment_end

    return results['tonal.key_key'], results['tonal.key_scale'], frames.transpose()

In [21]:
# Example run: key, scale and per-beat notes for a single track
path = 'music/THRONES1.mp3'

key, scale, notes = compute_chroma_notes(path)
# Uncomment to inspect the results:
# print(key)
# print(scale)
# print(notes)

[7]
[7]
[7]
[6]
[7]
[9]
[11]
[10]
[7]
[7]
[7]
[7]
[11]
[3]
[3]
[3]
[4]
[6]
[4]
[8]
[7]
[7]
[7]
[7]
[2]
[8]
[9]
[3]
[3]
[6]
[11]
[0]
[0]
[10]
[5]
[5]
[8]
[3]
[2]
[6]
[8]
[3]
[3]
[10]
[10]
[10]
[3]
[10]
[3]
[3]
[3]
[10]
[3]
[8]
[5]
[10]
[0]
[3]
[10]
[3]
[2]
[3]
[3]
[2]
[4]
[10]
[10]
[4]
[0]
[4]
[11]
[0]
[10]
[7]
[1]
[11]
[3]
[2]
[0]
[9]
[10]
[3]
[11]
[2]
[11]
[3]
[0]
[3]
[10]
[10]
[4]
[0]
[3]
[10]
[5]
[3]
[3]
[10]
[11]
[8]
[3]
[3]
[9]
[5]
[7]
[10]
[7]
[7]
[7]
[10]
[10]
[7]
[1]
[3]
[7]
[10]
[10]
[4]
[8]
[7]
[2]
[3]
[1]
[6]
[3]
[1]
[1]
[4]
[9]
[6]
[4]
[1]
[1]
[11]
[9]
[2]
[10]
[9]
[9]
[9]
[9]
[9]
[9]
[9]
[5]
[10]
[8]
[10]
[0]
[9]
[8]
[10]
[3]
[3]
[5]
[7]
[5]
[0]
[10]
[0]
[5]
[10]
[3]
[8]
[8]
[9]
[3]
[1]
[1]
[10]
[3]
[4]
[0]
[5]
[8]
[5]
[4]
[5]
[5]
[0]
[8]
[1]
[3]
[5]
[9]
[3]
[3]
[3]
[3]
[1]
[1]
[3]
[3]
[3]
[3]
[1]
[3]
[11]
[3]
[1]
[1]
[1]
[1]
[8]
[1]
[1]
[0]
[1]
[1]
[5]
[5]
[5]
[9]
[1]
[6]
[11]
[5]
[9]
[8]
[3]
[3]
[3]
[1]
[3]
[1]
[8]
[3]
[1]
[4]
[10]
[5]
[4]
[6]
[6]
[3]
[9]
[0]
[7]
[10]
[8

In [9]:
# compute_chroma_aligned returns the beat-averaged chroma matrix as its third
# value, so bind it to `chroma` rather than overwriting `notes`.
key, scale, chroma = compute_chroma_aligned(path)
# Uncomment to inspect the results:
# print(key)
# print(scale)
# print(chroma.shape)

In [4]:
# Example run: tempo estimate and BPM histogram for the track in `path`
bpmret, hist = compute_bpm_hist(path)
# Uncomment to inspect the results:
# print(bpmret)
# print(hist)

In [5]:
# Example run: MFCC mean vector and flattened upper-triangular covariance.
# print() is called as a function so the cell also runs on Python 3.
mean, cov = compute_mfcc(path)
print(mean)
print(cov)

[-782.459      178.5395     -11.086075    27.762318     3.233001
    6.910735     1.4131231    2.1776037   -5.257601    -5.980739
   -2.4921422   -8.008745    -3.0513396]
[ 2.57239277e+04  7.68981396e+03 -3.53856689e+03  6.36435364e+02
 -1.46437048e+03 -2.12631714e+02 -5.54968323e+02  1.22613277e+01
 -4.27736053e+02 -1.10699844e+02  1.92868317e+02 -4.24857605e+02
 -4.00181396e+02  4.47885840e+03 -1.03266626e+03  2.16211182e+02
 -2.12796799e+02 -2.13258514e+02 -1.88441772e+02 -1.44886505e+02
 -1.17767799e+02 -1.48931976e+02 -1.11836157e+01 -1.63817566e+02
 -9.68742142e+01  9.99523010e+02 -8.33072281e+01  2.51003128e+02
  8.87906113e+01  1.46565231e+02  3.51734314e+01 -9.13383389e+00
 -1.98074780e+01  3.16439247e+01  4.81033096e+01  3.14301300e+01
  1.92504089e+02  1.60997448e+01  4.56674538e+01 -4.77413654e+00
  9.68953323e+00  1.51671543e+01  2.63316393e+00 -2.97681332e+01
 -1.14782362e+01  5.89304638e+00  2.10479324e+02  3.68501854e+01
  5.35296936e+01 -5.10496902e+00  3.61312637e+01 

In [48]:
# Batch export: compute MFCC statistics and per-beat notes for every file in
# `filelist` and write one "; "-separated line per file to features/out.mfcc
# and features/out.chroma. The total wall-clock time is printed at the end.
start_time = time.time()

# Opening with "w" truncates any previous output, so a separate reset pass is
# not needed; the with-block closes the file.
with open("features/out.mfcc", "w") as myfile:
    for count, file_name in enumerate(filelist, start=1):
        path = str(PurePath(file_name))
        print("MFCC - File " + path + " " + str(count) + " von " + str(len(filelist)))
        mean, cov = compute_mfcc(path)
        # array2string wraps long arrays over several lines; strip the line
        # breaks so that every file occupies exactly one output line.
        mean = np.array2string(mean, precision=8, separator=',', suppress_small=True).replace('\n', '')
        cov = np.array2string(cov, precision=8, separator=',', suppress_small=True).replace('\n', '')
        line = (path + "; " + mean + "; " + cov).replace('\n', '')
        myfile.write(line + '\n')

with open("features/out.chroma", "w") as myfile:
    for count, file_name in enumerate(filelist, start=1):
        path = str(PurePath(file_name))
        print("Chroma - File " + path + " " + str(count) + " von " + str(len(filelist)))
        key, scale, notes = compute_chroma_notes(path)
        # Format the notes as "[a, b, c]" explicitly so the output does not
        # depend on how NumPy prints its integer scalars inside a list.
        notes = "[" + ", ".join(str(int(note)) for note in notes) + "]"
        line = (path + "; " + key + "; " + scale + "; " + notes).replace('\n', '')
        myfile.write(line + '\n')

# Perform any action like print a string
print("Printing this string takes ...")

# Store end time
end_time = time.time()

# Calculate the execution time and print the result
print("%.10f seconds" % (end_time - start_time))


MFCC - File music/SPACE1.mp3 1 von 28
MFCC - File music/PUNISH2.mp3 2 von 28
MFCC - File music/eguitar.mp3 3 von 28
MFCC - File music/TURCA3.mp3 4 von 28
MFCC - File music/AFRICA1.mp3 5 von 28
MFCC - File music/guitar.mp3 6 von 28
MFCC - File music/guitar2.mp3 7 von 28
MFCC - File music/HURRICANE2.mp3 8 von 28
MFCC - File music/guitar3.mp3 9 von 28
MFCC - File music/BACK1.mp3 10 von 28
MFCC - File music/AFRICA2.mp3 11 von 28
MFCC - File music/TNT1.mp3 12 von 28
MFCC - File music/WALLS2.mp3 13 von 28
MFCC - File music/WALLS1.mp3 14 von 28
MFCC - File music/THRONES1.mp3 15 von 28
MFCC - File music/TNT2.mp3 16 von 28
MFCC - File music/BACK2.mp3 17 von 28
MFCC - File music/PUNISH1.mp3 18 von 28


KeyboardInterrupt: 