In [33]:
# global configuration variables
dbFilename = r"user-library.db"

# source folder for archive files
siblingSourceFolder = "User Library"

In [34]:
import essentia
from essentia import Pool, array
import essentia.streaming as ess
import essentia.standard as stan
import numpy as np
# from pylab import *
# %matplotlib inline
# import matplotlib.pyplot as plt

from IPython.display import Audio

## SQLite interface

In [35]:
import sqlite3
from sqlite3 import Error


def create_connection(db_file):
    """ create a database connection to a SQLite database """
    conn = None
#     try:
    conn = sqlite3.connect(db_file)
    print(sqlite3.version)
    return conn
#     except Error as e:
#         print(e)
        
def create_table(conn, create_table_sql):
    """ create a table from the create_table_sql statement
    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return:
    """
    try:
        c = conn.cursor()
        c.execute(create_table_sql)
    except Error as e:
        print(e)
        
def create_archive_file(conn, archive_file):
    """
    Create a new archive_file into the archive_file table
    :param conn:
    :param archive_file:
    :return: project id
    """
    sql = ''' INSERT INTO archive_files(name, durationSeconds)
              VALUES(?,?) '''
    cur = conn.cursor()
    cur.execute(sql, archive_file)
    conn.commit()
    return cur.lastrowid

def create_sound_object(conn, sound_object):
    """
    Create a new sound_object
    :param conn:
    :param sound_object:
    :return: sound_object id
    """

    sql = ''' INSERT INTO sound_objects(durationSeconds,effectiveDuration,rms,loudness,pitch,pitchConfidence,flatness,centroid,variance,path,archive_file_id,archive_file_duration_seconds,start,end)
              VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?) '''
    cur = conn.cursor()
    cur.execute(sql, sound_object)
    conn.commit()

    return cur.lastrowid

def create_descriptor_stats(conn, stats):
    """
    Create a new task
    :param conn:
    :param stats:
    :return: stats id
    """

    sql = ''' INSERT INTO stats(descriptor, avg, stddev, min, max)
              VALUES(?,?,?,?,?) '''
    cur = conn.cursor()
    cur.execute(sql, stats)
    conn.commit()

    return cur.lastrowid

def update_stats(conn, stats):
    sql = ''' UPDATE stats
              SET descriptor = ?
              WHERE id = ?'''
    cur = conn.cursor()
    cur.execute(sql, stats)
    conn.commit()

def delete_stats(conn):
    """
    Delete stats records
    :param conn:
    """

    sql = ''' DELETE FROM stats '''
    cur = conn.cursor()
    cur.execute(sql)
    conn.commit()

## SQLite user-defined functions

In [36]:
import sqlite3
import math

def create_stats(descriptorName,avg,stddev,descriptorMin,descriptorMax):
    statsRecord = (
        descriptorName,
        avg,
        stddev,
        descriptorMin,
        descriptorMax
    )
    return create_descriptor_stats(db, statsRecord)

# https://www.alexforencich.com/wiki/en/scripts/python/stdev
class StdevFunc:
    def __init__(self):
        self.M = 0.0
        self.S = 0.0
        self.k = 1
 
    def step(self, value):
        if value is None:
            return
        tM = self.M
        self.M += (value - tM) / self.k
        self.S += (value - tM) * (value - self.M)
        self.k += 1
 
    def finalize(self):
        if self.k < 3:
            return None
        return math.sqrt(self.S / (self.k-2))
    
class HistogramFunc:
    def __init__(self):
        self.d = np.empty(1)

    def step(self, value):
        self.d = np.append(self.d, value)
 
    def finalize(self):
        self.d = np.delete(self.d, 0)
        
        stddev = np.std(self.d)
        mean = np.mean(self.d)
        descMin = np.amin(self.d)
        descMax = np.amax(self.d)

        print("mean, std")
        print(mean)
        print(stddev)
        
        print("min")
        print(descMin)
        print(np.argmin(self.d))

        
        print("max")
        print(descMax)
        print(np.argmax(self.d))

        range_low = mean - 2*stddev;
        range_high = mean + 2*stddev;
        if range_low < 0:
            range_low = 0
        range_tuple = (range_low,range_high)
        print(range_tuple)

#         plt.hist(self.d, bins = 40, range = range_tuple) 
#         plt.title("Histogram") 
#         plt.show()
        
        stats_id = create_stats("name", mean, stddev, descMin, descMax)
        
        return stats_id


## Create database and tables

In [37]:
db = create_connection(dbFilename)

sql_create_stats_table = """ CREATE TABLE stats (
                                    id integer PRIMARY KEY,
                                    descriptor text NOT NULL,
                                    avg float NOT NULL,
                                    stddev float NOT NULL,
                                    min float NOT NULL,
                                    max float NOT NULL
                                ); """

sql_create_archive_files_table = """ CREATE TABLE archive_files (
                                    id integer PRIMARY KEY,
                                    name text NOT NULL,
                                    durationSeconds float NOT NULL
                                ); """

sql_create_sound_objects_table = """CREATE TABLE sound_objects (
                                    id integer PRIMARY KEY,
                                    durationSeconds float NOT NULL,
                                    effectiveDuration float NOT NULL,
                                    rms float NOT NULL,
                                    loudness float NOT NULL,
                                    pitch float NOT NULL,
                                    pitchConfidence float NOT NULL,
                                    flatness float NOT NULL,
                                    centroid float NOT NULL,
                                    variance float NOT NULL,
                                    path text NOT NULL,
                                    archive_file_id integer NOT NULL,
                                    archive_file_duration_seconds float NOT NULL,
                                    start integer NOT NULL,
                                    end integer NOT NULL,
                                    FOREIGN KEY (archive_file_id) REFERENCES archive_files (id)
                                );"""

# create tables
if db is not None:
    # create stats table
    create_table(db, sql_create_stats_table)
    
    # create archive_files table
    create_table(db, sql_create_archive_files_table)

    # create sound_objects table
    create_table(db, sql_create_sound_objects_table)
else:
    print("Error! cannot create the database connection.")

2.6.0
table stats already exists
table archive_files already exists
table sound_objects already exists


In [38]:
def get_sound_objects_from_onsets(onset_positions, audio):

    sound_objects = []
    skipped = 0
    numOnsets = onset_positions.shape[0]
    if numOnsets == 1:
        return sound_objects;
    for i in range(numOnsets):
        if i == 0:
            continue

        nStart = int(sampleRate * onset_positions[i-1]) # previous onset sample
        nEnd = int(sampleRate * onset_positions[i]) # current onset sample

        if(nEnd-nStart) < sampleRate*0.15: # remove sounds that are shorter than 0.15 seconds
            continue
        if(nEnd-nStart) > sampleRate*10: # filter sounds that are longer than 10
            continue

        startFrame = int(nStart/hopSize) # loss of precision here
        endFrame = int(nEnd/hopSize)

        sound_object_data = {}
        sound_object_data["rms"] = stan.Mean()(pool["features.rms"][startFrame:endFrame])
        
        sound_object_data["loudness"] = stan.Mean()(pool["features.loudness"][startFrame:endFrame])

        #print("mean rms: " + str(stan.Mean()(pool["features.rms"][startFrame:endFrame])))
        #print("weighted rms: " + str(sound_object_data["rms"]))
             
        sound_object_data["flatness"] = energy_weighted_mean(pool["features.flatnessDB"], startFrame, endFrame)
        
        weighted_mean_centroid = energy_weighted_mean(pool["features.centroid"], startFrame, endFrame)
        centroid_frequency = weighted_mean_centroid * (sampleRate/2)
        centroid_mel = 1000 * math.log2(1 + centroid_frequency/1000)
        sound_object_data["centroid"] = centroid_mel
        
        sound_object_data["variance"] = energy_weighted_mean(pool["features.variance"], startFrame, endFrame)

        pitch_freq = pitch_confidence_weighted_mean(pool["features.pitch"], startFrame, endFrame)
        if pitch_freq < 1:
            pitch_freq = 0
        sound_object_data["pitch"] = 1000 * math.log2(1 + pitch_freq/1000)
        #print("mean pitch: " + str(stan.Mean()(pool["features.pitch"][startFrame:endFrame])))
        #print("weighted pitch: " + str(sound_object_data["pitch"]))
        
        sound_object_data["pitchConfidence"] = energy_weighted_mean(pool["features.pitchConfidence"], startFrame, endFrame)
        
        sound_object_data["audio"] = audio[nStart:nEnd]
        sound_object_data["start"] = nStart
        sound_object_data["end"] = nEnd
        
        if(nStart > len(audio)):
            skipped = skipped + 1
            continue

        sound_objects.append(sound_object_data)

    return sound_objects

def energy_weighted_mean(feature_vector, startFrame, endFrame):
    weights = pool["features.energy"][startFrame : endFrame]
    weighted_feature = feature_vector[startFrame : endFrame] * weights
    weighted_feature_sum = np.sum(weighted_feature)
    
    return float(weighted_feature_sum / np.sum(weights))

def pitch_confidence_weighted_mean(feature_vector, startFrame, endFrame):
    weights = pool["features.pitchConfidence"][startFrame : endFrame]
    weighted_feature = feature_vector[startFrame : endFrame] * weights
    weighted_feature_sum = np.sum(weighted_feature)
    
    return float(weighted_feature_sum / np.sum(weights))
    

In [39]:
# putting it all together
from pathlib import Path
import json

def write_archive_file_sound_objects_to_db(archive_file_path, sound_objects, archive_file_duration_in_seconds):
    # initialize an object to store data for the clip
    sourcePath = Path(archive_file_path) # create the source path
    path_relative_to_db_file = str("/".join(sourcePath.parts[2::]))
    
    archive_file = (path_relative_to_db_file, archive_file_duration_in_seconds,)
    archive_file_id = create_archive_file(db, archive_file)

    # process each detected sound_object
    for i, sound_object in enumerate(sound_objects):
        durationSeconds = len(sound_object["audio"]) / sampleRate
        
        effectiveDuration = stan.EffectiveDuration()(sound_object["audio"])
        
        sound_object_attributes = (
            durationSeconds,
            effectiveDuration,
            sound_object["rms"],
            sound_object["loudness"],
            sound_object["pitch"],
            sound_object["pitchConfidence"],
            sound_object["flatness"],
            sound_object["centroid"],
            sound_object["variance"],
            path_relative_to_db_file,
            archive_file_id,
            archive_file_duration_in_seconds,
            sound_object["start"],
            sound_object["end"]
        )

        create_sound_object(db, sound_object_attributes)

In [40]:
from pathlib import Path

def already_processed(archive_file_path):
    
    sourcePath = Path(archive_file_path) # create the source path
    path_relative_to_db_file = str("/".join(sourcePath.parts[2::]))
    
    cursor = db.cursor()
    rowsQuery = "SELECT Count(*) FROM archive_files where name = \"" + path_relative_to_db_file + "\";"
    cursor.execute(rowsQuery)
    return cursor.fetchone()[0] > 0

In [41]:
from datetime import datetime
import os
import traceback

print("Loading extractor...")

count = 0;

standardLoader = stan.MonoLoader()
streamingLoader = ess.MonoLoader()

sampleRate=44100
frameSize=2048
hopSize=512

framecutter = ess.FrameCutter(frameSize=frameSize, hopSize=hopSize, silentFrames='noise')
windowing = ess.Windowing(type='hann')
c2p = ess.CartesianToPolar()
od1 = ess.OnsetDetection(method='hfc')
fft = ess.FFT()
pitch = ess.PitchYinFFT(frameSize=frameSize)
rms = ess.RMS()
loudness = ess.Loudness()
flatness = ess.FlatnessDB()
centroid = ess.Centroid()
variance = ess.Variance()
spectrum = ess.Spectrum()
energy = ess.Energy()
pool = Pool()

# Connect streaming algorithms
streamingLoader.audio >> framecutter.signal
framecutter.frame >> windowing.frame >> fft.frame
fft.fft >> c2p.complex
c2p.phase >> od1.phase
c2p.magnitude >> None

windowing.frame >> energy.array
energy.energy >> (pool, 'features.energy')

windowing.frame >> spectrum.frame
spectrum.spectrum >> od1.spectrum
od1.onsetDetection >> (pool, 'features.hfc')

windowing.frame >> rms.array
rms.rms >> (pool, 'features.rms')

windowing.frame >> loudness.signal
loudness.loudness >> (pool, 'features.loudness')

spectrum.spectrum >> pitch.spectrum
pitch.pitch >> (pool, 'features.pitch')
pitch.pitchConfidence >> (pool, 'features.pitchConfidence')

c2p.magnitude >> flatness.array
flatness.flatnessDB >> (pool, 'features.flatnessDB')

c2p.magnitude >> centroid.array
centroid.centroid >> (pool, 'features.centroid')

c2p.magnitude >> variance.array
variance.variance >> (pool, 'features.variance')

recursive_list = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.getcwd() + "/" + siblingSourceFolder) for f in fn]
print("... recursive walk found %i files ..." % len(recursive_list))

for archive_file_path in recursive_list:
    if (archive_file_path.endswith((".wav",".m4a",".aiff",".mp3",".aif",))):
        if already_processed(archive_file_path):
            print(str(count) + ": " + archive_file_path + " -- already processed, skipping")
            count = count + 1
            continue
            
        try:
            standardLoader.configure(filename=archive_file_path)
            audio = standardLoader()
            streamingLoader.configure(filename=archive_file_path)
        except:
            formatted_exception = traceback.format_exc()
            print("some error occured on %s" % archive_file_path)
            print(formatted_exception)
            continue
        
        count = count + 1
        essentia.run(streamingLoader)

        # Phase 2: compute the actual onsets locations
        
        # NOTE This implementation depends on a frameRate of 44100.0/512.0
        onsets = stan.Onsets(frameRate=sampleRate/hopSize)

        onsets_hfc = onsets(# this algo expects a matrix, not a vector
            array([ pool['features.hfc'] ]),
            # you need to specify weights, but as there is only a single
            # function, it doesn't actually matter which weight you give it
            [ 1 ])
        
        sound_objects = get_sound_objects_from_onsets(onsets_hfc, audio)
        
        archive_file_duration_in_seconds = len(audio)/sampleRate
        write_archive_file_sound_objects_to_db(archive_file_path, sound_objects, archive_file_duration_in_seconds)
        
        print(str(count) + ": " + archive_file_path + " ✅ " + str(len(sound_objects)) + " sound objects")

        
        # clear pool for next run
        pool.clear()
        # flush the audio buffer variable
        del audio



Loading extractor...
... recursive walk found 934 files ...
0: /notebooks/User Library/Samples/Imported/06 That's Us _ Wild Combination.m4a -- already processed, skipping
1: /notebooks/User Library/Samples/Imported/06 Thats Us _ Wild Combination copy.m4a -- already processed, skipping
2: /notebooks/User Library/Samples/Imported/20160508 164508.m4a -- already processed, skipping
3: /notebooks/User Library/Samples/Imported/CR78Hat_O.wav -- already processed, skipping
4: /notebooks/User Library/Samples/Imported/Cabasa.wav -- already processed, skipping
5: /notebooks/User Library/Samples/Imported/Clap-1oct.wav -- already processed, skipping
6: /notebooks/User Library/Samples/Imported/Claps.wav -- already processed, skipping
7: /notebooks/User Library/Samples/Imported/Closedhat.wav -- already processed, skipping
8: /notebooks/User Library/Samples/Imported/Cowbell.wav -- already processed, skipping
9: /notebooks/User Library/Samples/Imported/Crash-1.wav -- already processed, skipping
10: /no

98: /notebooks/User Library/Samples/Imported/Industry Snare (28).wav -- already processed, skipping
99: /notebooks/User Library/Samples/Imported/Industry Snare (29).wav -- already processed, skipping
100: /notebooks/User Library/Samples/Imported/Industry Snare (30).wav -- already processed, skipping
101: /notebooks/User Library/Samples/Imported/Industry Snare (31).wav -- already processed, skipping
102: /notebooks/User Library/Samples/Imported/Industry Snare (32).wav -- already processed, skipping
103: /notebooks/User Library/Samples/Imported/Industry Snare (33).wav -- already processed, skipping
104: /notebooks/User Library/Samples/Imported/Industry Snare (34).wav -- already processed, skipping
105: /notebooks/User Library/Samples/Imported/Industry Snare (35).wav -- already processed, skipping
106: /notebooks/User Library/Samples/Imported/Industry Snare (36).wav -- already processed, skipping
107: /notebooks/User Library/Samples/Imported/Industry Snare (37).wav -- already processed, s

237: /notebooks/User Library/Samples/Imported/R50 Shaker.wav -- already processed, skipping
238: /notebooks/User Library/Samples/Imported/Raymond Scott - 06 - -Don't Beat Your Wife Every Night!-.mp3 -- already processed, skipping
239: /notebooks/User Library/Samples/Imported/Raymond Scott - 06 - -Dont Beat Your Wife Every Night.mp3 -- already processed, skipping
240: /notebooks/User Library/Samples/Imported/Ride.wav -- already processed, skipping
241: /notebooks/User Library/Samples/Imported/Rock South Hey.wav -- already processed, skipping
242: /notebooks/User Library/Samples/Imported/SH_Bass-A0-127-8DP5.aif -- already processed, skipping
243: /notebooks/User Library/Samples/Imported/SH_Bass-A1-127-JBZO.aif -- already processed, skipping
244: /notebooks/User Library/Samples/Imported/SH_Bass-A2-127-FQD7.aif -- already processed, skipping
245: /notebooks/User Library/Samples/Imported/SH_Bass-A3-127-ZGEV.aif -- already processed, skipping
246: /notebooks/User Library/Samples/Imported/SH_

375: /notebooks/User Library/Samples/Imported/mfb_snare.wav -- already processed, skipping
376: /notebooks/User Library/Samples/Imported/perc 2.aiff -- already processed, skipping
377: /notebooks/User Library/Samples/Imported/poprockin clap.wav -- already processed, skipping
378: /notebooks/User Library/Samples/Imported/rhythm77_low_tom.wav -- already processed, skipping
379: /notebooks/User Library/Samples/Imported/sd6.wav -- already processed, skipping
380: /notebooks/User Library/Samples/Imported/sd7_echo.wav -- already processed, skipping
381: /notebooks/User Library/Samples/Imported/shaker 2.aiff -- already processed, skipping
382: /notebooks/User Library/Samples/Imported/snare 1.aiff -- already processed, skipping
383: /notebooks/User Library/Samples/Imported/snare 2.aiff -- already processed, skipping
384: /notebooks/User Library/Samples/Imported/snare 3.aiff -- already processed, skipping
385: /notebooks/User Library/Samples/Imported/snare 4.aiff -- already processed, skipping


In [42]:
db = create_connection(dbFilename)
db.create_aggregate("stdev", 1, StdevFunc)
db.create_aggregate("histogram", 1, HistogramFunc)

 
def stats(column_name, conn):
    print("📊 %s 📈" % column_name)
    cur = db.cursor()
    cur.execute("select histogram(%s) from sound_objects" %column_name)
    stats_id = cur.fetchone()[0]
    stats_record = (column_name, stats_id)
    update_stats(conn, stats_record)
    
    
delete_stats(db)

stats("durationSeconds", db)
stats("effectiveDuration", db)
stats("rms", db)
stats("loudness", db)
stats("pitch", db)
stats("flatness", db)
stats("centroid", db)
stats("variance", db)


2.6.0
📊 durationSeconds 📈
mean, std
0.389380219535
0.72732753154
min
0.150907029478
32
max
8.52174603175
31
(0, 1.8440352826156987)
📊 effectiveDuration 📈
mean, std
0.0666093772567
0.353309004593
min
0.000317460304359
3
max
5.86977338791
228
(0, 0.77322738644204547)
📊 rms 📈
mean, std
0.000166190209361
9.37223400264e-05
min
2.72185417316e-07
328
max
0.000762613199186
228
(0, 0.0003536348894136396)
📊 loudness 📈
mean, std
0.00157461528609
0.00112543424864
min
2.66159332796e-07
328
max
0.0109936734661
228
(0, 0.0038254837833646997)
📊 pitch 📈
mean, std
404.543358909
335.449538645
min
62.8940433016
223
max
3264.46180671
1082
(0, 1075.4424361999736)
📊 flatness 📈
mean, std
0.284050070219
0.109289461999
min
0.024256747216
328
max
0.964687943459
481
(0.065471146220231646, 0.50262899421808971)
📊 centroid 📈
mean, std
1480.30749792
457.933194359
min
70.4795040449
223
max
3747.50367163
1085
(564.44110920473076, 2396.1738866399705)
📊 variance 📈
mean, std
0.000112844822353
0.000121949326794
min
1.08256