In [1]:
import os
import numpy as np
import sklearn
from sklearn.svm import SVC, LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from os.path import isfile, isdir, join

# Librosa for audio
import librosa
import librosa.display


# DISPLAY
import matplotlib.pyplot as plt
import matplotlib.style as ms
ms.use('seaborn-muted')
%matplotlib inline
import IPython.display

import pdb

In [2]:
dataset_path = "/home/lblier/Music/ESC-50/"
orig_sampling = 44100

In [3]:
data_files = {}
for cat in os.listdir(dataset_path) :
    if isdir(join(dataset_path, cat)):
        data_files[cat] = os.listdir(join(dataset_path, cat))
n = sum(len(r) for r in data_files.values())

In [4]:
cat, files = next(iter(data_files.items()))
data = {}
for cat, files in data_files.items():
    print("Loading category : "+cat)
    data[cat] = [librosa.load(join(dataset_path,cat, f), sr=orig_sampling) for f in files]
    #data[cat] = [(y, sr) for (y,sr) in data[cat] if sr == orig_sampling]

Loading category : 403 - Keyboard typing
Loading category : 503 - Siren
Loading category : 306 - Footsteps
Loading category : 508 - Airplane
Loading category : 501 - Helicopter
Loading category : 404 - Door - wood creaks
Loading category : 107 - Hen
Loading category : 504 - Car horn
Loading category : 305 - Coughing
Loading category : 303 - Clapping
Loading category : 408 - Clock alarm
Loading category : 207 - Wind
Loading category : 505 - Engine
Loading category : 302 - Sneezing
Loading category : 109 - Sheep
Loading category : 309 - Snoring
Loading category : 410 - Glass breaking
Loading category : 507 - Church bells
Loading category : 208 - Pouring water
Loading category : 110 - Crow
Loading category : 201 - Rain
Loading category : 307 - Laughing
Loading category : 304 - Breathing
Loading category : 202 - Sea waves
Loading category : 301 - Crying baby
Loading category : 105 - Frog
Loading category : 103 - Pig
Loading category : 204 - Crickets
Loading category : 402 - Mouse click
Loa

In [16]:
data_aligned = {}
#sampling = 11000
sampling = 44100
length_audio = 5
m = sampling*length_audio

def align_signal(y,m):
    if len(y) > m:
        return y[:m]
    if len(y) < m:
        z = np.zeros(m, dtype=np.float32)
        q = m//len(y)
        for w in range(q):
            z[w*len(y):(w+1)*len(y)] = y
        z[q*len(y):] = y[:m-len(y)]
        
        return z
    
for cat, l in data.items():
    data_aligned[cat] = [ \
        (align_signal(librosa.core.resample(y, sr, sampling),m), sampling) \
        for (y,sr) in l \
                        ]

In [25]:
representations = {}
feat = (lambda y, sr: librosa.core.cqt(y,sr,real=True))
#feat = (lambda y, sr: librosa.feature.mfcc(y,sr=sr))
p = 0
for cat, l in data_aligned.items():
    print("Category : "+cat)
    representations[cat] = [feat(y,sr=sr) for (y,sr) in l \
                            if type(y) == np.ndarray]
    if p == 0:
        p = representations[cat][0].flatten().shape[0]

Category : 403 - Keyboard typing


  return array(a, dtype, copy=False, order=order)


Category : 503 - Siren
Category : 306 - Footsteps
Category : 508 - Airplane
Category : 501 - Helicopter
Category : 404 - Door - wood creaks
Category : 107 - Hen
Category : 504 - Car horn
Category : 305 - Coughing
Category : 303 - Clapping
Category : 309 - Snoring
Category : 505 - Engine
Category : 302 - Sneezing
Category : 109 - Sheep
Category : 410 - Glass breaking
Category : 507 - Church bells
Category : 208 - Pouring water
Category : 110 - Crow
Category : 201 - Rain
Category : 307 - Laughing
Category : 207 - Wind
Category : 304 - Breathing
Category : 202 - Sea waves
Category : 301 - Crying baby
Category : 105 - Frog
Category : 103 - Pig
Category : 408 - Clock alarm
Category : 402 - Mouse click
Category : 509 - Fireworks
Category : 506 - Train
Category : 210 - Thunderstorm
Category : 406 - Washing machine
Category : 308 - Brushing teeth
Category : 401 - Door knock
Category : 409 - Clock tick
Category : 502 - Chainsaw
Category : 101 - Dog
Category : 203 - Crackling fire
Category : 102



Category : 205 - Chirping birds
Category : 206 - Water drops
Category : 310 - Drinking - sipping
Category : 407 - Vacuum cleaner
Category : 209 - Toilet flush
Category : 510 - Hand saw


In [26]:
i = 0
u = 0
y = np.zeros(n)
X = np.zeros((n, p))
for cat, rep in representations.items():
    y[u:u+len(rep)] = i
    a = np.stack([x.flatten() for x in rep], axis=0)
    X[u:u+len(rep),:] = a
    i += 1
    u += len(rep)

In [27]:
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)

In [28]:
classifier = SVC(C=1.,kernel='linear')
#classifier = RandomForestClassifier()

In [29]:
classifier.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [30]:
score_train = classifier.score(X_train, y_train)
score_test = classifier.score(X_test, y_test)

In [31]:
score_train, score_test

(1.0, 0.22)