In [1]:
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction
import matplotlib.pyplot as plt
import subprocess as subp
import os

In [81]:
def getWavFiles(directory="."):
    """Return the names of the ``.wav`` files found in ``directory``.

    Parameters
    ----------
    directory : str, optional
        Directory to scan. Defaults to the current working directory,
        which is what the original ``ls``-based version listed.

    Returns
    -------
    list of str
        Bare file names (not paths), sorted so the order is deterministic.
        Hidden entries (leading ``.``) are skipped, as a plain ``ls`` would,
        and only regular files whose extension is exactly ``.wav`` are kept.
    """
    wavFiles = []
    for name in sorted(os.listdir(directory)):
        # Plain `ls` does not list dot-files; keep that behaviour.
        if name.startswith("."):
            continue
        # splitext avoids matching an extension-less file literally named "wav".
        if os.path.splitext(name)[1] != ".wav":
            continue
        # A directory called "something.wav" is not an audio file.
        if not os.path.isfile(os.path.join(directory, name)):
            continue
        wavFiles.append(name)
    return wavFiles
    
# Gets feature vector for a wav song
def getFeatureVector(file, duration=30):
    """Extract a flat mid-term feature vector from the middle of an audio file.

    Parameters
    ----------
    file : str
        Path of the audio file, passed to ``audioBasicIO.readAudioFile``.
    duration : int or float, optional
        Length in seconds of the centred excerpt that is analysed
        (default 30, the value that was previously hard-coded).

    Returns
    -------
    numpy.ndarray or None
        The mid-term feature matrix flattened with ``ravel()``, or ``None``
        when the recording is shorter than ``duration`` seconds.
    """
    Fs, x = audioBasicIO.readAudioFile(file)
    x = audioBasicIO.stereo2mono(x)

    # Keep only a `duration`-second window centred in the song; recordings
    # that are too short are discarded so every vector has the same length.
    nSamples = int(Fs * duration)
    if len(x) < nSamples:
        return None
    offset = (len(x) - nSamples) // 2
    x = x[offset:offset + nSamples]

    print("Sampling rate, number of samples: ", Fs, len(x))

    # Arguments after Fs are expressed in samples: presumably mid-term window
    # and step (2 s each) followed by short-term window and step (50 ms / 25 ms)
    # -- TODO confirm against the installed pyAudioAnalysis version.
    # Only the mid-term matrix is used; the other two return values are ignored.
    mterm, _sterm, _f_names = audioFeatureExtraction.mtFeatureExtraction(
        x, Fs, 2*Fs, 2*Fs, 0.050*Fs, 0.025*Fs)

    # NOTE(review): the original note expected 68 mid-term features over 30
    # windows, but 2 s non-overlapping windows on a 30 s clip suggest about 15
    # windows -- verify with mterm.shape before relying on the vector length.
    return mterm.ravel()

def _collectFeatures(directory, label):
    """Build ``[features, label]`` pairs for every usable wav file in ``directory``.

    The working directory is switched to ``directory`` while the files are
    processed (``getWavFiles`` lists the current directory and the returned
    names are relative to it) and is always restored afterwards, even if
    feature extraction raises, so a failed run does not leave the kernel
    stranded in a data folder.
    """
    samples = []
    startDir = os.getcwd()
    os.chdir(directory)
    try:
        for f in getWavFiles():
            features = getFeatureVector(f)
            # getFeatureVector returns None for clips that are too short.
            if features is not None:
                samples.append([features, label])
    finally:
        os.chdir(startDir)
    return samples

# Label 0 = negative examples, label 1 = positive examples.
dataset = _collectFeatures("music_negative/", 0) + _collectFeatures("music_positive/", 1)

dataset

Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, number of samples:  44100 1323000
Sampling rate, numbe

[[array([0.06389519, 0.0607475 , 0.06171166, ..., 0.00577995, 0.01059661,
         0.00715891]), 0],
 [array([0.06880104, 0.05780399, 0.06014632, ..., 0.00850338, 0.00919012,
         0.00869681]), 0],
 [array([0.09451565, 0.0999206 , 0.10162772, ..., 0.01221416, 0.01193331,
         0.01224679]), 0],
 [array([0.05362409, 0.07862409, 0.06920372, ..., 0.00931003, 0.01131456,
         0.00801118]), 0],
 [array([0.06791629, 0.05756012, 0.04602427, ..., 0.00993236, 0.00691155,
         0.00758121]), 0],
 [array([0.08203267, 0.09156647, 0.0623412 , ..., 0.01430264, 0.01131676,
         0.02015864]), 0],
 [array([0.0965177 , 0.08666062, 0.0799569 , ..., 0.01112409, 0.01063557,
         0.00715959]), 0],
 [array([0.05188294, 0.03346756, 0.02599819, ..., 0.01063129, 0.00771326,
         0.01609343]), 0],
 [array([0.0489451 , 0.05418557, 0.07911751, ..., 0.01003528, 0.01710502,
         0.01185217]), 0],
 [array([0.0626191 , 0.06856284, 0.07002609, ..., 0.01422197, 0.01463131,
         0.013092

In [82]:
import numpy as np
from sklearn.neural_network import MLPClassifier

In [83]:
# Shuffle with a fixed seed so the train/test split is reproducible across
# kernel restarts, and shuffle a copy so re-running this cell never changes
# `dataset` itself.
SPLIT_SEED = 1
rng = np.random.RandomState(SPLIT_SEED)
shuffled = [dataset[i] for i in rng.permutation(len(dataset))]

# 70% of the samples go to training, the remainder to testing.
partition = int(0.7*len(shuffled))
train = shuffled[:partition]
test  = shuffled[partition:]

In [94]:
# One hidden layer with 10 units. Note the trailing comma: `(10)` is just the
# integer 10, whereas `(10,)` is the one-element tuple the parameter expects.
clf = MLPClassifier(solver='sgd', alpha=1e-4, hidden_layer_sizes=(10,), random_state=1, max_iter=1000)

# Each training sample is a [features, label] pair; split them into the
# feature matrix X and the label vector Y expected by fit().
X = [ i[0] for i in train ]
Y = [ i[1] for i in train ]
clf.fit(X, Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=10, learning_rate='constant',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=1, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [95]:
# Walk through the held-out samples and show, for each one, its feature
# vector followed by [true label, predicted label].
for sample in test:
    x, y = sample
    # predict() expects a batch, so the single vector is wrapped in a list;
    # the result is a one-element array.
    y_star = clf.predict([x])
    print(x, [y, y_star])

[0.02838022 0.02648593 0.02258394 ... 0.02195509 0.01541998 0.02480708] [1, array([1])]
[0.03279832 0.03615585 0.05912545 ... 0.00773785 0.01490249 0.00803764] [0, array([1])]
[0.09670485 0.09284256 0.0769113  ... 0.01154063 0.01275512 0.00759375] [0, array([0])]
[0.02447822 0.02595848 0.03152223 ... 0.00581035 0.01087677 0.00920921] [1, array([1])]
[0.04083485 0.02231171 0.03265653 ... 0.01227336 0.01174804 0.00916385] [1, array([1])]
[0.02471075 0.02981511 0.10334052 ... 0.01098615 0.01092745 0.01028931] [1, array([0])]
[0.01505785 0.01455309 0.01656647 ... 0.02402717 0.00938667 0.00600967] [1, array([1])]
[0.0965177  0.08666062 0.0799569  ... 0.01112409 0.01063557 0.00715959] [0, array([0])]
[0.09451565 0.0999206  0.10162772 ... 0.01221416 0.01193331 0.01224679] [0, array([0])]
[0.03347323 0.02856171 0.03296847 ... 0.00750076 0.01080506 0.01048807] [1, array([1])]
