In [49]:
#!/usr/bin/python3

from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction
import matplotlib.pyplot as plt
import subprocess as subp
import os
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.externals import joblib
import time

def getWavFiles():
    """List the wav files in the current working directory.

    Returns:
        A sorted list of file names (not paths) whose extension is exactly
        ".wav". Hidden entries (names starting with ".") are skipped, which
        matches what a plain `ls` would have listed, and directories are
        ignored.
    """
    wavFiles = []
    for name in sorted(os.listdir(".")):
        # Plain `ls` never showed dotfiles; keep skipping them.
        if name.startswith("."):
            continue
        # splitext() requires a real extension, so a file literally named
        # "wav" (no dot) is no longer mistaken for an audio file.
        extension = os.path.splitext(name)[1]
        if extension == ".wav" and os.path.isfile(name):
            wavFiles.append(name)
    return wavFiles

# Gets feature vector for a wav song
# `counter` numbers the successfully processed files in the progress output.
counter = 0
def getFeatureVector(file):
    """Extract a flat feature vector from the middle 30 seconds of a wav file.

    Args:
        file: path of the wav file to read.

    Returns:
        A 1-D array holding the flattened mid-term feature matrix, or None
        when the file cannot be read or is shorter than 30 seconds.

    Side effects:
        Prints one progress line per processed file and increments the
        module-level `counter`.
    """
    global counter

    try:
        Fs, x = audioBasicIO.readAudioFile(file)
        x = audioBasicIO.stereo2mono(x)
    except Exception as err:
        # Best effort: one unreadable file must not abort the whole dataset
        # build. `Exception` (rather than a bare except) lets Ctrl-C through
        # and the reason for the failure is reported instead of hidden.
        print("Error on file: ", file, "-", err)
        return None

    # We cut the audio to a 30 second window in the middle of the song
    # If the audio is shorter than 30 second, we discard it
    nSamples = Fs * 30
    # NOTE(review): an int here presumably is the reader's "could not decode"
    # sentinel rather than real samples - confirm against pyAudioAnalysis.
    if isinstance(x, int):
        return None
    if len(x) < nSamples:
        return None
    offset = (len(x) - nSamples) // 2
    x = x[offset:offset + nSamples]

    print("{}\tSampling rate, number of samples: {} {}".format(counter, Fs, len(x)))
    counter += 1

    # Mid-term window and step of 1 s, short-term window 50 ms with a 25 ms step
    # (all expressed in samples). Only the first returned value, the mid-term
    # feature matrix, is used.
    mterm = audioFeatureExtraction.mtFeatureExtraction(
        x, Fs, 1 * Fs, 1 * Fs, 0.050 * Fs, 0.025 * Fs)[0]

    # Should return 68 mid-term features per mid-term window (30 windows)
    return mterm.ravel()

def _loadLabelledFeatures(directory, label, maxFiles):
    """Return (features, label) pairs for up to maxFiles wav files in directory."""
    examples = []
    previousDir = os.getcwd()
    os.chdir(directory)
    try:
        for f in getWavFiles()[:maxFiles]:
            features = getFeatureVector(f)
            # Unreadable or too-short files come back as None and are dropped.
            if features is not None:
                examples.append((features, label))
    finally:
        # Always restore the working directory: in a notebook a stranded cwd
        # silently breaks every later run of this cell.
        os.chdir(previousDir)
    return examples

def getDataset(maxFiles=5):
    """Build the labelled dataset from the two music folders.

    Files in "music_negative/" get label 0 and files in "music_positive/" get
    label 1; negatives come first in the result.

    Args:
        maxFiles: maximum number of wav files taken from each folder
            (None means no limit). Defaults to 5.

    Returns:
        An object array of shape (nExamples, 2): column 0 holds each feature
        vector, column 1 the integer class label.
    """
    examples = _loadLabelledFeatures("music_negative/", 0, maxFiles)
    examples += _loadLabelledFeatures("music_positive/", 1, maxFiles)

    # Fill an object array explicitly: np.array() on ragged [vector, label]
    # rows is rejected by recent NumPy releases unless dtype=object is given.
    dataset = np.empty((len(examples), 2), dtype=object)
    for row, (features, label) in enumerate(examples):
        dataset[row, 0] = features
        dataset[row, 1] = label
    return dataset

def trainModel(trainData, solve="lbfgs", hidden=(10,)):
    """Fit an MLP classifier on labelled feature vectors.

    Args:
        trainData: sequence of [features, label] rows.
        solve: solver name forwarded to MLPClassifier.
        hidden: hidden layer sizes forwarded to MLPClassifier. The default is
            the 1-tuple (10,), i.e. a single hidden layer of 10 units; the
            previous spelling `(10)` was just the int 10.

    Returns:
        The fitted MLPClassifier.
    """
    X = [row[0] for row in trainData]
    Y = [row[1] for row in trainData]

    # random_state is fixed, so training on the same data is repeatable.
    clf = MLPClassifier(solver=solve, alpha=1e-4, hidden_layer_sizes=hidden,
                        random_state=1, max_iter=1000)
    clf.fit(X, Y)

    return clf

def _foldIndices(nSamples, nFolds, rng):
    """Split a shuffled range(nSamples) into nFolds index arrays covering every sample once."""
    return np.array_split(rng.permutation(nSamples), nFolds)

def experiment(dataset, nFolds=5, execTries=10, seed=1):
    """Compare MLP solvers and layer layouts with repeated k-fold cross-validation.

    For every solver / hidden-layer combination one tab-separated line is
    printed: solver, layers, accuracy, and the train and test time in seconds
    per repetition.

    Args:
        dataset: array of [features, label] rows (must support index-array and
            boolean-mask indexing, e.g. the array built by getDataset).
        nFolds: number of cross-validation folds (capped at len(dataset)).
        execTries: number of repetitions, each with its own shuffled split.
        seed: seed of the generator that shuffles the splits.

    Raises:
        ValueError: if the dataset has fewer than 2 rows, since no train/test
            split is possible.
    """
    nSamples = len(dataset)
    if nSamples < 2:
        raise ValueError("experiment needs at least 2 samples, got {}".format(nSamples))
    nFolds = min(nFolds, nSamples)

    # The splits are drawn once and shared by every configuration so that the
    # reported accuracies are comparable. Shuffling matters: the dataset is
    # ordered by class, so contiguous folds would be single-class, and
    # array_split guarantees that every sample is tested exactly once per
    # repetition even when nSamples is not a multiple of nFolds.
    rng = np.random.RandomState(seed)
    splits = [_foldIndices(nSamples, nFolds, rng) for _ in range(execTries)]

    # A bare int is a single hidden layer of that many units.
    layerConfigs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60,
                    (5,2), (5,4), (10,2), (10,4), (20,2), (20,4)]

    for solver in ["lbfgs", "adam", "sgd"]:
        # Header now names all five columns printed below.
        print("{}\t{}\t{}\t{}\t{}".format("solvr", "layrs", "acc", "train", "test"))

        for layers in layerConfigs:
            success = 0
            attempts = 0
            testTime = 0
            trainTime = 0

            for folds in splits:
                for test_idx in folds:
                    train_mask = np.ones(nSamples, dtype=bool)
                    train_mask[test_idx] = False

                    train = dataset[train_mask]
                    test = dataset[test_idx]

                    # train model
                    beg = time.perf_counter()
                    clf = trainModel(train, solver, layers)
                    trainTime += time.perf_counter() - beg

                    # Calculate accuracy: one batched predict per fold
                    beg = time.perf_counter()
                    X_test = [row[0] for row in test]
                    y_true = np.array([row[1] for row in test])
                    y_star = clf.predict(X_test)
                    success += int(np.sum(y_star == y_true))
                    attempts += len(test)
                    testTime += time.perf_counter() - beg

            print("{}\t{}\t{}\t{}\t{}".format(solver, layers, success / attempts, trainTime / execTries, testTime / execTries))



# Build the labelled dataset from the music folders; this reads and analyses
# every selected wav file, so it is the slow step of the notebook.
dataset = getDataset()
# Bare last expression: the notebook displays the resulting array.
dataset

0	Sampling rate, number of samples: 44100 1323000
1	Sampling rate, number of samples: 44100 1323000
2	Sampling rate, number of samples: 44100 1323000
3	Sampling rate, number of samples: 44100 1323000
4	Sampling rate, number of samples: 44100 1323000
5	Sampling rate, number of samples: 44100 1323000
6	Sampling rate, number of samples: 44100 1323000
7	Sampling rate, number of samples: 44100 1323000
8	Sampling rate, number of samples: 44100 1323000


array([[array([0.02691697, 0.03661525, 0.03583258, ..., 0.00615601, 0.00683532,
       0.01327782]),
        0],
       [array([0.12133621, 0.0534029 , 0.03995009, ..., 0.00781565, 0.007242  ,
       0.00744991]),
        0],
       [array([0.03315563, 0.03447142, 0.04029038, ..., 0.02939013, 0.02163053,
       0.03312321]),
        0],
       [array([0.05775862, 0.05960753, 0.04840064, ..., 0.02286728, 0.01760071,
       0.02309974]),
        0],
       [array([0.01366833, 0.03232759, 0.04320554, ..., 0.00695013, 0.0052236 ,
       0.00620496]),
        1],
       [array([0.03919011, 0.03822595, 0.04891107, ..., 0.00574882, 0.00415907,
       0.00450465]),
        1],
       [array([0.02959392, 0.02527223, 0.01741152, ..., 0.00757818, 0.00739796,
       0.00622492]),
        1],
       [array([0.04885436, 0.05417423, 0.05723684, ..., 0.00614226, 0.01079344,
       0.01535494]),
        1],
       [array([0.066402  , 0.05336887, 0.05780399, ..., 0.0059058 , 0.00555958,
       0.0073705

In [50]:
def zNormalize(dataset):
    """Z-normalize every feature column of the dataset, in place.

    Each row of `dataset` holds a feature vector in column 0 and its class in
    column 1. After the call every feature (position in the vectors) has mean
    0 and standard deviation 1 across the rows. A feature that is constant
    across all rows has zero spread; it is mapped to 0 instead of NaN.

    Args:
        dataset: 2-D object array of [features, label] rows; column 0 is
            overwritten with the normalized vectors.

    Returns:
        None. The statistics are computed over all the rows that are passed in.
    """
    # Nothing to normalize (and an empty array may not even be 2-D).
    if len(dataset) == 0:
        return

    # Stack the per-row vectors into a plain (rows, features) float matrix.
    matrix = np.vstack(list(dataset[:, 0])).astype(float)

    # For each feature (column), we z-normalize it
    mean = matrix.mean(axis=0)
    std = matrix.std(axis=0)
    # Constant features have std 0: divide by 1 so they become 0, not NaN.
    std[std == 0] = 1.0
    matrix = (matrix - mean) / std

    # Replace features in the dataset
    for row in range(matrix.shape[0]):
        dataset[row, 0] = matrix[row].copy()

# Standardize the feature vectors stored in `dataset` in place; the call
# returns nothing, so `dataset` itself is what changes.
zNormalize(dataset)

[[array([0.02691697, 0.03661525, 0.03583258, ..., 0.00615601, 0.00683532,
       0.01327782])
  0]
 [array([0.12133621, 0.0534029 , 0.03995009, ..., 0.00781565, 0.007242  ,
       0.00744991])
  0]
 [array([0.03315563, 0.03447142, 0.04029038, ..., 0.02939013, 0.02163053,
       0.03312321])
  0]
 [array([0.05775862, 0.05960753, 0.04840064, ..., 0.02286728, 0.01760071,
       0.02309974])
  0]
 [array([0.01366833, 0.03232759, 0.04320554, ..., 0.00695013, 0.0052236 ,
       0.00620496])
  1]
 [array([0.03919011, 0.03822595, 0.04891107, ..., 0.00574882, 0.00415907,
       0.00450465])
  1]
 [array([0.02959392, 0.02527223, 0.01741152, ..., 0.00757818, 0.00739796,
       0.00622492])
  1]
 [array([0.04885436, 0.05417423, 0.05723684, ..., 0.00614226, 0.01079344,
       0.01535494])
  1]
 [array([0.066402  , 0.05336887, 0.05780399, ..., 0.0059058 , 0.00555958,
       0.00737059])
  1]]
[[-0.72216324 -0.5619213  -0.63816444 ... -0.57865681 -0.48511438
   0.03542578]
 [ 2.43097705  0.90367331 -

In [63]:
# Sanity check of the normalization: gather feature index 3 from every
# feature vector in the dataset...
data = [ i[3] for i in dataset[:,0] ]
# ...and display its mean and standard deviation (about 0 and 1 once the
# features have been z-normalized).
np.mean(data), np.std(data)

(-5.427757009278544e-16, 0.9999999999999999)