In [16]:
#!/usr/bin/python3

from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction
import matplotlib.pyplot as plt
import subprocess as subp
import os, time, sys
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.externals import joblib

# Returns the .wav files in current directory.
# Return value is a list of filenames.
def getWavFiles():
    files = subp.check_output(["ls"]).decode("utf8").split("\n")
    files = [f for f in files if f.split(".")[-1] == "wav"]
    return files

# Gets feature vector for a wav song
def getFeatureVector(file):
    try:
        [Fs, x] = audioBasicIO.readAudioFile(file);
        x = audioBasicIO.stereo2mono(x)
    except:
        print("Error on file: ", file)
        return None

    # We cut the audio to a 30 second window in the middle of the song
    # If the audio is shorter than 30 second, we discard it
    nSamples = int(Fs * 29.9)
    if isinstance(x, int):
        return None
    elif len(x) < nSamples:
        return None
    offset   = (len(x) - nSamples) // 2
    x = x[offset:offset+nSamples]

    mterm, sterm, f_names = audioFeatureExtraction.mtFeatureExtraction(x, Fs, 1*Fs, 1*Fs, 0.050*Fs, 0.025*Fs);

    # Should return 68 mid-term features per mid-term window (30 windows)
    return mterm.ravel()

# Returns the genres for each negative example.
# Return value is a dictionary of music_id -> genre
def getGenres():
    with open("genres.csv") as fp:
        genres = fp.read()

    gdict = {}
    genres = [ g.split(",") for g in genres.strip().split("\n") ]
    for row in genres:
        gdict[int(row[0])] = row[1]

    return gdict

# Returns the dataset composed of feature vectors and the class of each music
# Return value is a list containing:
#     1) a matrix where each row is [feature vector, class, music id, genre]
#     2) dictionary with counters of how many items per genre
def getDataset():
    dataset = []
    progress = 0
    genreCounter = {}

    # Get genre dictionary and initialize genre counters
    genres = getGenres()
    for g in set(genres.values()):
        genreCounter[g] = 0
    genreCounter["Classical"] = 0

    os.chdir("music_negative/")
    files = getWavFiles()
    for f in files:
        idd = int(f.rstrip(".wav"))   # Get music id from filename
        genre = genres[idd]           # Get music genre from dictionary
        if genreCounter[genre] > 400: # Don't get features for more than 400 songs per genre
            continue

        if progress % 10 == 0:
            print("Processing: {}".format(progress))

        features = getFeatureVector(f)
        if features is not None:
            dataset.append([features, 0, idd, genre])
            genreCounter[genre] += 1    # Increment genrecounter

        progress += 1

    os.chdir("..")

    os.chdir("music_positive/")
    files = getWavFiles()
    for f in files:
        if progress % 10 == 0:
            print("Processing: {}".format(progress))

        features = getFeatureVector(f)
        if features is not None:
            dataset.append([features, 1, -1, "Classical"])
            genreCounter["Classical"] += 1

        progress += 1

    os.chdir("..")

    return [np.array(dataset), genreCounter]

# Trains an MLP using the given dataset.
def trainModel(trainData, solve="lbfgs", hidden=(4)):
    clf = MLPClassifier(solver=solve, alpha=1e-4, hidden_layer_sizes=hidden, max_iter=1000, activation="logistic")

    X = [ i[0] for i in trainData ]
    Y = [ i[1] for i in trainData ]
    clf.fit(X, Y)

    return clf

def experimentMany(dataset):
    # Create a randomized copy of the dataset
    rDataset = dataset.copy()
    np.random.shuffle(rDataset)

    # Create statistics
    genres = set([ row[3] for row in dataset ])

    # k-fold cross-validation
    nFolds = 5
    foldSize = len(rDataset)//nFolds

    for solver in ["lbfgs", "adam", "sgd"]:
        print("{}\t{}\t{}\t{}\t{}".format("solver", "layrs", "acc", "train", "test"))

        for layers in [(1), (2), (3), (4), (5), (6), (7), (8)]:
            success = 0
            attempts = 0

            execTries = 10
            testTime  = 0
            trainTime = 0

            # Create statistics
            stats  = {}
            for g in genres:
                stats[g] = [0,0] # 0 Successes, 0 Failures

            for execId in range(execTries):
                for i in range(nFolds):
                    train_idx = np.array([ True for i in range(len(rDataset)) ])
                    train_idx[i*foldSize:(i+1)*foldSize] = False

                    if i == nFolds-1:
                        train_idx[-1] = False

                    test_idx = train_idx == False

                    train = rDataset[train_idx]
                    test  = rDataset[test_idx]

                    # train model
                    beg = time.time()
                    clf = trainModel(train, solver, layers)
                    trainTime += time.time() - beg

                    beg = time.time()

                    # Calculate accuracy
                    for [x,y,idd,genre] in test:
                        y_star = clf.predict([x])
                        # print("[{},{}]".format(y,y_star))
                        if y_star == y:
                            success += 1
                            stats[genre][0] += 1
                        else:
                            stats[genre][1] += 1
                        attempts += 1

                    testTime += time.time() - beg

            print("{}\t{}\t{}\t{}\t{}\t{}".format(solver, layers, success / attempts, trainTime / execTries, testTime / execTries, stats))

def experimentOne(dataset):
    # Create a randomized copy of the dataset
    rDataset = dataset.copy()
    np.random.shuffle(rDataset)

    # Create statistics
    genres = set([ row[3] for row in dataset ])

    # k-fold cross-validation
    nFolds = 5
    foldSize = len(rDataset)//nFolds

    print("{}\t{}\t{}".format("acc", "train", "test"))

    success = 0
    attempts = 0

    execTries = 10
    testTime  = 0
    trainTime = 0

    # Create statistics
    stats  = {}
    for g in genres:
        stats[g] = [0,0] # 0 Successes, 0 Failures

    for execId in range(execTries):
        for i in range(nFolds):
            train_idx = np.array([ True for i in range(len(rDataset)) ])
            train_idx[i*foldSize:(i+1)*foldSize] = False

            if i == nFolds-1:
                train_idx[-1] = False

            test_idx = train_idx == False

            train = rDataset[train_idx]
            test  = rDataset[test_idx]

            # train model
            beg = time.time()
            clf = trainModel(train, "lbfgs", (4))
            trainTime += time.time() - beg

            beg = time.time()

            # Calculate accuracy
            for [x,y,idd,genre] in test:
                y_star = clf.predict([x])
                # print("[{},{}]".format(y,y_star))
                if y_star == y:
                    success += 1
                    stats[genre][0] += 1
                else:
                    stats[genre][1] += 1
                attempts += 1

            testTime += time.time() - beg

        print("{}\t{}\t{}\t{}".format(success / attempts, trainTime / execTries, testTime / execTries, stats))


# Normalizes the dataset so that each feature has mean 0 and deviation 1
def zNormalize(dataset):
    # Each row in the dataset has the feature vector, then its class
    feats = dataset[:,0]

    # Transform to matrix
    matrix = np.matrix([[col for col in row] for row in feats])

    # For each feature (column), we z-normalize it
    for col in range(matrix.shape[1]):
        mean = np.mean(matrix[:,col])
        std  = np.std(matrix[:,col])
        matrix[:,col] = (matrix[:,col] - mean) / std

    # Replace features in the dataset
    for row in range(matrix.shape[0]):
        dataset[row,0] = np.array(matrix[row,:]).flatten()

# Balances the dataset by replicating classical songs
def fillDataset(dataset):
    nOther = len(dataset[dataset[:,3] != "Classical"])
    nClass = len(dataset[dataset[:,3] == "Classical"])
    remaining = nOther - nClass
    
    if remaining < 0:
        return dataset

    nCopies = (remaining + nClass - 1) // nClass
    classical = dataset[dataset[:,3] == "Classical"]
    classical = np.concatenate([classical for i in range(nCopies)])
    
    return np.concatenate([dataset, classical])

dataset = np.load("features.npy")
dataset = fillDataset(dataset)
experimentOne(dataset)

acc	train	test
0.9703001088477686	3.929287385940552	0.0664531946182251	{'Folk': [359, 41], 'Hip-Hop': [397, 3], 'Instrumental': [362, 38], 'Experimental': [375, 25], 'Rock': [392, 8], 'International': [376, 24], 'Electronic': [387, 13], 'Pop': [375, 25], 'Classical': [3217, 14]}
0.9693671279738766	6.551239037513733	0.13079073429107665	{'Folk': [721, 79], 'Hip-Hop': [796, 4], 'Instrumental': [713, 87], 'Experimental': [752, 48], 'Rock': [788, 12], 'International': [759, 41], 'Electronic': [774, 26], 'Pop': [753, 47], 'Classical': [6412, 50]}
0.9701446120354533	9.461775207519532	0.1918696403503418	{'Folk': [1089, 111], 'Hip-Hop': [1193, 7], 'Instrumental': [1067, 133], 'Experimental': [1129, 71], 'Rock': [1184, 16], 'International': [1133, 67], 'Electronic': [1162, 38], 'Pop': [1133, 67], 'Classical': [9627, 66]}
0.9708443476908724	12.290276694297791	0.25242974758148196	{'Folk': [1457, 143], 'Hip-Hop': [1590, 10], 'Instrumental': [1427, 173], 'Experimental': [1507, 93], 'Rock': [1579, 21

In [30]:
data = {'Folk': [3656, 344], 'Hip-Hop': [3982, 18], 'Instrumental': [3551, 449], 'Experimental': [3758, 242], 'Rock': [3946, 54], 'International': [3781, 219], 'Electronic': [3883, 117], 'Pop': [3786, 214], 'Classical': [32114, 196]}

In [31]:
for k in sorted(data.keys()):
    g = data[k]
    total = sum(g)
    fail = g[1]
    succ = g[0]
    print(k, fail/total * 10000 // 1 / 100, succ/total * 10000 // 1 / 100, sep="\t")

Classical	0.6	99.39
Electronic	2.92	97.07
Experimental	6.05	93.95
Folk	8.59	91.4
Hip-Hop	0.45	99.55
Instrumental	11.22	88.77
International	5.47	94.52
Pop	5.35	94.65
Rock	1.35	98.65


In [32]:
classical = data["Classical"]
del data["Classical"]
other = data

In [38]:
sum([ i[0] for i in other.values() ]), sum([ i[1] for i in other.values() ])

(30343, 1657)

In [42]:
classical[0], classical[1]

(32114, 196)

In [43]:
30343 / (30343 + 1657), 1657 / (30343 + 1657)

(0.94821875, 0.05178125)

In [44]:
c = classical
c[0] / sum(c), c[1] / sum(c)

(0.9939337666357165, 0.006066233364283503)