In [1]:
import numpy as np
import csv
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize, StandardScaler
import tensorflow as tf
from tensorflow.python.client import device_lib
from tensorflow import keras
from keras import backend as K
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers.core import Activation, Flatten, Dense
from keras.layers import BatchNormalization, Dropout

print(device_lib.list_local_devices())

Using TensorFlow backend.


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 3176995065613540268
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 3164969369
locality {
  bus_id: 1
  links {
  }
}
incarnation: 13540953408768866840
physical_device_desc: "device: 0, name: GeForce GTX 1050, pci bus id: 0000:01:00.0, compute capability: 6.1"
]


In [2]:
categories = ('BIG_BAND', 'BLUES_CONTEMPORARY', 'COUNTRY_TRADITIONAL', 'DANCE', 'ELECTRONICA', 'EXPERIMENTAL',
              'FOLK_INTERNATIONAL', 'GOSPEL', 'GRUNGE_EMO', 'HIP_HOP_RAP', 'JAZZ_CLASSIC', 'METAL_ALTERNATIVE',
              'METAL_DEATH', 'METAL_HEAVY', 'POP_CONTEMPORARY', 'POP_INDIE', 'POP_LATIN', 'PUNK', 'REGGAE',
              'RNB_SOUL', 'ROCK_ALTERNATIVE', 'ROCK_COLLEGE', 'ROCK_CONTEMPORARY', 'ROCK_HARD', 'ROCK_NEO_PSYCHEDELIA')

In [3]:
def derivatives_dataset():
    print("[INFO] loading derivatives dataset ...")
    derivative_labels = pd.read_csv('tagged_feature_sets/msd-jmirderivatives_dev/msd-jmirderivatives_dev.csv',
                                    delimiter=',', header=None).values[:, -1:]
    for i, label in enumerate(derivative_labels) :
        derivative_labels[i] = categories.index(label)
    derivative_features = pd.read_csv('tagged_feature_sets/msd-jmirderivatives_dev/msd-jmirderivatives_dev.csv',
                                      delimiter=',', header=None).values[:, 2:-1]
    np.asarray(derivative_labels)
    np.asarray(derivative_features)
    derivative_labels = to_categorical(derivative_labels, num_classes=25)
    print("[INFO] splitting the data ...")
    X_deriv, X_deriv_test, Y_deriv, Y_deriv_test = train_test_split(derivative_features, derivative_labels,
                                                                    test_size=0.20, random_state=42,
                                                                    stratify=derivative_labels)
    print("[INFO] Labels format : {0}".format(derivative_labels.shape))
    print("[INFO] Features format : {0}".format(derivative_features.shape))
    return X_deriv, X_deriv_test, Y_deriv, Y_deriv_test

In [4]:
def lpc_dataset():
    print("[INFO] loading lpc dataset ...")
    lpc_labels = pd.read_csv('tagged_feature_sets/msd-jmirlpc_dev/msd-jmirlpc_dev.csv',
                                    delimiter=',', header=None).values[:, -1:]
    for i, label in enumerate(lpc_labels) :
        lpc_labels[i] = categories.index(label)
    lpc_features = pd.read_csv('tagged_feature_sets/msd-jmirlpc_dev/msd-jmirlpc_dev.csv',
                                      delimiter=',', header=None).values[:, 2:-1]
    np.asarray(lpc_labels)
    np.asarray(lpc_features)
    lpc_labels = to_categorical(lpc_labels, num_classes=25)
    print("[INFO] splitting the data ...")
    X_lpc, X_lpc_test, Y_lpc, Y_lpc_test = train_test_split(lpc_features, lpc_labels,
                                                                    test_size=0.20, random_state=42,
                                                                    stratify=lpc_labels)
    print("[INFO] Labels format : {0}".format(lpc_labels.shape))
    print("[INFO] Features format : {0}".format(lpc_features.shape))
    return X_lpc, X_lpc_test, Y_lpc, Y_lpc_test

In [5]:
def mfccs_dataset():
    print("[INFO] loading mfccs dataset ...")
    mfccs_labels = pd.read_csv('tagged_feature_sets/msd-jmirmfccs_dev/msd-jmirmfccs_dev.csv',
                                    delimiter=',', header=None).values[:, -1:]
    for i, label in enumerate(mfccs_labels) :
        mfccs_labels[i] = categories.index(label)
    mfccs_features = pd.read_csv('tagged_feature_sets/msd-jmirmfccs_dev/msd-jmirmfccs_dev.csv',
                                      delimiter=',', header=None).values[:, 2:-1]
    np.asarray(mfccs_labels)
    np.asarray(mfccs_features)
    mfccs_labels = to_categorical(mfccs_labels, num_classes=25)
    print("[INFO] splitting the data ...")
    X_mfccs, X_mfccs_test, Y_mfccs, Y_mfccs_test = train_test_split(mfccs_features, mfccs_labels,
                                                                    test_size=0.20, random_state=42,
                                                                    stratify=mfccs_labels)
    print("[INFO] Labels format : {0}".format(mfccs_labels.shape))
    print("[INFO] Features format : {0}".format(mfccs_features.shape))
    return X_mfccs, X_mfccs_test, Y_mfccs, Y_mfccs_test

In [6]:
def moments_dataset():
    print("[INFO] loading moments dataset ...")
    moments_labels = pd.read_csv('tagged_feature_sets/msd-jmirmoments_dev/msd-jmirmoments_dev.csv',
                                    delimiter=',', header=None).values[:, -1:]
    for i, label in enumerate(moments_labels) :
        moments_labels[i] = categories.index(label)
    moments_features = pd.read_csv('tagged_feature_sets/msd-jmirmoments_dev/msd-jmirmoments_dev.csv',
                                      delimiter=',', header=None).values[:, 2:-1]
    np.asarray(moments_labels)
    np.asarray(moments_features)
    moments_labels = to_categorical(moments_labels, num_classes=25)
    print("[INFO] splitting the data ...")
    X_moments, X_moments_test, Y_moments, Y_moments_test = train_test_split(moments_features, moments_labels,
                                                                    test_size=0.20, random_state=42,
                                                                    stratify=moments_labels)
    print("[INFO] Labels format : {0}".format(moments_labels.shape))
    print("[INFO] Features format : {0}".format(moments_features.shape))
    return X_moments, X_moments_test, Y_moments, Y_moments_test

In [7]:
def spectral_dataset():
    print("[INFO] loading spectral dataset ...")
    spectral_labels = pd.read_csv('tagged_feature_sets/msd-jmirspectral_dev/msd-jmirspectral_dev.csv',
                                    delimiter=',', header=None).values[:, -1:]
    for i, label in enumerate(spectral_labels) :
        spectral_labels[i] = categories.index(label)
    spectral_features = pd.read_csv('tagged_feature_sets/msd-jmirspectral_dev/msd-jmirspectral_dev.csv',
                                      delimiter=',', header=None).values[:, 2:-1]
    np.asarray(spectral_labels)
    np.asarray(spectral_features)
    spectral_labels = to_categorical(spectral_labels, num_classes=25)
    print("[INFO] splitting the data ...")
    X_spectral, X_spectral_test, Y_spectral, Y_spectral_test = train_test_split(spectral_features, spectral_labels,
                                                                    test_size=0.20, random_state=42,
                                                                    stratify=spectral_labels)
    print("[INFO] Labels format : {0}".format(spectral_labels.shape))
    print("[INFO] Features format : {0}".format(spectral_features.shape))
    return X_spectral, X_spectral_test, Y_spectral, Y_spectral_test

In [8]:
def marsyas_dataset():
    print("[INFO] loading marsyas dataset ...")
    marsyas_labels = pd.read_csv('tagged_feature_sets/msd-marsyas_dev_new/msd-marsyas_dev_new.csv',
                                    delimiter=',', header=None).values[:, -1:]
    for i, label in enumerate(marsyas_labels) :
        marsyas_labels[i] = categories.index(label)
    marsyas_features = pd.read_csv('tagged_feature_sets/msd-marsyas_dev_new/msd-marsyas_dev_new.csv',
                                      delimiter=',', header=None).values[:, 2:-1]
    np.asarray(marsyas_labels)
    np.asarray(marsyas_features)
    marsyas_labels = to_categorical(marsyas_labels, num_classes=25)
    print("[INFO] splitting the data ...")
    X_marsyas, X_marsyas_test, Y_marsyas, Y_marsyas_test = train_test_split(marsyas_features, marsyas_labels,
                                                                    test_size=0.20, random_state=42,
                                                                    stratify=marsyas_labels)
    print("[INFO] Labels format : {0}".format(marsyas_labels.shape))
    print("[INFO] Features format : {0}".format(marsyas_features.shape))
    return X_marsyas, X_marsyas_test, Y_marsyas, Y_marsyas_test

In [9]:
def mvd_dataset():
    print("[INFO] loading mvd dataset ...")
    mvd_labels = pd.read_csv('tagged_feature_sets/msd-mvd_dev/msd-mvd_dev.csv',
                                    delimiter=',', header=None).values[:, -1:]
    for i, label in enumerate(mvd_labels) :
        mvd_labels[i] = categories.index(label)
    mvd_features = pd.read_csv('tagged_feature_sets/msd-mvd_dev/msd-mvd_dev.csv',
                                      delimiter=',', header=None).values[:, 2:-1]
    np.asarray(mvd_labels)
    np.asarray(mvd_features)
    mvd_labels = to_categorical(mvd_labels, num_classes=25)
    print("[INFO] splitting the data ...")
    X_mvd, X_mvd_test, Y_mvd, Y_mvd_test = train_test_split(mvd_features, mvd_labels,
                                                                    test_size=0.20, random_state=42,
                                                                    stratify=mvd_labels)
    print("[INFO] Labels format : {0}".format(mvd_labels.shape))
    print("[INFO] Features format : {0}".format(mvd_features.shape))
    return X_mvd, X_mvd_test, Y_mvd, Y_mvd_test

In [10]:
def rh_dataset():
    print("[INFO] loading rh dataset ...")
    rh_labels = pd.read_csv('tagged_feature_sets/msd-rh_dev_new/msd-rh_dev_new.csv',
                                    delimiter=',', header=None).values[:, -1:]
    for i, label in enumerate(rh_labels) :
        rh_labels[i] = categories.index(label)
    rh_features = pd.read_csv('tagged_feature_sets/msd-rh_dev_new/msd-rh_dev_new.csv',
                                      delimiter=',', header=None).values[:, 2:-1]
    np.asarray(rh_labels)
    np.asarray(rh_features)
    rh_labels = to_categorical(rh_labels, num_classes=25)
    print("[INFO] splitting the data ...")
    X_rh, X_rh_test, Y_rh, Y_rh_test = train_test_split(rh_features, rh_labels,
                                                                    test_size=0.20, random_state=42,
                                                                    stratify=rh_labels)
    print("[INFO] Labels format : {0}".format(rh_labels.shape))
    print("[INFO] Features format : {0}".format(rh_features.shape))
    return X_rh, X_rh_test, Y_rh, Y_rh_test

In [11]:
def ssd_dataset():
    print("[INFO] loading ssd dataset ...")
    ssd_labels = pd.read_csv('tagged_feature_sets/msd-ssd_dev/msd-ssd_dev.csv',
                                    delimiter=',', header=None).values[:, -1:]
    for i, label in enumerate(ssd_labels) :
        ssd_labels[i] = categories.index(label)
    ssd_features = pd.read_csv('tagged_feature_sets/msd-ssd_dev/msd-ssd_dev.csv',
                                      delimiter=',', header=None).values[:, 2:-1]
    np.asarray(ssd_labels)
    np.asarray(ssd_features)
    ssd_labels = to_categorical(ssd_labels, num_classes=25)
    print("[INFO] splitting the data ...")
    X_ssd, X_ssd_test, Y_ssd, Y_ssd_test = train_test_split(ssd_features, ssd_labels,
                                                                    test_size=0.20, random_state=42,
                                                                    stratify=ssd_labels)
    print("[INFO] Labels format : {0}".format(ssd_labels.shape))
    print("[INFO] Features format : {0}".format(ssd_features.shape))
    return X_ssd, X_ssd_test, Y_ssd, Y_ssd_test

In [12]:
def trh_dataset():
    print("[INFO] loading trh dataset ...")
    trh_labels = pd.read_csv('tagged_feature_sets/msd-trh_dev/msd-trh_dev.csv',
                                    delimiter=',', header=None).values[:, -1:]
    for i, label in enumerate(trh_labels) :
        trh_labels[i] = categories.index(label)
    trh_features = pd.read_csv('tagged_feature_sets/msd-trh_dev/msd-trh_dev.csv',
                                      delimiter=',', header=None).values[:, 2:-1]
    np.asarray(trh_labels)
    np.asarray(trh_features)
    trh_labels = to_categorical(trh_labels, num_classes=25)
    print("[INFO] splitting the data ...")
    X_trh, X_trh_test, Y_trh, Y_trh_test = train_test_split(trh_features, trh_labels,
                                                                    test_size=0.20, random_state=42,
                                                                    stratify=trh_labels)
    print("[INFO] Labels format : {0}".format(trh_labels.shape))
    print("[INFO] Features format : {0}".format(trh_features.shape))
    return X_trh, X_trh_test, Y_trh, Y_trh_test

In [13]:
def deepMLP(num_features, num_classes):
    n_hidden_1 = 100 # 1st layer number of neurons
    n_hidden_2 = 75 # 2nd layer number of neurons
    n_hidden_3 = 50 # 3rd layer number of neurons
    n_hidden_4 = 30 # 3rd layer number of neurons

    model = Sequential()
    model.add(Dense(n_hidden_1, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(n_hidden_2, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(n_hidden_3, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    
    model.add(Dense(n_hidden_4, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(num_classes, activation='softmax'))
    return model

def wideMLP(num_features, num_classes):
    n_hidden_1 = 200 # 1st layer number of neurons
    n_hidden_2 = 75  # 2nd layer number of neurons

    model = Sequential()
    model.add(Dense(n_hidden_1, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(n_hidden_2, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(num_classes, activation='softmax'))
    return model

In [14]:
num_classes = len(categories)
num_features = 0
BATCH_SIZE = 500
INIT_LR = 5e-4
EPOCHS = 50
Datasets = ['derivatives', 'lpc', 'mfccs', 'moments', 'spectral', 'marsyas', 'mvd', 'rh', 'ssd', 'trh']
deep_val_acc = []
wide_val_acc = []

In [15]:
def scaleDataset(X, X_test, num_features):
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    if X_test!=None :
        X_test = scaler.fit_transform(X_test)
    num_features = X.shape[1]
    return X, X_test, num_features

In [16]:
def createDeepModel(num_classes, num_features, X, X_test, Y, Y_test, EPOCHS, BATCH_SIZE, INIT_LR, chosenDataset):
    plots = False
    deepModel = deepMLP(num_features, num_classes)
    deepModel.compile(optimizer=Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print("[INFO] training deep model on {0} dataset".format(chosenDataset))
    deepH = deepModel.fit(X, Y, epochs=EPOCHS, batch_size=BATCH_SIZE, 
                          validation_data=(X_test, Y_test), verbose = 0)
    deepModel.save("ModelsMLP\\"+"deepMPL"+chosenDataset+".hdf5")
    
    if plots :
        plt.style.use("ggplot")
        plt.figure()
        plt.plot(np.arange(0, EPOCHS), deepH.history["loss"], label="train_loss")
        plt.plot(np.arange(0, EPOCHS), deepH.history["val_loss"], label="val_loss")
        plt.plot(np.arange(0, EPOCHS), deepH.history["acc"], label="train_acc")
        plt.plot(np.arange(0, EPOCHS), deepH.history["val_acc"], label="val_acc")
        plt.title("Training Loss and Accuracy on "+chosenDataset)
        plt.xlabel("Epoch #")
        plt.ylabel("Loss/Accuracy")
        plt.legend(loc="lower left")
        plt.savefig("GraphsMLP\\"+"deepMPL"+chosenDataset+".png")
    return deepH.history["val_acc"][-1]

In [17]:
def createWideModel(num_classes, num_features, X, X_test, Y, Y_test, EPOCHS, BATCH_SIZE, INIT_LR, chosenDataset):
    plots = False
    wideModel = wideMLP(num_features, num_classes)
    wideModel.compile(optimizer=Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print("[INFO] training wide model on {0} dataset".format(chosenDataset))
    wideH = wideModel.fit(X, Y, epochs=EPOCHS, batch_size=BATCH_SIZE, 
                          validation_data=(X_test, Y_test), verbose = 0)
    wideModel.save("ModelsMLP\\"+"wideMPL"+chosenDataset+".hdf5")

    if plots :
        plt.style.use("ggplot")
        plt.figure()
        plt.plot(np.arange(0, EPOCHS), wideH.history["loss"], label="train_loss")
        plt.plot(np.arange(0, EPOCHS), wideH.history["val_loss"], label="val_loss")
        plt.plot(np.arange(0, EPOCHS), wideH.history["acc"], label="train_acc")
        plt.plot(np.arange(0, EPOCHS), wideH.history["val_acc"], label="val_acc")
        plt.title("Training Loss and Accuracy on "+chosenDataset)
        plt.xlabel("Epoch #")
        plt.ylabel("Loss/Accuracy")
        plt.legend(loc="lower left")
        plt.savefig("GraphsMLP\\"+"wideMPL"+chosenDataset+".png")
    return wideH.history["val_acc"][-1]

In [18]:
for i in range(len(Datasets)):
    chosenDataset = Datasets[i]
    X, X_test, Y, Y_test = locals()[chosenDataset+'_dataset']()
    X, X_test, num_features = scaleDataset(X, X_test, num_features)
    deep_val_acc.append(createDeepModel(num_classes, num_features, X, X_test, Y, Y_test, 
                                        EPOCHS, BATCH_SIZE, INIT_LR, chosenDataset))
    print("[INFO] deep model : last validation accuracy : ".format(deep_val_acc[-1]))
    wide_val_acc.append(createWideModel(num_classes, num_features, X, X_test, Y, Y_test, 
                                        EPOCHS, BATCH_SIZE, INIT_LR, chosenDataset))
    print("[INFO] wide model : last validation accuracy : ".format(wide_val_acc[-1]))

[INFO] loading derivatives dataset ...
[INFO] splitting the data ...
[INFO] Labels format : (179555, 25)
[INFO] Features format : (179555, 96)




[INFO] training deep model on derivatives dataset
[INFO] deep model : last validation accuracy : 
[INFO] training wide model on derivatives dataset
[INFO] wide model : last validation accuracy : 
[INFO] loading lpc dataset ...
[INFO] splitting the data ...
[INFO] Labels format : (179555, 25)
[INFO] Features format : (179555, 20)




[INFO] training deep model on lpc dataset
[INFO] deep model : last validation accuracy : 
[INFO] training wide model on lpc dataset
[INFO] wide model : last validation accuracy : 
[INFO] loading mfccs dataset ...
[INFO] splitting the data ...
[INFO] Labels format : (179555, 25)
[INFO] Features format : (179555, 26)




[INFO] training deep model on mfccs dataset
[INFO] deep model : last validation accuracy : 
[INFO] training wide model on mfccs dataset
[INFO] wide model : last validation accuracy : 
[INFO] loading moments dataset ...
[INFO] splitting the data ...
[INFO] Labels format : (179555, 25)
[INFO] Features format : (179555, 10)




[INFO] training deep model on moments dataset
[INFO] deep model : last validation accuracy : 
[INFO] training wide model on moments dataset
[INFO] wide model : last validation accuracy : 
[INFO] loading spectral dataset ...
[INFO] splitting the data ...
[INFO] Labels format : (179555, 25)
[INFO] Features format : (179555, 16)




[INFO] training deep model on spectral dataset
[INFO] deep model : last validation accuracy : 
[INFO] training wide model on spectral dataset
[INFO] wide model : last validation accuracy : 
[INFO] loading marsyas dataset ...
[INFO] splitting the data ...
[INFO] Labels format : (179555, 25)
[INFO] Features format : (179555, 124)




[INFO] training deep model on marsyas dataset
[INFO] deep model : last validation accuracy : 
[INFO] training wide model on marsyas dataset
[INFO] wide model : last validation accuracy : 
[INFO] loading mvd dataset ...
[INFO] splitting the data ...
[INFO] Labels format : (179555, 25)
[INFO] Features format : (179555, 420)




[INFO] training deep model on mvd dataset
[INFO] deep model : last validation accuracy : 
[INFO] training wide model on mvd dataset
[INFO] wide model : last validation accuracy : 
[INFO] loading rh dataset ...
[INFO] splitting the data ...
[INFO] Labels format : (179555, 25)
[INFO] Features format : (179555, 60)




[INFO] training deep model on rh dataset
[INFO] deep model : last validation accuracy : 
[INFO] training wide model on rh dataset
[INFO] wide model : last validation accuracy : 
[INFO] loading ssd dataset ...
[INFO] splitting the data ...
[INFO] Labels format : (179555, 25)
[INFO] Features format : (179555, 168)




[INFO] training deep model on ssd dataset
[INFO] deep model : last validation accuracy : 
[INFO] training wide model on ssd dataset
[INFO] wide model : last validation accuracy : 
[INFO] loading trh dataset ...
[INFO] splitting the data ...
[INFO] Labels format : (179555, 25)
[INFO] Features format : (179555, 420)




[INFO] training deep model on trh dataset
[INFO] deep model : last validation accuracy : 
[INFO] training wide model on trh dataset
[INFO] wide model : last validation accuracy : 


In [19]:
print(deep_val_acc)
print(wide_val_acc)

[0.26844142548044253, 0.21291526274573072, 0.25309793664963204, 0.20314109944634323, 0.23246358975986767, 0.3019687554250434, 0.2336053012492832, 0.20673331418055607, 0.29375400356586334, 0.21578346411596813]
[0.2838963000272663, 0.22229957385375343, 0.2636518068674901, 0.20848765048135615, 0.24315669390919287, 0.3193450466793006, 0.23995433250288462, 0.21547715290739788, 0.3089861052731604, 0.21700871601098695]


In [18]:
from heapq import nlargest
bests = 3
deep_max_scores_indices = []
wide_max_scores_indices = []
deep_val_acc_temp = [0.26844142548044253, 0.21291526274573072, 0.25309793664963204, 0.20314109944634323, 0.23246358975986767, 
                     0.3019687554250434, 0.2336053012492832, 0.20673331418055607, 0.29375400356586334, 0.21578346411596813]
wide_val_acc_temp = [0.2838963000272663, 0.22229957385375343, 0.2636518068674901, 0.20848765048135615, 0.24315669390919287, 
                     0.3193450466793006, 0.23995433250288462, 0.21547715290739788, 0.3089861052731604, 0.21700871601098695]
deep_max_scores = nlargest(bests, deep_val_acc_temp)
wide_max_scores = nlargest(bests, wide_val_acc_temp)

for i in range(bests):
    deep_max_scores_indices.append(deep_val_acc_temp.index(deep_max_scores[i]))
    wide_max_scores_indices.append(wide_val_acc_temp.index(wide_max_scores[i]))

print("Best scores with deep MLP on :\n{0} with {1}\n{2} with {3}\n{4} with {5}\n".format(Datasets[deep_max_scores_indices[0]], 
                                                                                        deep_max_scores[0], 
                                                                                        Datasets[deep_max_scores_indices[1]], 
                                                                                        deep_max_scores[1], 
                                                                                        Datasets[deep_max_scores_indices[2]], 
                                                                                        deep_max_scores[2]))
print("Best scores with wide MLP on :\n{0} with {1}\n{2} with {3}\n{4} with {5}\n".format(Datasets[wide_max_scores_indices[0]], 
                                                                                        wide_max_scores[0], 
                                                                                        Datasets[wide_max_scores_indices[1]], 
                                                                                        wide_max_scores[1], 
                                                                                        Datasets[wide_max_scores_indices[2]], 
                                                                                        wide_max_scores[2]))

Best scores with deep MLP on :
marsyas with 0.3019687554250434
ssd with 0.29375400356586334
derivatives with 0.26844142548044253

Best scores with wide MLP on :
marsyas with 0.3193450466793006
ssd with 0.3089861052731604
derivatives with 0.2838963000272663



In [18]:
target_model = ['wide', 5]
model_name = 'ModelsMLP\\'+target_model[0]+'MPL'+Datasets[target_model[1]]+'.hdf5'
print(model_name)
model = load_model(model_name)

ModelsMLP\wideMPLmarsyas.hdf5


In [19]:
def untagged_dataset(dataset):
    print("[INFO] loading untagged {0} dataset ...".format(dataset))
    if dataset in ('derivatives', 'lpc', 'mfccs', 'moments', 'spectral'):
        dataset = 'jmir'+dataset
    if dataset == 'marsyas' :
        new = 'new_'
    else :
        new = ''
    features = pd.read_csv('untagged_feature_sets/msd-'+dataset+'_test_'+new+'nolabels/msd-'+dataset+'_test_'+new+'nolabels.csv',
                                      delimiter=',', header=None).values[:, 2:-1]
    IDs = pd.read_csv('untagged_feature_sets/msd-'+dataset+'_test_'+new+'nolabels/msd-'+dataset+'_test_'+new+'nolabels.csv',
                                      delimiter=',', header=None).values[:, 1]
    np.asarray(features)
    np.asarray(IDs)
    print("[INFO] Features format : {0}".format(features.shape))
    print("[INFO] IDs format : {0}".format(IDs.shape))
    return IDs, features

IDs, guess_features = untagged_dataset(Datasets[target_model[1]])

[INFO] loading untagged marsyas dataset ...
[INFO] Features format : (92499, 124)
[INFO] IDs format : (92499,)


In [20]:
guess_labels = model.predict(guess_features)

In [21]:
guess_categories = []
for i in range(len(guess_labels)):
    guess_categories.append(categories[np.argmax(guess_labels[i])])

In [35]:
with open('result.csv', 'w', newline='') as csvfile:
    filewriter = csv.writer(csvfile, delimiter=',')
    filewriter.writerow(['id', 'genre'])
    for i in range(len(IDs)):
        filewriter.writerow([IDs[i], guess_categories[i]])