In [1]:
#import all the relevant files and libraries 
import sys
sys.path.append('../Back_End/')
sys.path.append('../Data_Management/')

import pandasDB
import song_result_interface
import CSVInterface

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from genres import classes, NUM_GENRES
from ANN_parameter import Parameter
from ANN_result import Result
from ANN_class import ANN
from ANN_encode import encode, decode
import random

Using TensorFlow backend.


In [12]:
indepent_features = ['mfcc', 'spectral_contrast']

# set your experiment seed for train test split
EXPERIMENT_SEED = 42
FEATURE_COUNT = 200
VALIDATION_PERCENT = 0.1
DEFAULT_LAYERS = 1
DEFAULT_NODES = len(classes) + 1
DEFAULT_H_ACTIVATION = 'relu'
DEFAULT_O_ACTIVATION = 'softmax'
DEFAULT_LOSS = 'categorical_crossentropy'
DEFAULT_BATCH = 200
DEFAULT_EPOCHS = 20
TEST_RATIO = 0.34
DATA_SET = 'cleanLarge'
samples = 20

In [5]:
print('Initializing Data Management interface...')
# reads the data from the csv
reader = CSVInterface.featRead()

DB = pandasDB.DataBase()

# D = { X | Y }
# D[X][Y]
D = {}
# X
D['X'] = {
    'small'	: reader.getSubset(
        reader.getFrame('features'),
        sub='small'
    ),
    'cleanLarge': reader.getSubset(
        reader.getFrame('features'),
        sub='cleanLarge'
    )
}

# Y
D['Y'] = {
    'small'	: reader.getSubset(
        reader.getFrame('track')['genre_top'],
        sub='small'
    ),
    'cleanLarge': reader.getSubset(
        reader.getFrame('track')['genre_top'],
        sub='cleanLarge'
    ),
}

Initializing Data Management interface...
Reading features.pkl
Reading tracks.pkl
Reading echonest.pkl
Reading genres.pkl


In [6]:
indepent_features = ['mfcc', 'spectral_contrast']

print('Constructing datasets')
print('X')
# the ind vars
# X =  pd.DataFrame(D['X'][DATA_SET].iloc[:, indepent_features])
X = pd.DataFrame(D['X'][DATA_SET][indepent_features])

print('Y')
# the dependent var
Y = pd.DataFrame(D['Y'][DATA_SET], columns=['genre_top'])

print('train/validation split')
# Test and train split using encoded Y labels (vector of 0s with one 1)
trainx, valx, trainy, valy = train_test_split(
    X.values,
    encode(Y),  # one hot encoder, see ANN_encode.py
    test_size=VALIDATION_PERCENT,  # validation size
    random_state=EXPERIMENT_SEED
)

sample = trainx[0].copy()

print('Data done!\n\n********')

# Build the neural network
print('\nBuilding neural net')
print('input : {}'.format(len(sample)))
print('output: {}\n'.format(NUM_GENRES))

net = 0
history = 0
callback = 0

Constructing datasets
X
Y
train/validation split
Data done!

********

Building neural net
input : 189
output: 16



In [8]:
def make_and_train_model(h_layers, h_nodes):
    # create an ANN with specified parameters
    net = ANN(p=Parameter(
        num_input=len(sample),
        num_hidden_layers=h_layers,
        nodes_per_hidden=h_nodes,
        num_output=NUM_GENRES,
        hidden_activation=DEFAULT_H_ACTIVATION,
        output_activation=DEFAULT_O_ACTIVATION,
        initialize=False,
        loss_function=DEFAULT_LOSS,
        features=indepent_features
    ))
    return net

In [9]:
#make the model with 1 hidden later and 1 node 
model = make_and_train_model(1,1)


built using:
{'num_input': 189, 'num_hidden_layers': 1, 'nodes_per_hidden': 1, 'num_output': 16, 'hidden_activation': 'relu', 'output_activation': 'softmax', 'initialize': False, 'learning_rate': 10, 'loss_function': 'categorical_crossentropy', 'features': ['mfcc', 'spectral_contrast']}

ANN() constructor finished
********



In [11]:
#train the model on the split data 
history, callback = model.train(
            trainx,
            trainy,
            num_iter=DEFAULT_EPOCHS,
            test_ratio=TEST_RATIO,
            batch=DEFAULT_BATCH,
            interactive=False
        )

Training model with 20 epochs, batch size 200
Training with validation ratio of 0.34 and batch size of 200.
Train on 29461 samples, validate on 15177 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [14]:
#predict for the first 20 samples based on the trained data
for index in range(0, samples):
    song = DB.query()['track_data']
    song['X'] = song['X'][indepent_features].values
    # song['X'] = song['X'].iloc[:, indepent_features].values
    print(model.predict(song))

{'track_id': 64005, 'song_title': array(['Giraffes'], dtype=object), 'artist_name': array(['Gosprom'], dtype=object), 'date': array(['2012-04-25 11:49:10'], dtype=object), 'top_genre': array(['Pop'], dtype=object), 'set': array(['small'], dtype=object), 'X': array([[ 9.69544983e+00,  1.68701708e+00,  8.33089471e-01,
         7.98224747e-01, -4.78758104e-02,  2.21508169e+00,
         7.29529560e-03,  3.29090089e-01,  5.43230549e-02,
         7.85462856e-02,  6.55077100e-02,  8.15712690e-01,
         5.56947708e-01, -2.89715044e-02,  1.12592734e-01,
         4.21759009e-01,  9.17575285e-02,  3.73558789e-01,
         5.30106604e-01, -3.60897966e-02,  7.55250702e+01,
         2.36582230e+02,  6.60647659e+01,  9.13702621e+01,
         4.24546661e+01,  8.25420609e+01,  3.73244896e+01,
         6.32670593e+01,  3.10166702e+01,  4.02427177e+01,
         3.30039215e+01,  4.44527397e+01,  2.74063911e+01,
         3.87385674e+01,  3.38395462e+01,  3.86182709e+01,
         2.96203938e+01,  5.45681

In [17]:
#gets the score of a trained set, need to predict as well to get the mean score 
print(model.get_mean_score())

3.075


In [18]:
#returns mfcc and spectral_contrast which is correct 
print(model.get_features())

['mfcc', 'spectral_contrast']


In [25]:
model.save_to_disk('qa_testing')

Saved
	../ML_Algs/trained_models/qa_testing_parameters.csv
	../ML_Algs/trained_models/qa_testing_weights.npy
	../ML_Algs/trained_models/qa_testing_features.npy



In [28]:
a = pd.read_csv('../ML_Algs/trained_models/qa_testing_parameters.csv')
#correct model parameters based on input 
print(a)

Empty DataFrame
Columns: [189, 1, 1.1, 16, relu, softmax, False, 10, categorical_crossentropy]
Index: []


In [32]:
#correctly saved to disk
b = np.load('../ML_Algs/trained_models/qa_testing_features.npy')
print(b)

['mfcc' 'spectral_contrast']
