In [97]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.contrib.keras as keras
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [98]:
# set random seed
np.random.seed(123)
tf.set_random_seed(123)

In [99]:
# import the training data 
X_total = pd.read_csv('X_musical_features.csv').drop('track_id', axis=1)

In [100]:
# import single genres y values
y_single_total = pd.read_csv('y_genres_onehot_single.csv').drop('Unnamed: 0', axis=1)

# import non-one-hotted y values
y_cold_single_total = pd.read_csv('y_genres_single.csv')

In [101]:
print(y_cold_single_total.shape)
print(y_cold_single_total.iloc[0:5,0:5])

(63656, 1)
    0
0  21
1  21
2  21
3  10
4  76


In [102]:
X_total.shape

(63656, 518)

In [103]:
y_single_total.shape

(63656, 139)

In [104]:
X_train, X_test, y_train, y_test = train_test_split(
    X_total, y_single_total, test_size=0.3, shuffle=False, stratify = None
)

# also split y cold 70/30
y_cold_train = y_cold_single_total.head(44559)
y_cold_test = y_cold_single_total.tail(y_cold_single_total.shape[0] - 44559)

# then do this for 
# *_train together
# *_test together
def unison_shuffled_copies(a, b, c):
    assert len(a) == b.shape[0]
    assert a.shape[0] == c.shape[0]
    
    p = np.random.permutation(len(a))
    return a.iloc[p, :], b.iloc[p, :], c.iloc[p, :]

#new variables with shuffled 
X_train_shuf, y_train_shuf, y_cold_train_shuf = unison_shuffled_copies(X_train, y_train, y_cold_train)

#also do this for test set

Testing for correctness in shuffling

In [105]:
#Testing values

print(X_train_shuf.iloc[0:5,0:5])
print(y_train_shuf.iloc[0:5,0:5]) 
print(y_cold_train_shuf.iloc[0:5,0:5])       #this should be the shuffled value
print(y_cold_train.iloc[0:5,0:5])            #this and below should output unshuffled values
print(y_cold_single_total.iloc[0:5,0:5])

       ('chroma_cens', 'kurtosis', '01')  ('chroma_cens', 'kurtosis', '02')  \
33009                          -0.865313                          -0.436609   
11814                           0.046079                          -0.471472   
23144                          -0.650234                          -0.715156   
25377                           0.189053                           0.324736   
22114                           0.369846                           0.532329   

       ('chroma_cens', 'kurtosis', '03')  ('chroma_cens', 'kurtosis', '04')  \
33009                          -0.734898                          -0.521307   
11814                          -0.561838                          -0.515131   
23144                          -0.548415                          -0.533902   
25377                           0.355818                          -0.285775   
22114                           0.446672                          -0.202046   

       ('chroma_cens', 'kurtosis', '05')  
33009  

In [106]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train_shuf)
X_test_scaled = scaler.transform(X_test)

In [47]:
# del X_train, X_test

In [107]:
print(X_train_scaled.shape)
print(X_test_scaled.shape)

(44559, 518)
(19097, 518)


In [108]:
model = keras.models.Sequential()

In [109]:
model.add(
    keras.layers.Dense(
        units=518,
        input_dim=X_train_scaled.shape[1],
        kernel_initializer='glorot_uniform',
        bias_initializer='zeros',
        activation='tanh'
    )
)

In [110]:
model.add(
    keras.layers.Dense(
        units=474,
        input_dim=518,
        kernel_initializer='glorot_uniform',
        bias_initializer='zeros',
        activation='tanh'
    )
)

In [111]:
model.add(
    keras.layers.Dense(
        units=y_train.shape[1],
        input_dim=474,
        kernel_initializer='glorot_uniform',
        bias_initializer='zeros',
        activation='softmax'
    )
)

In [112]:
sgd_optimizer = keras.optimizers.SGD(lr=0.001, decay=1e-7, momentum=.9)

In [113]:
model.compile(optimizer=sgd_optimizer, loss='categorical_crossentropy')

In [115]:
history = model.fit(
    X_train_scaled, y_train_shuf,
    batch_size=100, epochs=10,
    verbose=1,
    validation_split=0.1
)

Train on 40103 samples, validate on 4456 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [116]:
y_train_pred = model.predict_classes(X_train_scaled, verbose=0)

In [117]:
train_acc = np.sum(y_cold_single_total.head(44559).values.flatten() == y_train_pred, axis=0) / 44559

In [119]:
print(f"Accuracy on training set: {train_acc}")

Accuracy on training set: 0.08317062770708498
