In [1]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [2]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets

In [3]:
audio = pd.read_csv('data/audio_features_hot_100_1958_2019.csv')
genre = pd.read_csv('data/songGenre.csv')[['track_id','supergenre']].rename(columns = {'songid':'SongID'})
genre = genre.drop_duplicates()
genre['supergenre'].unique()


array(['pop', 'country', 'rap', 'hiphop', 'other', 'latin', 'house',
       'folk', 'r&b', 'adult standards', 'rock', 'metal', 'show tunes',
       'soul', 'jazz'], dtype=object)

In [4]:
join_df = genre.merge(audio, on = 'track_id', how = 'left').dropna(how='any')
# target = join_df[['supergenre']]
feature_names =['spotify_track_duration_ms','danceability','energy','loudness','speechiness','acousticness','instrumentalness','valence']
features = join_df[feature_names]
features.head()

Unnamed: 0,spotify_track_duration_ms,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence
0,270740.0,0.734,0.836,-4.803,0.0735,0.017,1.6e-05,0.623
2,215733.0,0.572,0.385,-6.362,0.0308,0.371,0.0,0.323
3,215733.0,0.572,0.385,-6.362,0.0308,0.371,0.0,0.323
4,196760.0,0.542,0.674,-4.169,0.21,0.0588,0.0,0.667
5,228185.0,0.948,0.623,-5.725,0.168,0.00124,1e-06,0.856


In [5]:
join_df['target'] = join_df['supergenre'].replace('pop',float(0)).replace('country',float(1))\
.replace('hiphop',float(2)).replace('other',float(3)).replace('latin',float(3)).replace('latin',float(4))\
.replace('house',float(5)).replace('folk',float(6)).replace('r&b',float(7)).replace('adult standards',float(8))\
.replace('rock',float(9)).replace('metal',float(10)).replace('show tunes',float(11)).replace('soul',float(12))\
.replace('rap',float(13)).replace('jazz',float(14))

target = join_df[['target']]

In [6]:
# Use train_test_split to create training and testing data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(features, target, random_state=1)
y_train.head()

Unnamed: 0,target
9035,9.0
11009,0.0
7782,9.0
9085,1.0
25491,5.0


In [7]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScater model and fit it to the training data
X_scaler = StandardScaler().fit(X_train)

In [8]:
# Transform the training and testing data using the X_scaler

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [9]:
from tensorflow.keras.utils import to_categorical

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [18]:
# One-hot encoding
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

In [19]:
# first, create a normal neural network with 2 inputs, 6 hidden nodes, and 2 outputs
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=6, activation='relu', input_dim=8))
model.add(Dense(units=15, activation='softmax'))

In [20]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 6)                 54        
_________________________________________________________________
dense_7 (Dense)              (None, 15)                105       
Total params: 159
Trainable params: 159
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [14]:
# Fit the model to the training data
model.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    shuffle=True,
    verbose=2
)

Epoch 1/100
17670/17670 - 2s - loss: 2.2280 - acc: 0.2876
Epoch 2/100
17670/17670 - 1s - loss: 1.8640 - acc: 0.3663
Epoch 3/100
17670/17670 - 1s - loss: 1.7773 - acc: 0.3765
Epoch 4/100
17670/17670 - 2s - loss: 1.7413 - acc: 0.3801
Epoch 5/100
17670/17670 - 2s - loss: 1.7212 - acc: 0.3810
Epoch 6/100
17670/17670 - 2s - loss: 1.7083 - acc: 0.3824
Epoch 7/100
17670/17670 - 2s - loss: 1.6997 - acc: 0.3838
Epoch 8/100
17670/17670 - 1s - loss: 1.6937 - acc: 0.3857
Epoch 9/100
17670/17670 - 2s - loss: 1.6899 - acc: 0.3869
Epoch 10/100
17670/17670 - 2s - loss: 1.6871 - acc: 0.3871
Epoch 11/100
17670/17670 - 1s - loss: 1.6850 - acc: 0.3878
Epoch 12/100
17670/17670 - 1s - loss: 1.6830 - acc: 0.3881
Epoch 13/100
17670/17670 - 2s - loss: 1.6817 - acc: 0.3898
Epoch 14/100
17670/17670 - 2s - loss: 1.6805 - acc: 0.3891
Epoch 15/100
17670/17670 - 2s - loss: 1.6794 - acc: 0.3895
Epoch 16/100
17670/17670 - 2s - loss: 1.6787 - acc: 0.3911
Epoch 17/100
17670/17670 - 2s - loss: 1.6776 - acc: 0.3892
Epoch 

<tensorflow.python.keras.callbacks.History at 0x1d726231cc8>

In [15]:
deep_model = Sequential()
deep_model.add(Dense(units=20, activation='selu', input_dim=8)) #selu vs relu?
deep_model.add(Dense(units=25, activation='selu'))
deep_model.add(Dense(units=25, activation='selu'))
deep_model.add(Dense(units=15, activation='softmax'))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [16]:
deep_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 20)                180       
_________________________________________________________________
dense_3 (Dense)              (None, 25)                525       
_________________________________________________________________
dense_4 (Dense)              (None, 25)                650       
_________________________________________________________________
dense_5 (Dense)              (None, 15)                390       
Total params: 1,745
Trainable params: 1,745
Non-trainable params: 0
_________________________________________________________________


In [17]:
deep_model.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

deep_model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Epoch 1/100
17670/17670 - 2s - loss: 1.8805 - acc: 0.3516
Epoch 2/100
17670/17670 - 2s - loss: 1.7015 - acc: 0.3790
Epoch 3/100
17670/17670 - 2s - loss: 1.6814 - acc: 0.3895
Epoch 4/100
17670/17670 - 2s - loss: 1.6674 - acc: 0.3913
Epoch 5/100
17670/17670 - 2s - loss: 1.6564 - acc: 0.3960
Epoch 6/100
17670/17670 - 2s - loss: 1.6494 - acc: 0.3993
Epoch 7/100
17670/17670 - 3s - loss: 1.6430 - acc: 0.3993
Epoch 8/100
17670/17670 - 2s - loss: 1.6376 - acc: 0.4016
Epoch 9/100
17670/17670 - 2s - loss: 1.6348 - acc: 0.4035
Epoch 10/100
17670/17670 - 2s - loss: 1.6293 - acc: 0.4065
Epoch 11/100
17670/17670 - 2s - loss: 1.6278 - acc: 0.4054
Epoch 12/100
17670/17670 - 2s - loss: 1.6227 - acc: 0.4090
Epoch 13/100
17670/17670 - 2s - loss: 1.6185 - acc: 0.4109
Epoch 14/100
17670/17670 - 2s - loss: 1.6183 - acc: 0.4105
Epoch 15/100
17670/17670 - 2s - loss: 1.6147 - acc: 0.4145
Epoch 16/100
17670/17670 - 2s - loss: 1.6122 - acc: 0.4098
Epoch 17/100
17670/17670 - 2s - loss: 1.6106 - acc: 0.4132
Epoch 

<tensorflow.python.keras.callbacks.History at 0x1d726617648>