In [1]:
import os
import glob
import pandas as pd

In [2]:
# Define working directory.
# The original author's local checkout stays the default so existing runs are
# unaffected; set the SPOTIFY_DATA_DIR environment variable to point the
# notebook at the CSV folder on any other machine.
DATA_DIR = os.environ.get(
    "SPOTIFY_DATA_DIR",
    r"C:\Users\Cristina Bardan\Desktop\Repositories\FinalProject-Spotify\Original_data",
)
os.chdir(DATA_DIR)

In [3]:
# Use glob to match every '*.csv' file in the working directory.
extension = 'csv'
# Sort the matches: glob returns names in arbitrary, OS-dependent order, and the
# row order of the combined frame feeds the seeded train/test split further down.
all_filenames = sorted(glob.glob('*.{}'.format(extension)))
if not all_filenames:
    # Fail with an actionable message instead of pandas' "No objects to concatenate".
    raise FileNotFoundError(
        "No .{} files found in {}".format(extension, os.getcwd())
    )

# Combine all files into one DataFrame with a fresh 0..n-1 index
# (nothing is written back to disk here).
df = pd.concat((pd.read_csv(f) for f in all_filenames), ignore_index=True)

In [4]:
# Drop the 'track', 'artist' and 'uri' columns; none of them is used as a model input below.
df2 = df.drop(columns=['track', 'artist', 'uri'])

In [5]:
# Without tempo & speechiness
data = df2

# Columns fed to the model as inputs (13 in total).
feature_columns = [
    'danceability', 'energy', 'key', 'loudness', 'mode',
    'acousticness', 'instrumentalness', 'liveness', 'valence',
    'duration_ms', 'time_signature', 'chorus_hit', 'sections',
]
X = data[feature_columns]

# The 'target' column holds the label to predict.
y = data['target']

In [6]:
from sklearn.model_selection import train_test_split

# Split features and labels with a fixed random_state so the partition is repeatable.
split_parts = train_test_split(X, y, random_state=10)
X_train, X_test, y_train, y_test = split_parts

## Data Preprocessing

In [7]:
# Scale features (X) into the [0, 1] range using MinMaxScaler
from sklearn.preprocessing import MinMaxScaler

# Fit on the training split only, then apply that same fitted scaler to both splits.
X_scaler = MinMaxScaler(feature_range=(0, 1))
X_scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

X_train_scaled

array([[0.78947368, 0.52283395, 0.81818182, ..., 0.8       , 0.19905999,
        0.06508876],
       [0.54048583, 0.83094117, 0.81818182, ..., 0.8       , 0.21633373,
        0.04142012],
       [0.5       , 0.66588373, 0.81818182, ..., 0.8       , 0.20480589,
        0.03550296],
       ...,
       [0.57692308, 0.36777999, 0.81818182, ..., 0.8       , 0.15844718,
        0.09467456],
       [0.41902834, 0.16070793, 0.63636364, ..., 0.8       , 0.200772  ,
        0.04733728],
       [0.29554656, 0.24873856, 0.81818182, ..., 0.6       , 0.12021911,
        0.06508876]])

One-hot encode the labels

In [8]:
# One-hot encode output labels (y)
from tensorflow.keras.utils import to_categorical

# Use one shared width for both splits. Without num_classes, to_categorical
# infers the width from the largest label in the array it is given, so a split
# that happened to miss the highest class would be encoded with fewer columns.
label_count = int(max(y_train.max(), y_test.max())) + 1
y_train_categorical = to_categorical(y_train, num_classes=label_count)
y_test_categorical = to_categorical(y_test, num_classes=label_count)

y_train_categorical

array([[0., 1.],
       [1., 0.],
       [0., 1.],
       ...,
       [0., 1.],
       [0., 1.],
       [1., 0.]], dtype=float32)

## Creating and defining our Deep Learning Model Architecture

In [9]:
# Create a sequential model
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Seed TensorFlow's global random generator before any layer is created so that
# weight initialisation is repeatable after a kernel restart (same value as the
# random_state used for the train/test split).
tf.random.set_seed(10)

model = Sequential()

# Take the input width from the scaled training matrix instead of hard-coding 13,
# so the network keeps matching the feature list if columns are added or removed.
number_inputs = X_train_scaled.shape[1]

# Create hidden layers
model.add(Dense(units=14, activation='relu', input_dim=number_inputs))
model.add(Dense(units=120, activation='relu'))
model.add(Dense(units=80, activation='relu'))

# Create output layer: one softmax unit per one-hot label column.
number_classes = y_train_categorical.shape[1]
model.add(Dense(units=number_classes, activation='softmax'))

Reference on how to choose the number of hidden layers and nodes:
https://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw#:~:text=The%20number%20of%20hidden%20neurons,size%20of%20the%20input%20layer.

In [10]:
# Model summary: print each layer with its output shape and parameter count
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 14)                196       
_________________________________________________________________
dense_1 (Dense)              (None, 120)               1800      
_________________________________________________________________
dense_2 (Dense)              (None, 80)                9680      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 162       
Total params: 11,838
Trainable params: 11,838
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Compile the model: Adam optimizer, categorical cross-entropy loss, accuracy metric
import tensorflow as tf

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

## Training the Model

In [12]:
# Train the model on the scaled features and one-hot labels
fit_options = dict(epochs=500, batch_size=2000, shuffle=True, verbose=2)
history = model.fit(X_train_scaled, y_train_categorical, **fit_options)

Epoch 1/500
16/16 - 0s - loss: 0.6548 - accuracy: 0.6629
Epoch 2/500
16/16 - 0s - loss: 0.5873 - accuracy: 0.6910
Epoch 3/500
16/16 - 0s - loss: 0.5530 - accuracy: 0.7065
Epoch 4/500
16/16 - 0s - loss: 0.5390 - accuracy: 0.7229
Epoch 5/500
16/16 - 0s - loss: 0.5289 - accuracy: 0.7324
Epoch 6/500
16/16 - 0s - loss: 0.5215 - accuracy: 0.7373
Epoch 7/500
16/16 - 0s - loss: 0.5161 - accuracy: 0.7405
Epoch 8/500
16/16 - 0s - loss: 0.5129 - accuracy: 0.7430
Epoch 9/500
16/16 - 0s - loss: 0.5106 - accuracy: 0.7446
Epoch 10/500
16/16 - 0s - loss: 0.5096 - accuracy: 0.7453
Epoch 11/500
16/16 - 0s - loss: 0.5072 - accuracy: 0.7474
Epoch 12/500
16/16 - 0s - loss: 0.5064 - accuracy: 0.7466
Epoch 13/500
16/16 - 0s - loss: 0.5060 - accuracy: 0.7478
Epoch 14/500
16/16 - 0s - loss: 0.5043 - accuracy: 0.7487
Epoch 15/500
16/16 - 0s - loss: 0.5034 - accuracy: 0.7498
Epoch 16/500
16/16 - 0s - loss: 0.5029 - accuracy: 0.7501
Epoch 17/500
16/16 - 0s - loss: 0.5025 - accuracy: 0.7516
Epoch 18/500
16/16 - 0s

## Validation of the Model

In [13]:
# Compare model performance between training and testing data
model_loss_train, model_accuracy_train = model.evaluate(
    X_train_scaled, y_train_categorical, verbose=2
)
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2
)

# Report both splits next to each other so the train/test gap is easy to see.
print(f"Train --> Loss: {model_loss_train}, Accuracy: {model_accuracy_train}")
print(f"Test --> Loss: {model_loss}, Accuracy: {model_accuracy}")

964/964 - 1s - loss: 0.4513 - accuracy: 0.7824
322/322 - 0s - loss: 0.4866 - accuracy: 0.7678
Train --> Loss: 0.4513172209262848, Accuracy: 0.7824126482009888
Test --> Loss: 0.486555814743042, Accuracy: 0.7678310871124268


## Saving the Trained Model

In [14]:
# Save the model
model_path = "../Models/h5/stempo&speech.h5"
# Make sure the target folder exists first: depending on the Keras version the
# HDF5 writer may not create missing parent directories, and failing here would
# throw away the whole training run.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)