In [2]:
# Import dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd
import tensorflow as tf
import numpy as np




In [39]:
# Read the spotify data in from data.csv
url = 'https://media.githubusercontent.com/media/jossharlequin/spotify-popularity-project/main/Resources/sql_spotify_data.csv'
spotify_df = pd.read_csv(url)
spotify_df.head()

Unnamed: 0,acousticness,danceability,energy,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,valence,duration_seconds
0,0.995,0.708,0.195,0,0.563,10,0.151,-12.428,1,0,0.0506,118.469,0.779,158.648
1,0.994,0.379,0.0135,0,0.901,8,0.0763,-28.454,1,0,0.0462,83.972,0.0767,282.133
2,0.604,0.749,0.22,0,0.0,5,0.119,-19.924,0,0,0.929,107.177,0.88,104.3
3,0.995,0.781,0.13,0,0.887,1,0.111,-14.734,0,0,0.0926,108.003,0.72,180.76
4,0.99,0.21,0.204,0,0.908,11,0.098,-16.829,1,1,0.0424,62.149,0.0693,687.733


In [40]:
# Setting popularity as the target variable and setting the remaining columns as features
y = spotify_df.popularity.values
X = spotify_df.drop(columns='popularity').values

In [41]:
# Scaling the data using StandarScaler as a preprocessing step for the neural network
# Creating the StandardScalar instance
scaler = StandardScaler()

# Fitting the X data
X_scaler = scaler.fit(X)

# Scaling the X data
X_scaled = X_scaler.transform(X)

# Splitting training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=42)

In [42]:
# Binning the target variable into groups based  
bins = [-1, 50, 100]
labels = [0, 1]
y_train_binned = pd.cut(y_train, bins=bins, labels=labels)
y_test_binned = pd.cut(y_test, bins=bins, labels=labels)

encoder = OneHotEncoder(sparse=False)
y_train_one_hot = encoder.fit_transform(y_train_binned.reshape(-1,1))
y_test_one_hot = encoder.transform(y_test_binned.reshape(-1,1))



In [43]:
# Defining the neural network
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=15, activation="relu", input_dim=13))
nn_model.add(tf.keras.layers.Dense(units=15, activation="relu"))
nn_model.add(tf.keras.layers.Dense(units=15, activation="relu"))
nn_model.add(tf.keras.layers.Dense(2, activation="softmax"))

# Compile the sequential model together and customize metrics
nn_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Training the model
fit_model = nn_model.fit(X_train, y_train_one_hot, epochs=100)

# Evaluating the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test, y_test_one_hot, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# Finding predicted y classes
y_predicted = nn_model.predict(X_test)

# Converting y_predicted and y_test_one_hot into integer values for confusion matrix and accuracy score
y_test_classes = np.argmax(y_test_one_hot, axis=1)
y_predicted_classes = np.argmax(y_predicted, axis=1)

# Creating confusion matrix
cm = confusion_matrix(y_test_classes, y_predicted_classes)
cm_df = pd.DataFrame(
    cm, index=['Actual 0','Actual 1'], columns=['Predicted 0','Predicted 1']
)

# Creating accuracy score
acc_score = accuracy_score(y_test_classes, y_predicted_classes)

# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score: {acc_score}")
print("Classification Report")
print(classification_report(y_test_classes, y_predicted_classes))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,31659,1809
Actual 1,5192,3818


Accuracy Score: 0.8351852723762889
Classification Report
              precision    recall  f1-score   support

           0       0.86      0.95      0.90     33468
           1       0.68      0.42      0.52      9010

    accuracy                           0.84     42478
   macro avg       0.77      0.68      0.71     42478
weighted avg       0.82      0.84      0.82     42478



In [31]:
# Binning the data into quintiles based on number of samples per group instead of absolute values that the data falls into
labels=[0,1]
q_binned_data = pd.qcut(spotify_df['popularity'], 2, labels=labels, precision=0)

display(type(q_binned_data))

# Scaling the data using StandarScaler as a preprocessing step for the neural network
# Creating the StandardScalar instance
scaler = StandardScaler()

# Fitting the X data
X_scaler = scaler.fit(X)

# Scaling the X data
X_scaled = X_scaler.transform(X)

# Splitting training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, q_binned_data, random_state=42)

# Convert y_train and y_test from Pandas Series to NumPy arrays
y_train_np = np.array(y_train).reshape(-1, 1)
y_test_np = np.array(y_test).reshape(-1, 1)

encoder = OneHotEncoder(sparse=False)
y_train_one_hot = encoder.fit_transform(y_train_np)
y_test_one_hot = encoder.transform(y_test_np)

pandas.core.series.Series



In [37]:
# Defining the neural network
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=18, activation="relu", input_dim=13))
nn_model.add(tf.keras.layers.Dense(units=18, activation="relu"))
nn_model.add(tf.keras.layers.Dense(units=18, activation="relu"))
nn_model.add(tf.keras.layers.Dense(units=18, activation="relu"))
nn_model.add(tf.keras.layers.Dense(2, activation="softmax"))

# Compile the sequential model together and customize metrics
nn_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Training the model
fit_model = nn_model.fit(X_train, y_train_one_hot, epochs=100)

# Evaluating the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test, y_test_one_hot, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# Finding predicted y classes
y_predicted = nn_model.predict(X_test)

# Converting y_predicted and y_test_one_hot into integer values for confusion matrix and accuracy score
y_test_classes = np.argmax(y_test_one_hot, axis=1)
y_predicted_classes = np.argmax(y_predicted, axis=1)

# Creating confusion matrix
cm = confusion_matrix(y_test_classes, y_predicted_classes)
cm_df = pd.DataFrame(
    cm, index=['Actual 0','Actual 1'], columns=['Predicted 0','Predicted 1']
)

# Creating accuracy score
acc_score = accuracy_score(y_test_classes, y_predicted_classes)

# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score: {acc_score}")
print("Classification Report")
print(classification_report(y_test_classes, y_predicted_classes))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,16427,4681
Actual 1,4366,17004


Accuracy Score: 0.787019162860775
Classification Report
              precision    recall  f1-score   support

           0       0.79      0.78      0.78     21108
           1       0.78      0.80      0.79     21370

    accuracy                           0.79     42478
   macro avg       0.79      0.79      0.79     42478
weighted avg       0.79      0.79      0.79     42478

