In [77]:
# Import dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pandas as pd
import tensorflow as tf
import numpy as np

In [78]:
# Read the spotify data in from data.csv
url = 'https://media.githubusercontent.com/media/jossharlequin/spotify-popularity-project/main/Resources/data.csv'
df = pd.read_csv(url)
df.head()

Unnamed: 0,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo,valence,year
0,0.995,['Carl Woitschach'],0.708,158648,0.195,0,6KbQ3uYMLKb5jDxLF7wYDD,0.563,10,0.151,-12.428,1,Singende Bataillone 1. Teil,0,1928,0.0506,118.469,0.779,1928
1,0.994,"['Robert Schumann', 'Vladimir Horowitz']",0.379,282133,0.0135,0,6KuQTIu1KoTTkLXKrwlLPV,0.901,8,0.0763,-28.454,1,"Fantasiestücke, Op. 111: Più tosto lento",0,1928,0.0462,83.972,0.0767,1928
2,0.604,['Seweryn Goszczyński'],0.749,104300,0.22,0,6L63VW0PibdM1HDSBoqnoM,0.0,5,0.119,-19.924,0,Chapter 1.18 - Zamek kaniowski,0,1928,0.929,107.177,0.88,1928
3,0.995,['Francisco Canaro'],0.781,180760,0.13,0,6M94FkXd15sOAOQYRnWPN8,0.887,1,0.111,-14.734,0,Bebamos Juntos - Instrumental (Remasterizado),0,1928-09-25,0.0926,108.003,0.72,1928
4,0.99,"['Frédéric Chopin', 'Vladimir Horowitz']",0.21,687733,0.204,0,6N6tiFZ9vLTSOIxkj8qKrd,0.908,11,0.098,-16.829,1,"Polonaise-Fantaisie in A-Flat Major, Op. 61",1,1928,0.0424,62.149,0.0693,1928


In [79]:
# Drop unused columns
spotify_df = df.drop(columns=['artists', 'name', 'id', 'release_date', 'year'])
spotify_df['seconds'] = spotify_df['duration_ms']/1000
# spotify_df.info()
spotify_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 169909 entries, 0 to 169908
Data columns (total 15 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   acousticness      169909 non-null  float64
 1   danceability      169909 non-null  float64
 2   duration_ms       169909 non-null  int64  
 3   energy            169909 non-null  float64
 4   explicit          169909 non-null  int64  
 5   instrumentalness  169909 non-null  float64
 6   key               169909 non-null  int64  
 7   liveness          169909 non-null  float64
 8   loudness          169909 non-null  float64
 9   mode              169909 non-null  int64  
 10  popularity        169909 non-null  int64  
 11  speechiness       169909 non-null  float64
 12  tempo             169909 non-null  float64
 13  valence           169909 non-null  float64
 14  seconds           169909 non-null  float64
dtypes: float64(10), int64(5)
memory usage: 19.4 MB


In [80]:
# Setting popularity as the target variable and setting the remaining columns as features
y = spotify_df.popularity.values
X = spotify_df.drop(columns='popularity').values

In [81]:
# Scaling the data using StandarScaler as a preprocessing step for the neural network
# Creating the StandardScalar instance
scaler = StandardScaler()

# Fitting the X data
X_scaler = scaler.fit(X)

# Scaling the X data
X_scaled = X_scaler.transform(X)

# Splitting training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=42)

In [82]:
# Binning the target variable into 10 groups  
bins = [-1, 20, 40, 60, 80, 100]
labels = [0, 2, 4, 6, 8]
y_train_binned = pd.cut(y_train, bins=bins, labels=labels)
y_test_binned = pd.cut(y_test, bins=bins, labels=labels)

# Converting the target variable to one-hot encoded format
num_classes = len(labels)
y_train_one_hot = tf.keras.utils.to_categorical(y_train_binned, num_classes=num_classes)
y_test_one_hot = tf.keras.utils.to_categorical(y_test_binned, num_classes=num_classes)

IndexError: index 6 is out of bounds for axis 1 with size 5

In [66]:
!pip install keras-tuner



In [67]:
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=100,
        step=2), activation=activation, input_dim=14))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 10)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=100,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=10, activation="softmax"))

    # Compile the model
    nn_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

    return nn_model

In [68]:
# Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=10,
    hyperband_iterations=2)

Reloading Tuner from .\untitled_project\tuner0.json


In [69]:
# Running the kerastuner search for best hyperparameters
tuner.search(X_train, y_train_one_hot, epochs=10, validation_data=(X_test, y_test_one_hot))

In [76]:
# Defining the neural network
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=16, activation="relu", input_dim=14))
nn_model.add(tf.keras.layers.Dense(10, activation="softmax"))

# Compile the sequential model together and customize metrics
nn_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Training the model
fit_model = nn_model.fit(X_train, y_train_one_hot, epochs=10)

# Evaluating the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test, y_test_one_hot, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1328/1328 - 2s - loss: 1.4873 - accuracy: 0.4127 - 2s/epoch - 1ms/step
Loss: 1.4873112440109253, Accuracy: 0.4126606583595276


In [59]:
# Defining the neural network
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=16, activation="tanh", input_dim=14))
nn_model.add(tf.keras.layers.Dense(10, activation="softmax"))

# Compile the sequential model together and customize metrics
nn_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Training the model
fit_model = nn_model.fit(X_train, y_train_one_hot, epochs=10)

# Evaluating the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test, y_test_one_hot, verbose=2)
print(f"Loss: {model_loss}, Accuracy score: {model_accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1328/1328 - 2s - loss: 1.4837 - accuracy: 0.4144 - 2s/epoch - 1ms/step
Loss: 1.4837090969085693, Accuracy score: 0.4144027531147003


In [74]:
# Defining the neural network
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=50, activation="tanh", input_dim=14))
nn_model.add(tf.keras.layers.Dense(units=50, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=10, activation="softmax"))

# Compile the sequential model together and customize metrics
nn_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Training the model
fit_model = nn_model.fit(X_train, y_train_one_hot, epochs=10)

# Evaluating the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test, y_test_one_hot, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1328/1328 - 2s - loss: 1.4570 - accuracy: 0.4164 - 2s/epoch - 1ms/step
Loss: 1.4569848775863647, Accuracy: 0.41638025641441345


In [75]:
# Defining the neural network
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=50, activation="tanh", input_dim=14))
nn_model.add(tf.keras.layers.Dense(units=50, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=50, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=10, activation="softmax"))

# Compile the sequential model together and customize metrics
nn_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Training the model
fit_model = nn_model.fit(X_train, y_train_one_hot, epochs=10)

# Evaluating the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test, y_test_one_hot, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1328/1328 - 2s - loss: 1.4517 - accuracy: 0.4210 - 2s/epoch - 1ms/step
Loss: 1.4517264366149902, Accuracy: 0.42104148864746094
