![Billboard banner](https://www.clipartkey.com/mpngs/m/62-628657_billboard-logo-png-billboard-top-100.png)

# Neural Network Machine Learning Model

In [1]:
# Import the necessary libraries
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
# Confirmation message so the cell output shows that the imports succeeded
print("All neccessary libraries imported")

All neccessary libraries imported


In [2]:
# Set the seed values for the notebook so the results are reproducible.
# NumPy's global generator is seeded first. TensorFlow keeps its own global
# generator (the Keras models below rely on it), so it is seeded separately.
from numpy.random import seed

SEED = 42
seed(SEED)
try:
    import tensorflow as tf
except ImportError:
    # TensorFlow may only get installed by the optional install cell further
    # down; re-run this cell afterwards so its generator is seeded as well.
    print("TensorFlow is not installed yet - only the NumPy seed was set")
else:
    tf.random.set_seed(SEED)

In [3]:
# Relative path to the raw song-attribute CSV inside the Resources directory
# (edit it if the data lives somewhere else)
attribute_data = (
    "Resources/BillboardFromLast20/songAttributes_1999-2019.csv"
)

In [4]:
# Load the song attributes and discard the leftover 'Unnamed: 0' column
# together with the string-valued columns
text_columns = ['Unnamed: 0', 'Album', 'Artist', 'Name']
df_attributes = pd.read_csv(attribute_data).drop(columns=text_columns)
# Store the boolean Explicit flag as an integer
df_attributes["Explicit"] = df_attributes["Explicit"].astype(int)
# Report the shape in bold (ANSI escape codes), then preview the first 5 rows
shape_message = "The shape of the {} dataframe is {}:".format("attribute", df_attributes.shape)
print('\033[1m' + shape_message + '\033[0m')
df_attributes.head()

[1mThe shape of the attribute dataframe is (154931, 14):[0m


Unnamed: 0,Acousticness,Danceability,Duration,Energy,Explicit,Instrumentalness,Liveness,Loudness,Mode,Popularity,Speechiness,Tempo,TimeSignature,Valence
0,0.000728,0.52,234947,0.904,0,0.0103,0.0634,-5.03,1,35,0.0309,106.022,4,0.365
1,0.0182,0.581,239573,0.709,0,0.000664,0.174,-4.909,1,31,0.0282,120.027,4,0.408
2,0.000473,0.572,198400,0.918,0,0.000431,0.0977,-3.324,0,30,0.0559,144.061,4,0.37
3,0.00097,0.596,231453,0.661,0,3.3e-05,0.113,-5.051,1,35,0.0254,111.975,4,0.183
4,3.6e-05,0.52,222520,0.808,0,1e-05,0.08,-4.553,0,21,0.0318,92.721,4,0.666


In [5]:
# Show every column label of the dataframe as a plain Python list
print(list(df_attributes.columns))

['Acousticness', 'Danceability', 'Duration', 'Energy', 'Explicit', 'Instrumentalness', 'Liveness', 'Loudness', 'Mode', 'Popularity', 'Speechiness', 'Tempo', 'TimeSignature', 'Valence']


In [6]:
# Basic information about the df_attributes dataframe via the info() method
# (column names, non-null counts, dtypes and memory usage)
df_attributes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 154931 entries, 0 to 154930
Data columns (total 14 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Acousticness      154931 non-null  float64
 1   Danceability      154931 non-null  float64
 2   Duration          154931 non-null  int64  
 3   Energy            154931 non-null  float64
 4   Explicit          154931 non-null  int32  
 5   Instrumentalness  154931 non-null  float64
 6   Liveness          154931 non-null  float64
 7   Loudness          154931 non-null  float64
 8   Mode              154931 non-null  int64  
 9   Popularity        154931 non-null  int64  
 10  Speechiness       154931 non-null  float64
 11  Tempo             154931 non-null  float64
 12  TimeSignature     154931 non-null  int64  
 13  Valence           154931 non-null  float64
dtypes: float64(9), int32(1), int64(4)
memory usage: 16.0 MB


## Create a new column for popularity rank

In [7]:
# Create a new column that bins the popularity into ten rank classes, 0-9
# (popularity 0-9 -> 0, 10-19 -> 1, ..., 90-99 -> 9).
# A popularity of 100 or more would otherwise fall into an eleventh bin (10),
# which the 10-unit softmax output layer cannot represent, so the rank is
# capped at 9.
df_attributes["Rank"] = (df_attributes["Popularity"]/10).astype(int).clip(upper=9)
# Let's see the new column
df_attributes.head(10)

Unnamed: 0,Acousticness,Danceability,Duration,Energy,Explicit,Instrumentalness,Liveness,Loudness,Mode,Popularity,Speechiness,Tempo,TimeSignature,Valence,Rank
0,0.000728,0.52,234947,0.904,0,0.0103,0.0634,-5.03,1,35,0.0309,106.022,4,0.365,3
1,0.0182,0.581,239573,0.709,0,0.000664,0.174,-4.909,1,31,0.0282,120.027,4,0.408,3
2,0.000473,0.572,198400,0.918,0,0.000431,0.0977,-3.324,0,30,0.0559,144.061,4,0.37,3
3,0.00097,0.596,231453,0.661,0,3.3e-05,0.113,-5.051,1,35,0.0254,111.975,4,0.183,3
4,3.6e-05,0.52,222520,0.808,0,1e-05,0.08,-4.553,0,21,0.0318,92.721,4,0.666,2
5,0.0106,0.353,263880,0.754,0,7.3e-05,0.12,-4.57,1,22,0.0452,138.003,4,0.546,2
6,0.0436,0.629,214973,0.787,0,4.7e-05,0.1,-5.657,1,37,0.0277,100.44,4,0.476,3
7,0.000402,0.646,206067,0.853,0,9e-05,0.117,-4.685,1,31,0.0278,127.963,4,0.699,3
8,0.000521,0.645,216400,0.84,0,1.1e-05,0.037,-4.995,1,20,0.0697,140.025,4,0.795,2
9,0.00038,0.601,212013,0.861,0,2e-06,0.131,-4.69,1,22,0.0319,120.017,4,0.615,2


## Creating input and output datasets

In [8]:
import numpy as np
# Labels: the Rank column, kept two-dimensional (one column)
y = df_attributes[['Rank']].to_numpy()
# Features: a separate frame without the target-related columns,
# so df_attributes itself is left untouched
df_copy = df_attributes.drop(columns=['Popularity', 'Rank'])
X = df_copy.to_numpy()
# Show the dimensions of the feature and label arrays, one per line
print(X.shape, y.shape, sep="\n")

(154931, 13)
(154931, 1)


## Creating train and test splits

In [9]:
# Split the features and labels into training and testing subsets;
# random_state pins the shuffle so the split is the same on every run
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

## Data Preprocessing

In [10]:
# Fit a StandardScaler on the training features only
from sklearn.preprocessing import StandardScaler

X_scaler = StandardScaler()
# fit() hands back the fitted scaler, so the name keeps pointing at it
X_scaler = X_scaler.fit(X_train)

In [11]:
# Apply the scaler fitted on the training data to both feature sets
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [12]:
# Optional setup: if the tensorflow module is not available, uncomment the
# lines below and run this cell once.
# (The rest of the notebook imports Keras through tensorflow.keras.)
#!pip install keras
#!pip install --upgrade tensorflow

## One hot encoding of labels

In [13]:
from tensorflow.keras.utils import to_categorical
# Convert the labels (y_train and y_test) to one-hot encoded arrays.
# The number of classes is passed explicitly: when left to infer it,
# to_categorical sizes each array from the largest label it sees, so a split
# that happens to lack the top rank would end up with fewer columns than the
# other split and than the 10-unit output layer.
num_rank_classes = 10  # Rank takes the values 0-9
y_train_categorical = to_categorical(y_train, num_classes=num_rank_classes)
y_test_categorical = to_categorical(y_test, num_classes=num_rank_classes)

## Defining the model architecture

In [14]:
from tensorflow.keras.models import Sequential
# Creating an empty sequential model; its layers are added in the next cells
model = Sequential()

## Defining the input of the model

In [15]:
from tensorflow.keras.layers import Dense
# One input per feature column: read the width from the scaled training data
# instead of hard-coding it, so the layer stays in step with the feature set
number_inputs = X_train_scaled.shape[1]
# Width of the hidden layer
number_hidden_nodes = 39
# Hidden layer with ReLU activation; input_dim declares the model's input size
model.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=number_inputs))

## Defining the output of the model

In [16]:
# Output layer: one softmax unit per label we are trying to predict
# (10 rank classes in this case)
number_classes = 10
output_layer = Dense(units=number_classes, activation='softmax')
model.add(output_layer)

## Summary of the model

In [17]:
# Print the layers of the model with their output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 39)                546       
_________________________________________________________________
dense_1 (Dense)              (None, 10)                400       
Total params: 946
Trainable params: 946
Non-trainable params: 0
_________________________________________________________________


## Compile the model

In [18]:
# The labels are one-hot encoded classes, hence categorical cross-entropy
# (a regression target would call for mean squared error instead)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

## Training the model

In [19]:
# Train the model on the scaled training features and one-hot labels
model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    epochs=100,  # one hundred passes over the training data
    shuffle=True,
    verbose=2,
)

Epoch 1/100
3632/3632 - 2s - loss: 1.6815 - accuracy: 0.3222
Epoch 2/100
3632/3632 - 2s - loss: 1.6572 - accuracy: 0.3286
Epoch 3/100
3632/3632 - 2s - loss: 1.6542 - accuracy: 0.3295
Epoch 4/100
3632/3632 - 2s - loss: 1.6523 - accuracy: 0.3304
Epoch 5/100
3632/3632 - 2s - loss: 1.6510 - accuracy: 0.3304
Epoch 6/100
3632/3632 - 2s - loss: 1.6499 - accuracy: 0.3304
Epoch 7/100
3632/3632 - 2s - loss: 1.6488 - accuracy: 0.3316
Epoch 8/100
3632/3632 - 2s - loss: 1.6484 - accuracy: 0.3311
Epoch 9/100
3632/3632 - 2s - loss: 1.6475 - accuracy: 0.3310
Epoch 10/100
3632/3632 - 2s - loss: 1.6473 - accuracy: 0.3314
Epoch 11/100
3632/3632 - 2s - loss: 1.6468 - accuracy: 0.3314
Epoch 12/100
3632/3632 - 2s - loss: 1.6466 - accuracy: 0.3313
Epoch 13/100
3632/3632 - 2s - loss: 1.6463 - accuracy: 0.3315
Epoch 14/100
3632/3632 - 2s - loss: 1.6458 - accuracy: 0.3323
Epoch 15/100
3632/3632 - 2s - loss: 1.6458 - accuracy: 0.3324
Epoch 16/100
3632/3632 - 2s - loss: 1.6455 - accuracy: 0.3320
Epoch 17/100
3632

<tensorflow.python.keras.callbacks.History at 0x1ba697b3248>

## Evaluating the model using the test data

In [28]:
# Score the trained model on the held-out testing data and report the result
model_loss, model_accuracy = model.evaluate(
    x=X_test_scaled,
    y=y_test_categorical,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1211/1211 - 0s - loss: 1.6466 - accuracy: 0.3311
Loss: 1.6466114521026611, Accuracy: 0.33113881945610046


# Deep Learning

In [21]:
# In this model we use an additional hidden layer between the input-side
# hidden layer and the softmax output. The layer sizes reuse the constants
# defined for the first model (number_inputs, number_hidden_nodes,
# number_classes) instead of repeating the literals 13, 39 and 10, so the two
# architectures cannot silently drift apart.
deep_model = Sequential()
deep_model.add(Dense(units=number_hidden_nodes, activation='relu', input_dim=number_inputs))
deep_model.add(Dense(units=number_hidden_nodes, activation='relu'))
deep_model.add(Dense(units=number_classes, activation='softmax'))

## Summary of Deep model

In [23]:
# Print the layers of the deep model with their output shapes and parameter counts
deep_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 39)                546       
_________________________________________________________________
dense_3 (Dense)              (None, 39)                1560      
_________________________________________________________________
dense_4 (Dense)              (None, 10)                400       
Total params: 2,506
Trainable params: 2,506
Non-trainable params: 0
_________________________________________________________________


## Compile the deep model

In [24]:
# Compile the deep model with the same optimizer, loss and metric
# as the first model
deep_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

## Training the deep model

In [25]:
# Train the deep model on the scaled training features and one-hot labels
deep_model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    epochs=100,  # one hundred passes over the training data
    shuffle=True,
    verbose=2,
)

Epoch 1/100
3632/3632 - 2s - loss: 1.6774 - accuracy: 0.3252
Epoch 2/100
3632/3632 - 2s - loss: 1.6537 - accuracy: 0.3306
Epoch 3/100
3632/3632 - 2s - loss: 1.6497 - accuracy: 0.3319
Epoch 4/100
3632/3632 - 2s - loss: 1.6467 - accuracy: 0.3321
Epoch 5/100
3632/3632 - 2s - loss: 1.6450 - accuracy: 0.3325
Epoch 6/100
3632/3632 - 2s - loss: 1.6430 - accuracy: 0.3339
Epoch 7/100
3632/3632 - 2s - loss: 1.6414 - accuracy: 0.3343
Epoch 8/100
3632/3632 - 2s - loss: 1.6400 - accuracy: 0.3336
Epoch 9/100
3632/3632 - 2s - loss: 1.6387 - accuracy: 0.3352
Epoch 10/100
3632/3632 - 2s - loss: 1.6377 - accuracy: 0.3353
Epoch 11/100
3632/3632 - 2s - loss: 1.6370 - accuracy: 0.3358
Epoch 12/100
3632/3632 - 2s - loss: 1.6362 - accuracy: 0.3363
Epoch 13/100
3632/3632 - 2s - loss: 1.6351 - accuracy: 0.3360
Epoch 14/100
3632/3632 - 2s - loss: 1.6346 - accuracy: 0.3357
Epoch 15/100
3632/3632 - 2s - loss: 1.6339 - accuracy: 0.3370
Epoch 16/100
3632/3632 - 2s - loss: 1.6329 - accuracy: 0.3364
Epoch 17/100
3632

<tensorflow.python.keras.callbacks.History at 0x1ba6abe9a48>

In [26]:
# Score the trained deep model on the held-out testing data.
# Note: this reuses (and overwrites) model_loss / model_accuracy.
model_loss, model_accuracy = deep_model.evaluate(
    x=X_test_scaled,
    y=y_test_categorical,
    verbose=2,
)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1211/1211 - 0s - loss: 1.6531 - accuracy: 0.3297
Deep Neural Network - Loss: 1.6531181335449219, Accuracy: 0.3297446668148041


## Saving the deep model

In [27]:
# Save the trained deep model to an .h5 file; the path is relative, so the
# file is written to the current working directory
deep_model.save("artist_billboard_trained.h5")