![Billboard banner](https://www.clipartkey.com/mpngs/m/62-628657_billboard-logo-png-billboard-top-100.png)

# Neural Network Machine Learning Model

In [62]:
# Import the necessary libraries (plotting, dataframes, statistics, arrays)
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
# Confirmation message so the cell produces visible output once the imports succeed
print("All neccessary libraries imported")

All neccessary libraries imported


In [63]:
# Seed NumPy's global random generator so NumPy-driven randomness repeats between runs.
# NOTE(review): only NumPy is seeded here; TensorFlow/Keras presumably keeps its own
# random state and would need its own seed for the training logs to repeat - confirm.
from numpy.random import seed
RANDOM_SEED = 42
seed(RANDOM_SEED)

In [64]:
# Relative path to the raw song-attribute CSV inside the Resources directory
# (change the path as needed if the data lives elsewhere)
attribute_data = "Resources/BillboardFromLast20/songAttributes_1999-2019.csv"

In [65]:
# Load the song-attribute CSV, discard the columns that are not used as model
# inputs, and store the boolean Explicit column as an integer
unused_columns = ['Unnamed: 0', 'Album', 'Artist', 'Name']
df_attributes = (
    pd.read_csv(attribute_data)
    .drop(columns=unused_columns)
    .assign(Explicit=lambda frame: frame["Explicit"].astype(int))
)
# Report the dataframe's shape in bold, then preview its first 5 rows
shape_message = "The shape of the {} dataframe is {}:" .format("attribute", df_attributes.shape)
print('\033[1m' + shape_message + '\033[0m')
df_attributes.head()

[1mThe shape of the attribute dataframe is (154931, 14):[0m


Unnamed: 0,Acousticness,Danceability,Duration,Energy,Explicit,Instrumentalness,Liveness,Loudness,Mode,Popularity,Speechiness,Tempo,TimeSignature,Valence
0,0.000728,0.52,234947,0.904,0,0.0103,0.0634,-5.03,1,35,0.0309,106.022,4,0.365
1,0.0182,0.581,239573,0.709,0,0.000664,0.174,-4.909,1,31,0.0282,120.027,4,0.408
2,0.000473,0.572,198400,0.918,0,0.000431,0.0977,-3.324,0,30,0.0559,144.061,4,0.37
3,0.00097,0.596,231453,0.661,0,3.3e-05,0.113,-5.051,1,35,0.0254,111.975,4,0.183
4,3.6e-05,0.52,222520,0.808,0,1e-05,0.08,-4.553,0,21,0.0318,92.721,4,0.666


In [66]:
# Show every column label left after the clean-up, as a plain Python list
print(list(df_attributes.columns))

['Acousticness', 'Danceability', 'Duration', 'Energy', 'Explicit', 'Instrumentalness', 'Liveness', 'Loudness', 'Mode', 'Popularity', 'Speechiness', 'Tempo', 'TimeSignature', 'Valence']


In [67]:
# Basic information about the df_attributes dataframe (dtypes, non-null counts, memory usage) via info()
df_attributes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 154931 entries, 0 to 154930
Data columns (total 14 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Acousticness      154931 non-null  float64
 1   Danceability      154931 non-null  float64
 2   Duration          154931 non-null  int64  
 3   Energy            154931 non-null  float64
 4   Explicit          154931 non-null  int32  
 5   Instrumentalness  154931 non-null  float64
 6   Liveness          154931 non-null  float64
 7   Loudness          154931 non-null  float64
 8   Mode              154931 non-null  int64  
 9   Popularity        154931 non-null  int64  
 10  Speechiness       154931 non-null  float64
 11  Tempo             154931 non-null  float64
 12  TimeSignature     154931 non-null  int64  
 13  Valence           154931 non-null  float64
dtypes: float64(9), int32(1), int64(4)
memory usage: 16.0 MB


In [68]:
# Report the range of the Popularity column (highest first, then lowest), printed in bold
popularity = df_attributes["Popularity"]
bold_on, bold_off = '\033[1m', '\033[0m'
print(bold_on + "Maximum Popularity is : {}" .format(popularity.max()) + bold_off)
print(bold_on + "Minimum Popularity is : {}" .format(popularity.min()) + bold_off)

[1mMaximum Popularity is : 91[0m
[1mMinimum Popularity is : 0[0m


## Creating input and output datasets

In [69]:
import numpy as np
# Separate the target (Popularity) from the remaining attribute columns
target_column = 'Popularity'
y = df_attributes[[target_column]].to_numpy()  # 2-D array holding only the Popularity values
df_copy = df_attributes.drop(columns=[target_column])  # new frame; df_attributes itself is not modified
X = df_copy.to_numpy()  # attribute matrix, one row per dataframe row
# Print the shape of X, then of y, each on its own line
print(X.shape, y.shape, sep="\n")

(154931, 13)
(154931, 1)


## Creating train and test splits

In [70]:
# Use train_test_split to create training and testing data
# (random_state=1 pins the shuffle so the same rows land in each split on every run;
# no test_size is passed, so scikit-learn's default split proportion applies)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

## Data Preprocessing

In [71]:
# Importing StandardScaler from sklearn library
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training features only
X_scaler = StandardScaler()
X_scaler = X_scaler.fit(X_train)  # fit() hands back the fitted scaler, as the chained form relied on

In [72]:
# Apply the scaler fitted on the training features to both splits (train first, then test)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [73]:
# Install tensorflow module (if not available) by uncommenting below
#!pip install keras
#!pip install --upgrade tensorflow

## One-hot encoding of labels

In [74]:
from tensorflow.keras.utils import to_categorical
# Converting the labels (y_train and y_test) to categorical (one-hot) values.
# The encoding width is fixed from the full label array (largest label + 1)
# instead of being inferred per split: when num_classes is omitted,
# to_categorical sizes each result from that split's own maximum label, so a
# split that happens to miss the top Popularity value would come out narrower
# than the other split and than the model's output layer.
popularity_class_count = int(y.max()) + 1
y_train_categorical = to_categorical(y_train, num_classes=popularity_class_count)
y_test_categorical = to_categorical(y_test, num_classes=popularity_class_count)

## Defining the model architecture

In [75]:
from tensorflow.keras.models import Sequential
# Creating an empty sequential model; its layers are added in the following cells
model = Sequential()

## Defining the input of the model

In [76]:
from tensorflow.keras.layers import Dense
# First (hidden) layer: 13 input features feeding 39 ReLU units
number_inputs = 13
number_hidden_nodes = 39
hidden_layer = Dense(units=number_hidden_nodes, activation='relu', input_dim=number_inputs)
model.add(hidden_layer)

## Defining the output of the model

In [77]:
# Defining the output layer
number_classes = 92 # Labels we are trying to predict (100 in this case)
model.add(Dense(units=number_classes, activation='softmax'))

## Summary of the model

In [78]:
# Print the layer-by-layer structure and parameter counts of the single-hidden-layer model
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 39)                546       
_________________________________________________________________
dense_10 (Dense)             (None, 92)                3680      
Total params: 4,226
Trainable params: 4,226
Non-trainable params: 0
_________________________________________________________________


## Compile the model

In [79]:
# Use categorical crossentropy for categorical data and mean squared error for regression
# NOTE(review): every Popularity value is treated here as its own unordered class and
# the logged accuracy stays around 8%; a regression loss on the raw score may be
# worth trying - confirm with the notebook owner.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

## Training the model

In [80]:
# Fit (train) the model on the scaled training features and one-hot labels
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100, # One hundred passes (epochs) over the training data
    shuffle=True,
    verbose=2 # one summary line per epoch, as seen in the log below
)

Epoch 1/100
3632/3632 - 2s - loss: 3.9579 - accuracy: 0.0786
Epoch 2/100
3632/3632 - 2s - loss: 3.9159 - accuracy: 0.0807
Epoch 3/100
3632/3632 - 2s - loss: 3.9100 - accuracy: 0.0807
Epoch 4/100
3632/3632 - 2s - loss: 3.9059 - accuracy: 0.0807
Epoch 5/100
3632/3632 - 2s - loss: 3.9029 - accuracy: 0.0809
Epoch 6/100
3632/3632 - 2s - loss: 3.9006 - accuracy: 0.0808
Epoch 7/100
3632/3632 - 2s - loss: 3.8986 - accuracy: 0.0810
Epoch 8/100
3632/3632 - 2s - loss: 3.8970 - accuracy: 0.0807
Epoch 9/100
3632/3632 - 2s - loss: 3.8955 - accuracy: 0.0809
Epoch 10/100
3632/3632 - 2s - loss: 3.8946 - accuracy: 0.0806
Epoch 11/100
3632/3632 - 2s - loss: 3.8936 - accuracy: 0.0813
Epoch 12/100
3632/3632 - 2s - loss: 3.8925 - accuracy: 0.0812
Epoch 13/100
3632/3632 - 2s - loss: 3.8916 - accuracy: 0.0811
Epoch 14/100
3632/3632 - 2s - loss: 3.8907 - accuracy: 0.0812
Epoch 15/100
3632/3632 - 2s - loss: 3.8899 - accuracy: 0.0812
Epoch 16/100
3632/3632 - 2s - loss: 3.8891 - accuracy: 0.0811
Epoch 17/100
3632

<tensorflow.python.keras.callbacks.History at 0x1658dcbd0c8>

## Evaluating the model using the test data

In [81]:
# Score the trained model on the scaled test split and report loss and accuracy
test_metrics = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1211/1211 - 0s - loss: 3.9102 - accuracy: 0.0837
Loss: 3.91015887260437, Accuracy: 0.0837012380361557


# Deep Learning

In [82]:
# Deep variant: the same 13-input / 92-class network, with a second
# 39-unit ReLU hidden layer inserted before the softmax output
deep_layers = [
    Dense(units=39, activation='relu', input_dim=13),
    Dense(units=39, activation='relu'),
    Dense(units=92, activation='softmax'),
]
deep_model = Sequential(deep_layers)

## Summary of Deep model

In [83]:
# Print the layer-by-layer structure and parameter counts of the two-hidden-layer model
deep_model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             (None, 39)                546       
_________________________________________________________________
dense_12 (Dense)             (None, 39)                1560      
_________________________________________________________________
dense_13 (Dense)             (None, 92)                3680      
Total params: 5,786
Trainable params: 5,786
Non-trainable params: 0
_________________________________________________________________


## Compile the deep model

In [84]:
# Compiling the deep model with the same optimizer, loss and metric as the first model
deep_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

## Training the deep model

In [85]:
# Fit (train) the deep model: 100 epochs with shuffling enabled, one log line per epoch
deep_fit_options = dict(epochs=100, shuffle=True, verbose=2)
deep_model.fit(X_train_scaled, y_train_categorical, **deep_fit_options)

Epoch 1/100
3632/3632 - 2s - loss: 3.9461 - accuracy: 0.0795
Epoch 2/100
3632/3632 - 2s - loss: 3.9122 - accuracy: 0.0803
Epoch 3/100
3632/3632 - 2s - loss: 3.9042 - accuracy: 0.0807
Epoch 4/100
3632/3632 - 2s - loss: 3.8987 - accuracy: 0.0807
Epoch 5/100
3632/3632 - 2s - loss: 3.8945 - accuracy: 0.0809
Epoch 6/100
3632/3632 - 2s - loss: 3.8904 - accuracy: 0.0810
Epoch 7/100
3632/3632 - 2s - loss: 3.8873 - accuracy: 0.0814
Epoch 8/100
3632/3632 - 2s - loss: 3.8853 - accuracy: 0.0812
Epoch 9/100
3632/3632 - 2s - loss: 3.8831 - accuracy: 0.0815
Epoch 10/100
3632/3632 - 2s - loss: 3.8812 - accuracy: 0.0816
Epoch 11/100
3632/3632 - 2s - loss: 3.8796 - accuracy: 0.0816
Epoch 12/100
3632/3632 - 2s - loss: 3.8775 - accuracy: 0.0819
Epoch 13/100
3632/3632 - 2s - loss: 3.8765 - accuracy: 0.0817
Epoch 14/100
3632/3632 - 2s - loss: 3.8750 - accuracy: 0.0818
Epoch 15/100
3632/3632 - 2s - loss: 3.8738 - accuracy: 0.0816
Epoch 16/100
3632/3632 - 2s - loss: 3.8722 - accuracy: 0.0817
Epoch 17/100
3632

<tensorflow.python.keras.callbacks.History at 0x1658dbc88c8>

In [86]:
# Score the deep model on the scaled test split and report loss and accuracy
# (note: this rebinds model_loss / model_accuracy from the first model's evaluation)
deep_test_metrics = deep_model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
model_loss, model_accuracy = deep_test_metrics
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1211/1211 - 0s - loss: 3.9211 - accuracy: 0.0827
Deep Neural Network - Loss: 3.921096086502075, Accuracy: 0.08269434422254562
