![Billboard banner](https://www.clipartkey.com/mpngs/m/62-628657_billboard-logo-png-billboard-top-100.png)

# Neural Network Machine Learning Model Version 3

In [1]:
# Import the necessary libraries
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
print("All neccessary libraries imported")

All neccessary libraries imported


In [2]:
# Set the seed values for the notebook so the results are reproducible.
# The models below are built with tf.keras, whose weight initialisation relies
# on TensorFlow's own random generator, so seeding NumPy alone is not enough.
import random
from numpy.random import seed
import tensorflow as tf
random.seed(42)         # Python's built-in generator
seed(42)                # NumPy's global generator
tf.random.set_seed(42)  # TensorFlow's global generator

In [3]:
# Relative path of the raw song-attributes CSV inside the Resources directory
# (adjust it if the data lives somewhere else)
attribute_data = (
    "Resources/BillboardFromLast20/"
    "songAttributes_1999-2019.csv"
)

In [4]:
# Load the song-attribute CSV, drop the leftover 'Unnamed: 0' column together
# with the string-valued columns, and store the Explicit flag as an integer.
df_attributes = (
    pd.read_csv(attribute_data)
    .drop(columns=['Unnamed: 0', 'Album', 'Artist', 'Name'])
    .astype({"Explicit": int})
)
# Print the shape in bold (ANSI escape codes), then preview the first 5 rows
shape_message = "The shape of the {} dataframe is {}:".format("attribute", df_attributes.shape)
print('\033[1m' + shape_message + '\033[0m')
df_attributes.head()

[1mThe shape of the attribute dataframe is (154931, 14):[0m


Unnamed: 0,Acousticness,Danceability,Duration,Energy,Explicit,Instrumentalness,Liveness,Loudness,Mode,Popularity,Speechiness,Tempo,TimeSignature,Valence
0,0.000728,0.52,234947,0.904,0,0.0103,0.0634,-5.03,1,35,0.0309,106.022,4,0.365
1,0.0182,0.581,239573,0.709,0,0.000664,0.174,-4.909,1,31,0.0282,120.027,4,0.408
2,0.000473,0.572,198400,0.918,0,0.000431,0.0977,-3.324,0,30,0.0559,144.061,4,0.37
3,0.00097,0.596,231453,0.661,0,3.3e-05,0.113,-5.051,1,35,0.0254,111.975,4,0.183
4,3.6e-05,0.52,222520,0.808,0,1e-05,0.08,-4.553,0,21,0.0318,92.721,4,0.666


In [5]:
# Show every column label of the dataframe as a plain Python list
print(list(df_attributes.columns))

['Acousticness', 'Danceability', 'Duration', 'Energy', 'Explicit', 'Instrumentalness', 'Liveness', 'Loudness', 'Mode', 'Popularity', 'Speechiness', 'Tempo', 'TimeSignature', 'Valence']


In [6]:
# Basic information of the df_attributes dataframe by the info() method
# (column names, non-null counts, dtypes and memory usage)
df_attributes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 154931 entries, 0 to 154930
Data columns (total 14 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Acousticness      154931 non-null  float64
 1   Danceability      154931 non-null  float64
 2   Duration          154931 non-null  int64  
 3   Energy            154931 non-null  float64
 4   Explicit          154931 non-null  int32  
 5   Instrumentalness  154931 non-null  float64
 6   Liveness          154931 non-null  float64
 7   Loudness          154931 non-null  float64
 8   Mode              154931 non-null  int64  
 9   Popularity        154931 non-null  int64  
 10  Speechiness       154931 non-null  float64
 11  Tempo             154931 non-null  float64
 12  TimeSignature     154931 non-null  int64  
 13  Valence           154931 non-null  float64
dtypes: float64(9), int32(1), int64(4)
memory usage: 16.0 MB


## Create a new column for popularity rank

In [7]:
# Create a binary label column: Rank is 1 when Popularity is at most 15 and
# 0 otherwise. The comparison is vectorised over the whole column instead of
# calling a Python lambda once per row; int64 matches the dtype the row-wise
# version produced.
df_attributes["Rank"] = (df_attributes["Popularity"] <= 15).astype("int64")
# Let's see the new column
df_attributes.head()

Unnamed: 0,Acousticness,Danceability,Duration,Energy,Explicit,Instrumentalness,Liveness,Loudness,Mode,Popularity,Speechiness,Tempo,TimeSignature,Valence,Rank
0,0.000728,0.52,234947,0.904,0,0.0103,0.0634,-5.03,1,35,0.0309,106.022,4,0.365,0
1,0.0182,0.581,239573,0.709,0,0.000664,0.174,-4.909,1,31,0.0282,120.027,4,0.408,0
2,0.000473,0.572,198400,0.918,0,0.000431,0.0977,-3.324,0,30,0.0559,144.061,4,0.37,0
3,0.00097,0.596,231453,0.661,0,3.3e-05,0.113,-5.051,1,35,0.0254,111.975,4,0.183,0
4,3.6e-05,0.52,222520,0.808,0,1e-05,0.08,-4.553,0,21,0.0318,92.721,4,0.666,0


## Creating input and output datasets

In [8]:
import numpy as np
# Target array: the Rank column, kept two-dimensional (a single column)
y = df_attributes[['Rank']].to_numpy()
# Feature frame: a new dataframe without the label and the Popularity score
# the label was computed from; df_attributes itself is left untouched
df_copy = df_attributes.drop(columns=['Popularity', 'Rank'])
# Feature matrix as a numpy array
X = df_copy.to_numpy()
# Print both shapes in bold (ANSI escape codes)
print('\033[1m' + "Shape of the X dataset: {}".format(X.shape) + '\033[0m')
print('\033[1m' + "Shape of the y dataset: {}".format(y.shape) + '\033[0m')

[1mShape of the X dataset: (154931, 13)[0m
[1mShape of the y dataset: (154931, 1)[0m


## Creating train and test splits

In [9]:
# Hold out 20% of the rows for testing; random_state pins the shuffle
from sklearn.model_selection import train_test_split
split_arrays = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_arrays
# Report the shapes of the resulting train and test arrays
print('Train set:', X_train.shape, y_train.shape)
print('Test set:', X_test.shape, y_test.shape)

Train set: (123944, 13) (123944, 1)
Test set: (30987, 13) (30987, 1)


## Data Preprocessing

In [10]:
# Fit a StandardScaler on the training features only
from sklearn.preprocessing import StandardScaler
X_scaler = StandardScaler()
X_scaler = X_scaler.fit(X_train)  # fit() hands back the fitted scaler

In [11]:
# Apply the scaler fitted on the training data to both feature splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [12]:
# Install tensorflow module (if not available) by uncommenting below
#!pip install keras
#!pip install --upgrade tensorflow

## One hot encoding of labels

In [13]:
from tensorflow.keras.utils import to_categorical
# One-hot encode the integer class labels of the train and test splits
y_train_categorical, y_test_categorical = (
    to_categorical(labels) for labels in (y_train, y_test)
)

## Defining the model architecture

In [14]:
from tensorflow.keras.models import Sequential
# Creating an empty sequential model; its layers are added with model.add()
# in the cells that follow
model = Sequential()

## Defining the input of the model

In [15]:
from tensorflow.keras.layers import Dense
# Take the input width from the scaled training matrix instead of hard-coding
# it, so the layer always matches the number of feature columns.
number_inputs = X_train_scaled.shape[1]
number_hidden_nodes = 39
# Hidden layer with tanh activation; input_dim declares the model's input size
model.add(Dense(units=number_hidden_nodes,
                activation='tanh', input_dim=number_inputs))

## Defining the output of the model

In [16]:
# Output layer: one softmax unit per class
number_classes = 2  # Labels we are trying to predict (either 'Hit' or 'Miss')
model.add(
    Dense(activation='softmax', units=number_classes)
)

## Summary of the model

In [17]:
# Print the layer-by-layer overview (output shapes and parameter counts)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 39)                546       
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 80        
Total params: 626
Trainable params: 626
Non-trainable params: 0
_________________________________________________________________


## Compile the model

In [18]:
# Categorical crossentropy suits one-hot encoded class labels
# (mean squared error would be the choice for regression)
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)

## Training the model

In [19]:
# Train the model on the scaled features and the one-hot encoded labels
model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    shuffle=True,
    verbose=2,
    epochs=100,  # Hundred passes over the training data
)

Epoch 1/100
3874/3874 - 4s - loss: 0.6690 - accuracy: 0.5939
Epoch 2/100
3874/3874 - 5s - loss: 0.6612 - accuracy: 0.6054
Epoch 3/100
3874/3874 - 4s - loss: 0.6587 - accuracy: 0.6095
Epoch 4/100
3874/3874 - 4s - loss: 0.6571 - accuracy: 0.6110
Epoch 5/100
3874/3874 - 5s - loss: 0.6565 - accuracy: 0.6132
Epoch 6/100
3874/3874 - 5s - loss: 0.6558 - accuracy: 0.6126
Epoch 7/100
3874/3874 - 5s - loss: 0.6551 - accuracy: 0.6148
Epoch 8/100
3874/3874 - 5s - loss: 0.6547 - accuracy: 0.6146
Epoch 9/100
3874/3874 - 5s - loss: 0.6543 - accuracy: 0.6156
Epoch 10/100
3874/3874 - 4s - loss: 0.6538 - accuracy: 0.6160
Epoch 11/100
3874/3874 - 5s - loss: 0.6534 - accuracy: 0.6158
Epoch 12/100
3874/3874 - 5s - loss: 0.6531 - accuracy: 0.6174
Epoch 13/100
3874/3874 - 5s - loss: 0.6530 - accuracy: 0.6175
Epoch 14/100
3874/3874 - 5s - loss: 0.6527 - accuracy: 0.6175
Epoch 15/100
3874/3874 - 5s - loss: 0.6524 - accuracy: 0.6174
Epoch 16/100
3874/3874 - 5s - loss: 0.6522 - accuracy: 0.6183
Epoch 17/100
3874

<tensorflow.python.keras.callbacks.History at 0x196f4ef8048>

## Evaluating the model using the test data

In [20]:
# Score the trained model on the held-out test split
test_metrics = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
# evaluate() yields the loss followed by the compiled accuracy metric
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

969/969 - 1s - loss: 0.6559 - accuracy: 0.6170
Loss: 0.6558882594108582, Accuracy: 0.6169683933258057


# Deep Learning

In [21]:
# Deeper variant: two tanh hidden layers of 39 nodes each (one more than the
# first model), followed by the same 2-unit softmax output layer
deep_model = Sequential([
    Dense(units=39, activation='tanh', input_dim=13),
    Dense(units=39, activation='tanh'),
    Dense(units=2, activation='softmax'),
])

## Summary of Deep model

In [22]:
# Print the layer-by-layer overview of the deep model
deep_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 39)                546       
_________________________________________________________________
dense_3 (Dense)              (None, 39)                1560      
_________________________________________________________________
dense_4 (Dense)              (None, 2)                 80        
Total params: 2,186
Trainable params: 2,186
Non-trainable params: 0
_________________________________________________________________


## Compile the deep model

In [23]:
# Compile the deep model with the same optimizer, loss and metric as the
# first model
deep_model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)

## Training the deep model

In [24]:
# Train the deep model on the scaled features and the one-hot encoded labels
deep_model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    shuffle=True,
    verbose=2,
    epochs=100,
)

Epoch 1/100
3874/3874 - 4s - loss: 0.6658 - accuracy: 0.5983
Epoch 2/100
3874/3874 - 4s - loss: 0.6590 - accuracy: 0.6095
Epoch 3/100
3874/3874 - 5s - loss: 0.6565 - accuracy: 0.6127
Epoch 4/100
3874/3874 - 4s - loss: 0.6553 - accuracy: 0.6156
Epoch 5/100
3874/3874 - 4s - loss: 0.6539 - accuracy: 0.6167
Epoch 6/100
3874/3874 - 4s - loss: 0.6531 - accuracy: 0.6172
Epoch 7/100
3874/3874 - 5s - loss: 0.6525 - accuracy: 0.6192
Epoch 8/100
3874/3874 - 4s - loss: 0.6521 - accuracy: 0.6185
Epoch 9/100
3874/3874 - 4s - loss: 0.6515 - accuracy: 0.6198
Epoch 10/100
3874/3874 - 5s - loss: 0.6510 - accuracy: 0.6202
Epoch 11/100
3874/3874 - 4s - loss: 0.6504 - accuracy: 0.6205
Epoch 12/100
3874/3874 - 5s - loss: 0.6499 - accuracy: 0.6214
Epoch 13/100
3874/3874 - 5s - loss: 0.6494 - accuracy: 0.6212
Epoch 14/100
3874/3874 - 5s - loss: 0.6493 - accuracy: 0.6226
Epoch 15/100
3874/3874 - 4s - loss: 0.6488 - accuracy: 0.6234
Epoch 16/100
3874/3874 - 5s - loss: 0.6483 - accuracy: 0.6226
Epoch 17/100
3874

<tensorflow.python.keras.callbacks.History at 0x196f68e4048>

In [25]:
# Score the deep model on the held-out test split.
# Note: this rebinds model_loss / model_accuracy from the first model.
deep_metrics = deep_model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
model_loss, model_accuracy = deep_metrics
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

969/969 - 1s - loss: 0.6578 - accuracy: 0.6129
Deep Neural Network - Loss: 0.6578496098518372, Accuracy: 0.6129021644592285


## Saving the deep model

In [26]:
# Save the trained deep model to an HDF5 (.h5) file in the working directory
deep_model.save("artist_billboard_trained.h5")