In [1]:
# DataFrame
import pandas as pd

# Matplot
import matplotlib.pyplot as plt
%matplotlib inline

# Scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.metrics import mean_absolute_error,mean_squared_error


# Keras
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, Embedding, Flatten, Conv1D, MaxPooling1D, LSTM
from keras.callbacks import EarlyStopping
from keras import utils

# Utility
import numpy as np
import time

In [2]:
#Importing Dataset
data = pd.read_csv('Datasets/cleaned_data.csv')
data = data.drop(['Unnamed: 0'], axis = 1)

In [3]:
# making a new dataset with only numerical data
numer_data = data.select_dtypes(include=['float64','int64'])
numer_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1605 entries, 0 to 1604
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   total_tracks      1605 non-null   float64
 1   duration_ms       1605 non-null   float64
 2   popularity        1605 non-null   float64
 3   danceability      1605 non-null   float64
 4   energy            1605 non-null   float64
 5   key               1605 non-null   float64
 6   loudness          1605 non-null   float64
 7   mode              1605 non-null   float64
 8   speechiness       1605 non-null   float64
 9   acousticness      1605 non-null   float64
 10  instrumentalness  1605 non-null   float64
 11  liveness          1605 non-null   float64
 12  valence           1605 non-null   float64
 13  tempo             1605 non-null   float64
 14  time_signature    1605 non-null   float64
dtypes: float64(15)
memory usage: 188.2 KB


## Machine Learning model - Neural Network

In [4]:
# Split into train and test dataset.
data_train, data_test = train_test_split(numer_data, test_size=0.2, random_state=42)
print("TRAIN size:", len(data_train))
print("TEST size:", len(data_test))

TRAIN size: 1284
TEST size: 321


In [5]:
x_train = data_train.drop(['popularity', 'time_signature'], axis = 1).values
x_train

array([[0.15730337, 0.09964049, 0.51372998, ..., 0.07922191, 0.73766722,
        0.19585245],
       [0.15730337, 0.11664177, 0.86498856, ..., 0.32297327, 0.84009197,
        0.21596757],
       [0.        , 0.0158407 , 0.5778032 , ..., 0.09910889, 0.61747492,
        0.74232817],
       ...,
       [0.06741573, 0.01706077, 0.55949657, ..., 0.07063682, 0.18373746,
        0.52616999],
       [0.12359551, 0.13909014, 0.20251716, ..., 0.15018474, 0.20150502,
        0.2587964 ],
       [0.        , 0.10395394, 0.67048055, ..., 0.02814606, 0.61433946,
        0.47431287]])

In [6]:
x_test = data_test.drop(['popularity', 'time_signature'], axis = 1).values
x_test

array([[0.12359551, 0.13789252, 0.3604119 , ..., 0.13605738, 0.4868311 ,
        0.50039392],
       [0.31460674, 0.11990352, 0.21853547, ..., 0.24581613, 0.31229097,
        0.27583008],
       [0.12359551, 0.08965882, 0.63386728, ..., 0.00652032, 0.94042642,
        0.24561927],
       ...,
       [0.19101124, 0.1286292 , 0.43592677, ..., 0.11649641, 0.51400502,
        0.19023597],
       [0.08988764, 0.2032622 , 0.13501144, ..., 0.10128233, 0.00668896,
        0.49878013],
       [0.        , 0.04507634, 0.60755149, ..., 0.33166703, 0.5882107 ,
        0.67628372]])

In [7]:
y_train = data_train['popularity'].values
y_train

array([0.7311828 , 0.68817204, 0.70967742, ..., 0.38709677, 0.7311828 ,
       0.29032258])

In [8]:
y_test = data_test['popularity'].values
y_test

array([0.79569892, 0.6344086 , 0.80645161, 0.        , 0.6344086 ,
       0.83870968, 0.58064516, 0.52688172, 0.56989247, 0.8172043 ,
       0.55913978, 0.07526882, 0.35483871, 0.37634409, 0.41935484,
       0.67741935, 0.90322581, 0.46236559, 0.8172043 , 0.78494624,
       0.75268817, 0.92473118, 0.47311828, 0.50537634, 0.75268817,
       0.62365591, 0.91397849, 0.5483871 , 0.49462366, 0.64516129,
       0.89247312, 0.66666667, 0.67741935, 0.70967742, 0.68817204,
       0.68817204, 0.03225806, 0.50537634, 0.84946237, 0.68817204,
       0.49462366, 0.83870968, 0.74193548, 0.65591398, 0.62365591,
       0.88172043, 0.61290323, 0.72043011, 0.67741935, 0.53763441,
       0.51612903, 0.78494624, 0.53763441, 0.61290323, 0.34408602,
       0.70967742, 0.56989247, 0.75268817, 0.7311828 , 0.27956989,
       0.13978495, 0.74193548, 0.07526882, 0.25806452, 0.77419355,
       0.49462366, 0.5483871 , 0.82795699, 0.70967742, 0.75268817,
       0.50537634, 0.90322581, 0.34408602, 0.79569892, 0.66666

In [9]:
# Building Model
callback = EarlyStopping(monitor='val_loss', patience=8)
model = Sequential()
  
model.add(Dense(15, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='relu'))
model.compile(optimizer='adam', loss='mse')

2022-04-22 18:47:10.150043: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
model.fit(x=x_train, y=y_train, epochs=80, 
          validation_data=(x_test,y_test),
         callbacks = callback)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
 1/41 [..............................] - ETA: 0s - loss: 0.1410

In [None]:
model_losses = pd.DataFrame(model.history.history)
model_losses.plot()

In [None]:
predictions = model.predict(x_test)
sample = pd.DataFrame(predictions,columns=['Predict'])
sample['Actual']=y_test
sample.head(10)

In [None]:
mean_absolute_error(y_test,predictions)

In [None]:
np.sqrt(mean_squared_error(y_test,predictions))