In [1]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [2]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade

Requirement already up-to-date: sklearn in c:\users\msflo\anaconda3\envs\pythondata\lib\site-packages (0.0)


In [3]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)
from tensorflow import random
random.set_seed(1)

In [4]:
%matplotlib inline
import matplotlib.pyplot as plt
import joblib
import numpy as np
import pandas as pd
import sklearn
import sklearn.datasets

In [5]:
df = pd.read_csv('data/audio_features_hot_100_1958_2019.csv')

# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head()
# Display dataframe
df.head()

Unnamed: 0,track_id,artist,track,spotify_genre,spotify_track_id,spotify_track_album,spotify_track_explicit,spotify_track_duration_ms,spotify_track_popularity,danceability,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,"AdictoTainy, Anuel AA & Ozuna","Tainy, Anuel AA & Ozuna",Adicto,['pop reggaeton'],3jbT1Y5MoPwEIpZndDDwVq,Adicto (with Anuel AA & Ozuna),0.0,270740.0,91.0,0.734,...,10.0,-4.803,0.0,0.0735,0.017,1.6e-05,0.179,0.623,80.002,4.0
2,ShallowLady Gaga & Bradley Cooper,Lady Gaga & Bradley Cooper,Shallow,"['dance pop', 'pop']",2VxeLyX666F8uXCJ0dZF8B,A Star Is Born Soundtrack,0.0,215733.0,88.0,0.572,...,7.0,-6.362,1.0,0.0308,0.371,0.0,0.231,0.323,95.799,4.0
3,EnemiesPost Malone Featuring DaBaby,Post Malone Featuring DaBaby,Enemies,"['dfw rap', 'melodic rap', 'rap']",0Xek5rqai2jcOWCYWJfVCF,Hollywood's Bleeding,1.0,196760.0,86.0,0.542,...,6.0,-4.169,1.0,0.21,0.0588,0.0,0.0955,0.667,76.388,4.0
4,"Bacc At It AgainYella Beezy, Gucci Mane & Quavo","Yella Beezy, Gucci Mane & Quavo",Bacc At It Again,"['dfw rap', 'rap', 'southern hip hop', 'trap']",2biNa12dMbHJrHVFRt8JyO,Bacc At It Again,1.0,228185.0,61.0,0.948,...,8.0,-5.725,0.0,0.168,0.00124,1e-06,0.0716,0.856,135.979,4.0
5,The ArcherTaylor Swift,Taylor Swift,The Archer,"['dance pop', 'pop', 'post-teen pop']",3pHkh7d0lzM2AldUtz2x37,Lover,0.0,211240.0,76.0,0.292,...,0.0,-9.375,1.0,0.0401,0.12,0.00569,0.0663,0.166,124.344,4.0


In [6]:
df['target'] = pd.qcut(df['spotify_track_popularity'],10, labels = False)

In [7]:
from sklearn.model_selection import train_test_split
y = pd.Series(df["target"])
X = df.drop(columns=["track_id", "artist", "track", "spotify_genre", "spotify_track_id", "spotify_track_album", "target", 'spotify_track_popularity'])

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [8]:
y.describe()

count    23559.000000
mean         4.435460
std          2.859693
min          0.000000
25%          2.000000
50%          4.000000
75%          7.000000
max          9.000000
Name: target, dtype: float64

In [9]:
X_train

Unnamed: 0,spotify_track_explicit,spotify_track_duration_ms,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
28040,0.0,203440.0,0.665,0.664,4.0,-9.480,1.0,0.0358,0.1860,0.000002,0.1020,0.960,88.679,4.0
17380,0.0,147653.0,0.434,0.780,0.0,-7.826,1.0,0.1630,0.6500,0.000000,0.0682,0.644,171.940,4.0
7310,0.0,251293.0,0.431,0.735,1.0,-6.995,1.0,0.0508,0.0945,0.000111,0.1170,0.360,139.984,4.0
5864,1.0,284600.0,0.736,0.549,8.0,-6.580,1.0,0.1290,0.0541,0.000000,0.1090,0.272,142.064,4.0
20132,0.0,227933.0,0.715,0.400,9.0,-13.250,1.0,0.0876,0.0760,0.000000,0.0516,0.706,96.607,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12776,0.0,204200.0,0.623,0.590,4.0,-5.014,1.0,0.0280,0.6460,0.000000,0.2950,0.579,137.071,4.0
20620,0.0,169533.0,0.628,0.693,2.0,-11.236,1.0,0.0301,0.0562,0.005020,0.1860,0.822,125.349,4.0
6085,0.0,263960.0,0.650,0.665,6.0,-12.763,0.0,0.0469,0.2080,0.000000,0.3040,0.725,135.598,4.0
14192,0.0,225560.0,0.802,0.738,0.0,-5.975,0.0,0.2360,0.0192,0.000000,0.2100,0.597,110.862,4.0


In [10]:
y_train

28040    0
17380    0
7310     6
5864     6
20132    1
        ..
12776    8
20620    0
6085     6
14192    5
252      9
Name: target, Length: 17669, dtype: int64

In [11]:
from sklearn.preprocessing import MinMaxScaler
X_scaler = MinMaxScaler().fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [12]:
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense

In [13]:
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

In [14]:
y_train_categorical

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [15]:
X_train_scaled.shape

(17669, 14)

In [16]:
model = Sequential()
model.add(Dense(units=10, activation='relu', input_dim=14))
model.add(Dense(units=10, activation='softmax'))

In [17]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [18]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                150       
_________________________________________________________________
dense_1 (Dense)              (None, 10)                110       
Total params: 260
Trainable params: 260
Non-trainable params: 0
_________________________________________________________________


In [22]:
# set
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=600,
    shuffle=True,
    verbose=2
)

Epoch 1/600
553/553 - 1s - loss: 2.0969 - accuracy: 0.2039
Epoch 2/600
553/553 - 1s - loss: 2.0965 - accuracy: 0.2047
Epoch 3/600
553/553 - 1s - loss: 2.0957 - accuracy: 0.2037
Epoch 4/600
553/553 - 1s - loss: 2.0956 - accuracy: 0.2029
Epoch 5/600
553/553 - 1s - loss: 2.0955 - accuracy: 0.2071
Epoch 6/600
553/553 - 2s - loss: 2.0951 - accuracy: 0.2073
Epoch 7/600
553/553 - 2s - loss: 2.0946 - accuracy: 0.2061
Epoch 8/600
553/553 - 2s - loss: 2.0944 - accuracy: 0.2057
Epoch 9/600
553/553 - 2s - loss: 2.0946 - accuracy: 0.2040
Epoch 10/600
553/553 - 1s - loss: 2.0939 - accuracy: 0.2083
Epoch 11/600
553/553 - 1s - loss: 2.0935 - accuracy: 0.2059
Epoch 12/600
553/553 - 1s - loss: 2.0935 - accuracy: 0.2066
Epoch 13/600
553/553 - 1s - loss: 2.0932 - accuracy: 0.2061
Epoch 14/600
553/553 - 1s - loss: 2.0927 - accuracy: 0.2067
Epoch 15/600
553/553 - 1s - loss: 2.0921 - accuracy: 0.2065
Epoch 16/600
553/553 - 1s - loss: 2.0921 - accuracy: 0.2065
Epoch 17/600
553/553 - 1s - loss: 2.0920 - accura

553/553 - 1s - loss: 2.0766 - accuracy: 0.2117
Epoch 138/600
553/553 - 1s - loss: 2.0767 - accuracy: 0.2124
Epoch 139/600
553/553 - 1s - loss: 2.0768 - accuracy: 0.2117
Epoch 140/600
553/553 - 1s - loss: 2.0764 - accuracy: 0.2125
Epoch 141/600
553/553 - 2s - loss: 2.0762 - accuracy: 0.2138
Epoch 142/600
553/553 - 2s - loss: 2.0761 - accuracy: 0.2120
Epoch 143/600
553/553 - 2s - loss: 2.0767 - accuracy: 0.2125
Epoch 144/600
553/553 - 2s - loss: 2.0765 - accuracy: 0.2140
Epoch 145/600
553/553 - 2s - loss: 2.0762 - accuracy: 0.2120
Epoch 146/600
553/553 - 2s - loss: 2.0763 - accuracy: 0.2118
Epoch 147/600
553/553 - 2s - loss: 2.0761 - accuracy: 0.2102
Epoch 148/600
553/553 - 3s - loss: 2.0764 - accuracy: 0.2113
Epoch 149/600
553/553 - 2s - loss: 2.0763 - accuracy: 0.2134
Epoch 150/600
553/553 - 2s - loss: 2.0754 - accuracy: 0.2105
Epoch 151/600
553/553 - 1s - loss: 2.0758 - accuracy: 0.2109
Epoch 152/600
553/553 - 2s - loss: 2.0761 - accuracy: 0.2125
Epoch 153/600
553/553 - 1s - loss: 2.0

Epoch 272/600
553/553 - 2s - loss: 2.0725 - accuracy: 0.2124
Epoch 273/600
553/553 - 2s - loss: 2.0738 - accuracy: 0.2103
Epoch 274/600
553/553 - 2s - loss: 2.0730 - accuracy: 0.2117
Epoch 275/600
553/553 - 2s - loss: 2.0732 - accuracy: 0.2108
Epoch 276/600
553/553 - 2s - loss: 2.0735 - accuracy: 0.2135
Epoch 277/600
553/553 - 2s - loss: 2.0727 - accuracy: 0.2117
Epoch 278/600
553/553 - 2s - loss: 2.0726 - accuracy: 0.2126
Epoch 279/600
553/553 - 2s - loss: 2.0729 - accuracy: 0.2108
Epoch 280/600
553/553 - 2s - loss: 2.0735 - accuracy: 0.2119
Epoch 281/600
553/553 - 2s - loss: 2.0727 - accuracy: 0.2113
Epoch 282/600
553/553 - 2s - loss: 2.0724 - accuracy: 0.2138
Epoch 283/600
553/553 - 2s - loss: 2.0726 - accuracy: 0.2117
Epoch 284/600
553/553 - 2s - loss: 2.0723 - accuracy: 0.2135
Epoch 285/600
553/553 - 2s - loss: 2.0725 - accuracy: 0.2126
Epoch 286/600
553/553 - 2s - loss: 2.0729 - accuracy: 0.2114
Epoch 287/600
553/553 - 2s - loss: 2.0733 - accuracy: 0.2128
Epoch 288/600
553/553 - 

Epoch 407/600
553/553 - 2s - loss: 2.0717 - accuracy: 0.2112
Epoch 408/600
553/553 - 3s - loss: 2.0714 - accuracy: 0.2123
Epoch 409/600
553/553 - 2s - loss: 2.0725 - accuracy: 0.2135
Epoch 410/600
553/553 - 2s - loss: 2.0717 - accuracy: 0.2131
Epoch 411/600
553/553 - 2s - loss: 2.0721 - accuracy: 0.2148
Epoch 412/600
553/553 - 2s - loss: 2.0717 - accuracy: 0.2145
Epoch 413/600
553/553 - 2s - loss: 2.0721 - accuracy: 0.2134
Epoch 414/600
553/553 - 1s - loss: 2.0718 - accuracy: 0.2129
Epoch 415/600
553/553 - 2s - loss: 2.0718 - accuracy: 0.2147
Epoch 416/600
553/553 - 2s - loss: 2.0717 - accuracy: 0.2128
Epoch 417/600
553/553 - 1s - loss: 2.0712 - accuracy: 0.2136
Epoch 418/600
553/553 - 1s - loss: 2.0724 - accuracy: 0.2125
Epoch 419/600
553/553 - 1s - loss: 2.0715 - accuracy: 0.2142
Epoch 420/600
553/553 - 1s - loss: 2.0715 - accuracy: 0.2145
Epoch 421/600
553/553 - 1s - loss: 2.0720 - accuracy: 0.2137
Epoch 422/600
553/553 - 2s - loss: 2.0719 - accuracy: 0.2125
Epoch 423/600
553/553 - 

Epoch 542/600
553/553 - 4s - loss: 2.0714 - accuracy: 0.2125
Epoch 543/600
553/553 - 2s - loss: 2.0710 - accuracy: 0.2133
Epoch 544/600
553/553 - 2s - loss: 2.0713 - accuracy: 0.2142
Epoch 545/600
553/553 - 2s - loss: 2.0709 - accuracy: 0.2127
Epoch 546/600
553/553 - 2s - loss: 2.0708 - accuracy: 0.2152
Epoch 547/600
553/553 - 2s - loss: 2.0710 - accuracy: 0.2135
Epoch 548/600
553/553 - 2s - loss: 2.0710 - accuracy: 0.2145
Epoch 549/600
553/553 - 2s - loss: 2.0708 - accuracy: 0.2116
Epoch 550/600
553/553 - 1s - loss: 2.0707 - accuracy: 0.2129
Epoch 551/600
553/553 - 1s - loss: 2.0710 - accuracy: 0.2126
Epoch 552/600
553/553 - 2s - loss: 2.0706 - accuracy: 0.2129
Epoch 553/600
553/553 - 1s - loss: 2.0713 - accuracy: 0.2133
Epoch 554/600
553/553 - 2s - loss: 2.0710 - accuracy: 0.2135
Epoch 555/600
553/553 - 2s - loss: 2.0716 - accuracy: 0.2125
Epoch 556/600
553/553 - 1s - loss: 2.0711 - accuracy: 0.2125
Epoch 557/600
553/553 - 1s - loss: 2.0707 - accuracy: 0.2122
Epoch 558/600
553/553 - 

<tensorflow.python.keras.callbacks.History at 0x19802929860>

In [20]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

185/185 - 0s - loss: 2.1044 - accuracy: 0.1896
Normal Neural Network - Loss: 2.1044347286224365, Accuracy: 0.18964345753192902


## Save the Model

In [25]:
# save model 
filename = 'Models/deep_spot_pop.h5'
joblib.dump(model, filename)

TypeError: can't pickle _thread.RLock objects