In [2]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [2]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade

Requirement already up-to-date: sklearn in c:\users\msflo\anaconda3\envs\pythondata\lib\site-packages (0.0)


In [3]:
# Pin the global random seeds of NumPy and TensorFlow so that repeated runs of
# this notebook start from the same random state.
SEED_VALUE = 1

from numpy.random import seed
seed(SEED_VALUE)

from tensorflow import random
random.set_seed(SEED_VALUE)

In [4]:
%matplotlib inline
import matplotlib.pyplot as plt
import joblib
import numpy as np
import pandas as pd
import sklearn
import sklearn.datasets

In [5]:
# Load the audio-features CSV into a DataFrame.
df = pd.read_csv('data/audio_features_hot_100_1958_2019.csv')

# Drop columns in which every value is null, then drop any row that still
# contains a null, so only complete records remain.
df = df.dropna(axis='columns', how='all').dropna()

# Display dataframe (only the last expression of a cell is rendered, so a
# single call is enough).
df.head()

Unnamed: 0,track_id,artist,track,spotify_genre,spotify_track_id,spotify_track_album,spotify_track_explicit,spotify_track_duration_ms,spotify_track_popularity,danceability,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,"AdictoTainy, Anuel AA & Ozuna","Tainy, Anuel AA & Ozuna",Adicto,['pop reggaeton'],3jbT1Y5MoPwEIpZndDDwVq,Adicto (with Anuel AA & Ozuna),0.0,270740.0,91.0,0.734,...,10.0,-4.803,0.0,0.0735,0.017,1.6e-05,0.179,0.623,80.002,4.0
2,ShallowLady Gaga & Bradley Cooper,Lady Gaga & Bradley Cooper,Shallow,"['dance pop', 'pop']",2VxeLyX666F8uXCJ0dZF8B,A Star Is Born Soundtrack,0.0,215733.0,88.0,0.572,...,7.0,-6.362,1.0,0.0308,0.371,0.0,0.231,0.323,95.799,4.0
3,EnemiesPost Malone Featuring DaBaby,Post Malone Featuring DaBaby,Enemies,"['dfw rap', 'melodic rap', 'rap']",0Xek5rqai2jcOWCYWJfVCF,Hollywood's Bleeding,1.0,196760.0,86.0,0.542,...,6.0,-4.169,1.0,0.21,0.0588,0.0,0.0955,0.667,76.388,4.0
4,"Bacc At It AgainYella Beezy, Gucci Mane & Quavo","Yella Beezy, Gucci Mane & Quavo",Bacc At It Again,"['dfw rap', 'rap', 'southern hip hop', 'trap']",2biNa12dMbHJrHVFRt8JyO,Bacc At It Again,1.0,228185.0,61.0,0.948,...,8.0,-5.725,0.0,0.168,0.00124,1e-06,0.0716,0.856,135.979,4.0
5,The ArcherTaylor Swift,Taylor Swift,The Archer,"['dance pop', 'pop', 'post-teen pop']",3pHkh7d0lzM2AldUtz2x37,Lover,0.0,211240.0,76.0,0.292,...,0.0,-9.375,1.0,0.0401,0.12,0.00569,0.0663,0.166,124.344,4.0


In [6]:
# Split track popularity into 10 quantile-based buckets and store the bucket
# number (labels=False yields integer codes rather than interval labels) as the
# class label column.
popularity_bucket = pd.qcut(df['spotify_track_popularity'], q=10, labels=False)
df['target'] = popularity_bucket

In [7]:
from sklearn.model_selection import train_test_split

# Class label: the popularity bucket stored in `target`.
y = df["target"]

# Feature matrix: remove identifier/text columns, the label itself, and the raw
# popularity score. `target` is computed directly from
# `spotify_track_popularity`, so keeping that column in X would leak the answer
# into the features.
X = df.drop(columns=[
    "track_id",
    "artist",
    "track",
    "spotify_genre",
    "spotify_track_id",
    "spotify_track_album",
    "spotify_track_popularity",
    "target",
])

# Fixed random_state keeps the train/test partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [8]:
# Write the summary statistics of the training features to a CSV file.
train_feature_stats = X_train.describe()
train_feature_stats.to_csv('xtrain_describe.csv')

In [9]:
# Show the training feature matrix (rendered as the cell output).
X_train

Unnamed: 0,spotify_track_explicit,spotify_track_duration_ms,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
28040,0.0,203440.0,0.665,0.664,4.0,-9.480,1.0,0.0358,0.1860,0.000002,0.1020,0.960,88.679,4.0
17380,0.0,147653.0,0.434,0.780,0.0,-7.826,1.0,0.1630,0.6500,0.000000,0.0682,0.644,171.940,4.0
7310,0.0,251293.0,0.431,0.735,1.0,-6.995,1.0,0.0508,0.0945,0.000111,0.1170,0.360,139.984,4.0
5864,1.0,284600.0,0.736,0.549,8.0,-6.580,1.0,0.1290,0.0541,0.000000,0.1090,0.272,142.064,4.0
20132,0.0,227933.0,0.715,0.400,9.0,-13.250,1.0,0.0876,0.0760,0.000000,0.0516,0.706,96.607,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12776,0.0,204200.0,0.623,0.590,4.0,-5.014,1.0,0.0280,0.6460,0.000000,0.2950,0.579,137.071,4.0
20620,0.0,169533.0,0.628,0.693,2.0,-11.236,1.0,0.0301,0.0562,0.005020,0.1860,0.822,125.349,4.0
6085,0.0,263960.0,0.650,0.665,6.0,-12.763,0.0,0.0469,0.2080,0.000000,0.3040,0.725,135.598,4.0
14192,0.0,225560.0,0.802,0.738,0.0,-5.975,0.0,0.2360,0.0192,0.000000,0.2100,0.597,110.862,4.0


In [10]:
# Show the training labels (rendered as the cell output).
y_train

28040    0
17380    0
7310     6
5864     6
20132    1
        ..
12776    8
20620    0
6085     6
14192    5
252      9
Name: target, Length: 17669, dtype: int64

In [11]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training split only, then apply that same fitted
# scaler to both splits so the test data is transformed with the training
# data's parameters.
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [12]:
# Take every Keras symbol from the Keras bundled with TensorFlow. Mixing
# `tensorflow.keras` with the standalone `keras` package can combine objects
# from two different implementations/versions in one model.
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [13]:
# Map the class labels to consecutive integer codes; the mapping is learned
# from the training labels and then reused for the test labels.
label_encoder = LabelEncoder()
encoded_y_train = label_encoder.fit_transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# One-hot encode the integer codes. The width is pinned to the number of
# classes seen during fitting: without `num_classes`, `to_categorical` sizes
# each array from the largest code it is given, so the train and test arrays
# could end up with different widths if the highest class is missing from one
# of the splits.
num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=num_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=num_classes)

In [14]:
# Show the one-hot encoded training labels (rendered as the cell output).
y_train_categorical

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [15]:
# Show (rows, columns) of the scaled training features; the column count is
# the number of inputs the network receives.
X_train_scaled.shape

(17669, 14)

In [16]:
# Size the network from the prepared arrays instead of hard-coding the
# dimensions, so the layers always agree with the data actually fed to them.
n_features = X_train_scaled.shape[1]       # one input per scaled feature column
n_classes = y_train_categorical.shape[1]   # one output per one-hot label column

model = Sequential()
# Single hidden layer of 10 ReLU units.
model.add(Dense(units=10, activation='relu', input_dim=n_features))
# Output layer: softmax over the classes.
model.add(Dense(units=n_classes, activation='softmax'))

In [17]:
# Compile the model: Adam optimizer, categorical cross-entropy loss (the
# labels are one-hot encoded), and accuracy reported as a metric.
# Training happens in a later cell.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [18]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                150       
_________________________________________________________________
dense_1 (Dense)              (None, 10)                110       
Total params: 260
Trainable params: 260
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train the network on the scaled training features and one-hot labels for
# 600 epochs, reshuffling the samples each epoch; verbose=2 prints one line
# per epoch.
model.fit(X_train_scaled, y_train_categorical, epochs=600, shuffle=True, verbose=2)

Epoch 1/600
553/553 - 2s - loss: 2.2804 - accuracy: 0.1257
Epoch 2/600
553/553 - 2s - loss: 2.1893 - accuracy: 0.1740
Epoch 3/600
553/553 - 2s - loss: 2.1587 - accuracy: 0.1785
Epoch 4/600
553/553 - 2s - loss: 2.1502 - accuracy: 0.1789
Epoch 5/600
553/553 - 2s - loss: 2.1460 - accuracy: 0.1821
Epoch 6/600
553/553 - 2s - loss: 2.1429 - accuracy: 0.1846
Epoch 7/600
553/553 - 2s - loss: 2.1409 - accuracy: 0.1861
Epoch 8/600
553/553 - 2s - loss: 2.1385 - accuracy: 0.1881
Epoch 9/600
553/553 - 2s - loss: 2.1376 - accuracy: 0.1872
Epoch 10/600
553/553 - 3s - loss: 2.1356 - accuracy: 0.1880
Epoch 11/600
553/553 - 2s - loss: 2.1344 - accuracy: 0.1907
Epoch 12/600
553/553 - 3s - loss: 2.1331 - accuracy: 0.1863
Epoch 13/600
553/553 - 2s - loss: 2.1321 - accuracy: 0.1890
Epoch 14/600
553/553 - 2s - loss: 2.1306 - accuracy: 0.1884
Epoch 15/600
553/553 - 2s - loss: 2.1293 - accuracy: 0.1891
Epoch 16/600
553/553 - 2s - loss: 2.1284 - accuracy: 0.1899
Epoch 17/600
553/553 - 5s - loss: 2.1275 - accura

553/553 - 2s - loss: 2.0811 - accuracy: 0.2109
Epoch 138/600
553/553 - 2s - loss: 2.0812 - accuracy: 0.2122
Epoch 139/600
553/553 - 2s - loss: 2.0813 - accuracy: 0.2114
Epoch 140/600
553/553 - 2s - loss: 2.0808 - accuracy: 0.2130
Epoch 141/600
553/553 - 2s - loss: 2.0807 - accuracy: 0.2127
Epoch 142/600
553/553 - 2s - loss: 2.0805 - accuracy: 0.2118
Epoch 143/600
553/553 - 2s - loss: 2.0809 - accuracy: 0.2123
Epoch 144/600
553/553 - 2s - loss: 2.0808 - accuracy: 0.2123
Epoch 145/600
553/553 - 2s - loss: 2.0804 - accuracy: 0.2123
Epoch 146/600
553/553 - 2s - loss: 2.0804 - accuracy: 0.2123
Epoch 147/600
553/553 - 2s - loss: 2.0800 - accuracy: 0.2119
Epoch 148/600
553/553 - 2s - loss: 2.0805 - accuracy: 0.2114
Epoch 149/600
553/553 - 2s - loss: 2.0801 - accuracy: 0.2113
Epoch 150/600
553/553 - 2s - loss: 2.0792 - accuracy: 0.2114
Epoch 151/600
553/553 - 2s - loss: 2.0795 - accuracy: 0.2118
Epoch 152/600
553/553 - 3s - loss: 2.0798 - accuracy: 0.2112
Epoch 153/600
553/553 - 2s - loss: 2.0

In [None]:
# Score the trained network on the held-out test split and report the
# resulting loss and accuracy.
model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
summary_line = f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}"
print(summary_line)

## Save the Model

In [None]:
# Save the trained network to disk.
# The model's own `save` method is used: with an ".h5" path it writes a real
# HDF5 file that `tensorflow.keras.models.load_model` can read back.
# `joblib.dump` would instead pickle the Python object, which is not an HDF5
# file despite the extension and is not a reliable way to persist Keras models.
import os

filename = 'Models/deep_spot_pop.h5'
# Create the target directory if it does not exist yet so the save cannot fail
# on a fresh checkout.
os.makedirs(os.path.dirname(filename), exist_ok=True)
model.save(filename)