In [1]:
import pandas as pd
import numpy as np
import json
import re 
import sys
import itertools

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from tensorflow.keras import utils

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, accuracy_score

import seaborn as sns
import matplotlib.pyplot as plt


import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
import spotipy.util as util

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [3]:
# Load the track-level dataset from a CSV file in the working directory.
spotify_df = pd.read_csv('dataset.csv')

In [4]:
# Drop every row that contains a missing value; this mutates spotify_df in place,
# so re-running the load cell is the only way to get the dropped rows back.
spotify_df.dropna(inplace=True)

In [5]:
# Per-column count of missing values; every count should be 0 after the drop.
spotify_df.isna().sum()

Unnamed: 0          0
track_id            0
artists             0
album_name          0
track_name          0
popularity          0
duration_ms         0
explicit            0
danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
time_signature      0
track_genre         0
dtype: int64

In [6]:
# Preview the first rows of the cleaned frame.
spotify_df.head()

Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,1,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.42,0.166,1,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,0.359,0,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic
3,3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,0.0596,0,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,acoustic
4,4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,0.443,2,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,acoustic


In [7]:
# Audio feature columns handed to the mood classifier. Rows built from this
# selection are later passed to the pipeline positionally, so the column order
# here is significant.
mood_feature_names = [
    'duration_ms',
    'danceability',
    'acousticness',
    'energy',
    'instrumentalness',
    'liveness',
    'valence',
    'loudness',
    'speechiness',
    'tempo',
]
mood_prep = spotify_df[mood_feature_names]

In [8]:
# Keep the selected column labels as an Index (the [:] slice takes all of them)
# and display them.
col_features = mood_prep.columns[:]
col_features

Index(['duration_ms', 'danceability', 'acousticness', 'energy',
       'instrumentalness', 'liveness', 'valence', 'loudness', 'speechiness',
       'tempo'],
      dtype='object')

In [9]:
# Min-max scaled copy of the selected features.
# NOTE(review): `mood_trans` is not referenced again in the visible part of this
# notebook — confirm nothing else needs it before removing.
mood_trans = MinMaxScaler().fit_transform(mood_prep[col_features])
# Despite its name, this array holds the raw (unscaled) feature values. The
# pipeline fitted further down contains its own MinMaxScaler step, so scaling is
# applied there at prediction time.
mood_trans_np = np.array(mood_prep[col_features])

In [10]:
# Spot-check the feature vector of the first row.
mood_trans_np[0]

array([ 2.30666e+05,  6.76000e-01,  3.22000e-02,  4.61000e-01,
        1.01000e-06,  3.58000e-01,  7.15000e-01, -6.74600e+00,
        1.43000e-01,  8.79170e+01])

In [11]:
# Spot-check the feature vector of a row further into the array.
mood_trans_np[10011]

array([ 3.65932e+05,  5.54000e-01,  7.25000e-02,  8.10000e-01,
        3.69000e-01,  1.48000e-01,  2.10000e-01, -5.07900e+00,
        3.82000e-02,  1.28023e+02])

In [12]:
# Load the labelled data (it carries a 'mood' column) used to train the classifier.
df = pd.read_csv('data_moods.csv')

In [13]:
# Feature columns are chosen by position: from the 7th column up to, but not
# including, the last three columns of the labelled file.
cl_features = df.columns[6:-3]
labelled_features = df[cl_features]

# X is min-max scaled over the whole labelled set; X2 keeps the raw values for
# the pipeline, which does its own scaling.
X = MinMaxScaler().fit_transform(labelled_features)
X2 = np.array(labelled_features)

# String mood label for every labelled song.
Y = df['mood']

In [14]:

# Map the string mood labels to integer class codes.
encoder = LabelEncoder()
encoded_y = encoder.fit_transform(Y)

# One-hot representation of the integer codes (one column per class).
dummy_y = utils.to_categorical(encoded_y)

# Hold out 20% of the rows; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    encoded_y,
    test_size=0.2,
    random_state=15,
)

# Lookup table with one row per mood and its integer code, ordered by code.
target = (
    pd.DataFrame({'mood': df['mood'].tolist(), 'encode': encoded_y})
    .drop_duplicates()
    .sort_values(['encode'], ascending=True)
)
target

Unnamed: 0,mood,encode
5,Calm,0
4,Energetic,1
0,Happy,2
1,Sad,3


In [15]:
# Display the string mood labels.
Y

0          Happy
1            Sad
2            Sad
3          Happy
4      Energetic
         ...    
681         Calm
682          Sad
683          Sad
684          Sad
685         Calm
Name: mood, Length: 686, dtype: object

In [16]:
# Display the integer class codes produced by the label encoder.
encoded_y

array([2, 3, 3, 2, 1, 0, 0, 2, 1, 1, 1, 0, 0, 1, 2, 0, 1, 3, 3, 1, 3, 3,
       1, 1, 3, 2, 1, 0, 0, 3, 3, 3, 1, 2, 1, 2, 1, 3, 0, 0, 0, 1, 1, 0,
       3, 3, 3, 3, 0, 1, 0, 3, 2, 1, 0, 3, 2, 3, 2, 0, 1, 3, 3, 3, 1, 1,
       1, 1, 3, 0, 3, 2, 2, 1, 0, 2, 1, 0, 1, 0, 3, 2, 3, 2, 0, 2, 0, 1,
       3, 1, 2, 1, 0, 2, 0, 0, 1, 0, 3, 3, 1, 0, 0, 2, 1, 0, 1, 3, 3, 2,
       1, 0, 2, 1, 2, 2, 3, 0, 1, 0, 0, 0, 3, 2, 2, 3, 1, 3, 3, 1, 3, 3,
       0, 0, 0, 3, 2, 0, 3, 1, 0, 3, 1, 1, 3, 2, 2, 2, 3, 2, 1, 3, 0, 2,
       1, 1, 3, 1, 0, 3, 2, 3, 2, 0, 1, 3, 0, 1, 0, 0, 0, 2, 1, 3, 2, 2,
       1, 3, 3, 3, 1, 1, 2, 1, 0, 3, 0, 0, 3, 3, 0, 1, 0, 0, 3, 1, 0, 0,
       0, 2, 2, 2, 1, 0, 0, 2, 1, 2, 2, 2, 1, 3, 0, 1, 1, 2, 2, 3, 3, 0,
       0, 0, 0, 2, 1, 1, 1, 1, 2, 2, 3, 1, 2, 3, 1, 2, 2, 1, 1, 1, 1, 1,
       2, 2, 3, 2, 2, 3, 0, 3, 3, 0, 1, 3, 2, 2, 3, 1, 3, 3, 2, 0, 3, 0,
       0, 3, 0, 0, 1, 1, 0, 0, 3, 0, 3, 1, 3, 3, 3, 0, 3, 2, 0, 2, 3, 3,
       0, 0, 3, 1, 3, 3, 2, 2, 1, 0, 0, 0, 3, 2, 0,

In [17]:
# Display the one-hot label matrix; row i is the label of the i-th labelled song.
dummy_y

array([[0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       ...,
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [1., 0., 0., 0.]], dtype=float32)

In [18]:
def base_model():
    """Build and compile the mood classifier network.

    The network has an 8-unit ReLU hidden layer that takes 10 input features,
    followed by a 4-unit softmax output layer. It is compiled with categorical
    cross-entropy loss, the Adam optimizer and accuracy as the reported metric.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Dense(8, input_dim=10, activation='relu'),
        Dense(4, activation='softmax'),
    ]
    network = Sequential(layers)
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [19]:
# Configure the model. `model=` is the current scikeras argument for the
# model-building callable; `build_fn=` is its deprecated alias. Training
# settings: 300 epochs, batches of 200 samples, no progress output.
estimator = KerasClassifier(model=base_model, epochs=300, batch_size=200, verbose=0)

In [20]:
# Compare the shapes of the integer-coded and one-hot label arrays.
encoded_y.shape, dummy_y.shape

((686,), (686, 4))

In [21]:
# Shape of the scaled feature matrix of the labelled data.
X.shape

(686, 10)

In [22]:
# Evaluate the model using 10-fold cross validation.
# The shuffle is seeded so the fold assignment is the same on every run.
# NOTE(review): no TensorFlow seed is set in this notebook, so the scores can
# presumably still vary a little between runs.
kfold = KFold(n_splits=10, shuffle=True, random_state=15)
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
# Mean accuracy and its standard deviation across the folds, in percent.
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

2024-02-13 00:49:31.331814: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-02-13 00:49:31.331834: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-02-13 00:49:31.331841: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-02-13 00:49:31.331868: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-02-13 00:49:31.331883: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2024-02-13 00:49:31.631774: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Baseline: 78.86% (3.95%)


In [23]:
# Train on the training split only. Y_train holds integer class codes, so it is
# one-hot encoded directly. Do NOT index dummy_y with the codes: dummy_y[code]
# is the label row of the code-th song in the data set, not the one-hot vector
# of class `code`, and would silently train on wrong labels.
estimator.fit(
    X_train,
    utils.to_categorical(Y_train, num_classes=dummy_y.shape[1]),
)
# Predictions for the held-out rows.
y_preds = estimator.predict(X_test)

In [24]:
# Join the scaler and the model in a Pipeline so that raw feature values can be
# passed in and are scaled the same way at fit and predict time.
# `model=` is the current scikeras argument; `build_fn=` is its deprecated alias.
pip = Pipeline([
    ('minmaxscaler', MinMaxScaler()),
    ('keras', KerasClassifier(model=base_model, epochs=300, batch_size=200, verbose=0)),
])
# Fit the Pipeline on all labelled rows (unscaled features, one-hot labels).
pip.fit(X2, dummy_y)

In [25]:
def predict_mood(preds):
    """Return the mood name predicted for one song's feature vector.

    Args:
        preds: Sequence of feature values for a single song, in the order the
            fitted pipeline `pip` was trained on.

    Returns:
        The mood label as a string. It is looked up in the `target` table using
        the position of the largest entry in the pipeline's prediction.
    """
    # Lay the values out as a single row before handing them to the pipeline.
    feature_row = np.array(preds[:]).reshape(1, -1)

    predicted = pip.predict(feature_row)
    class_code = int(np.argmax(predicted))

    # Translate the integer class code back to its mood name.
    matches = target.loc[target['encode'] == class_code, 'mood']
    return str(np.array(matches)[0])

In [26]:
# Predict every row with one batched pipeline call instead of one predict()
# call per row; the per-row loop pays the fixed call overhead once for each of
# the rows in mood_trans_np.
if len(mood_trans_np) == 0:
    res = []
else:
    batch_pred = np.asarray(pip.predict(mood_trans_np))
    # One row of class indicators per song; the reshape keeps that layout even
    # if the wrapper squeezes its output.
    batch_pred = batch_pred.reshape(len(mood_trans_np), -1)
    class_codes = np.argmax(batch_pred, axis=1)

    # Translate the integer class codes back to mood names via the lookup table.
    code_to_mood = dict(zip(target['encode'], target['mood']))
    res = [str(code_to_mood[int(code)]) for code in class_codes]

In [27]:
# Row/column count of the frame before the Mood column is attached.
spotify_df.shape

(113999, 21)

In [28]:
# Number of predictions produced; it should match the row count of spotify_df.
print(len(res))

113999


In [29]:
# Attach the predicted moods to the tracks. The lengths must match exactly:
# np.resize would silently repeat or truncate the predictions on a mismatch and
# mislabel rows, so fail loudly instead.
if len(res) != len(spotify_df):
    raise ValueError(
        f"Got {len(res)} mood predictions for {len(spotify_df)} tracks"
    )
# A plain list is assigned by position, independent of the frame's index labels.
spotify_df['Mood'] = res

In [30]:
# Write the tracks together with their predicted Mood to a CSV file.
# NOTE(review): to_csv also writes the index as an unnamed first column by
# default; pass index=False if consumers of this file do not expect it — confirm.
spotify_df.to_csv('kaggleMusicMoodFinal3.csv')

In [31]:
# Number of tracks predicted as "Sad".
res.count("Sad")

37308

In [32]:
# Number of tracks predicted as "Happy".
res.count("Happy")

35943

In [33]:
# Number of tracks predicted as "Energetic".
res.count("Energetic")

33980

In [34]:
# Number of tracks predicted as "Calm".
res.count("Calm")

6768

In [35]:
# Shape after the Mood column was added (one more column than before).
spotify_df.shape

(113999, 22)