# Libraries

In [43]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

from sklearn.preprocessing import normalize
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

import warnings
warnings.filterwarnings('ignore')

# Read DataFrame

In [14]:
df = pd.read_csv("processed_data.csv")
df.head(3)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,time_signature,popularity,song_name,artist_name,genius_id,lyric,tags,tier,genius_url,released_year
0,0.507,0.394,6,-7.188,0,0.035,0.508,0.0,0.0825,0.164,...,4,85,Shot Glass of Tears,Jung Kook,9626003,"Tell me, am I ever gonna feel again? Tell me,...",pop,C,https://genius.com/Jung-kook-shot-glass-of-tea...,2023.0
1,0.842,0.734,1,-5.065,0,0.0588,0.0427,0.0,0.106,0.952,...,4,81,Sucker,Jonas Brothers,4350998,We go together Better than birds of a feather...,pop,D,https://genius.com/Jonas-brothers-sucker-lyrics,2019.0
2,0.759,0.561,0,-5.643,0,0.131,0.803,0.121,0.101,0.491,...,4,82,VISTA AL MAR,Quevedo,8357473,BlueFire Rápido llega' y rápido te vas (Vas) ...,pop,E,https://genius.com/Quevedo-vista-al-mar-lyrics,2022.0


## Data normalization

In [31]:
scaler = MinMaxScaler()

In [32]:
df_normalized = df.copy()
df_normalized.iloc[:, :-12] = pd.DataFrame(scaler.fit_transform(df_normalized.iloc[:, :-12]), columns=df_normalized.iloc[:, :-12].columns)

In [33]:
df_normalized

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,time_signature,popularity,song_name,artist_name,genius_id,lyric,tags,tier,genius_url,released_year
0,0.435897,0.387650,0.545455,0.752007,0.0,0.021643,0.513646,0.000000,0.069130,0.161432,...,4,85,Shot Glass of Tears,Jung Kook,9626003,"Tell me, am I ever gonna feel again? Tell me,...",pop,C,https://genius.com/Jung-kook-shot-glass-of-tea...,2023.0
1,0.844933,0.736977,0.090909,0.830893,0.0,0.063523,0.043168,0.000000,0.096526,0.970234,...,4,81,Sucker,Jonas Brothers,4350998,We go together Better than birds of a feather...,pop,D,https://genius.com/Jonas-brothers-sucker-lyrics,2019.0
2,0.743590,0.559231,0.000000,0.809416,0.0,0.190568,0.811930,0.122594,0.090697,0.497064,...,4,82,VISTA AL MAR,Quevedo,8357473,BlueFire Rápido llega' y rápido te vas (Vas) ...,pop,E,https://genius.com/Quevedo-vista-al-mar-lyrics,2022.0
3,0.700855,0.631152,0.363636,0.854860,0.0,0.080767,0.251764,0.000000,0.221264,0.975366,...,4,82,WANDA,Quevedo,8719683,"O-O-Ovy On The Drums Es usted o nadie, ¿oyó? ...",pop,E,https://genius.com/Quevedo-wanda-lyrics,2023.0
4,0.750916,0.348608,1.000000,0.812649,0.0,0.303185,0.225474,0.000000,0.085218,0.187092,...,4,86,Don't,Bryson Tiller,579968,"Don't, don't play with her, don't be dishones...",r-b,C,https://genius.com/Bryson-tiller-dont-lyrics,2014.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1448,0.416361,0.694853,0.000000,0.778054,0.0,0.026395,0.213341,0.000007,0.351830,0.415979,...,4,85,Hymn for the Weekend,Coldplay,2353271,"And said drink from me, drink from me (Oh-ah-...",r-b,C,https://genius.com/Coldplay-hymn-for-the-weeke...,2016.0
1449,0.655678,0.796568,0.454545,0.916952,1.0,0.039592,0.193118,0.000000,0.167638,0.681816,...,4,86,Happier,Marshmello,3792450,"Lately, I've been, I've been thinking I want ...",pop,D,https://genius.com/Marshmello-and-bastille-hap...,2018.0
1450,0.954823,0.452379,1.000000,0.661638,0.0,0.055253,0.017485,0.044174,0.021217,0.900439,...,4,81,Billie Jean,Michael Jackson,1644,She was more like a beauty queen from a movie...,pop,C,https://genius.com/Michael-jackson-billie-jean...,1982.0
1451,0.175824,0.641426,1.000000,0.672934,1.0,0.011262,0.016474,0.222898,0.103521,0.225069,...,4,82,November Rain,Guns N' Roses,84547,When I look into your eyes I can see a love r...,rock,D,https://genius.com/Guns-n-roses-november-rain-...,1991.0


# Train/Test Split

In [36]:
data = df_normalized.iloc[:, :-12]
label = df_normalized.tags

In [39]:
X_train, X_test, y_train, y_test = train_test_split(data, label, test_size = 0.2, random_state = 18) 

# Classifier Definition

In [10]:
rfc_model = RandomForestClassifier()
dt_model = DecisionTreeClassifier()
svm_model = SVC()

## Param-Grid Design

In [44]:
rfc_param_grid = {
    'n_estimators': [100, 300, 500],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

dt_param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

svm_param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'gamma': ['scale', 'auto']
}

## Grid Search CV

In [47]:
rfc_f = GridSearchCV(rfc_model, rfc_param_grid, verbose=2)
dt_f = GridSearchCV(dt_model, dt_param_grid, verbose=2)
svm_f = GridSearchCV(svm_model, svm_param_grid, verbose=2)

## Fit DATA

In [48]:
rfc_f.fit(X_train, y_train)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=300; total time=   1.1s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=300; total time=   1.2s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=300; tot

KeyboardInterrupt: 

In [None]:
dt_f.fit(X_train, y_train)

In [None]:
svm_f.fit(X_train, y_train)