In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# leemos el dataset
attributes_spotify = pd.read_csv(
    "https://raw.githubusercontent.com/emmanueliarussi/DataScienceCapstone/master/3_MidtermProjects/ProjectBOM/data/attributes_spotify.csv")

# eliminamos columnas que no se utilizaran
attributes_spotify.drop('Unnamed: 0', inplace=True, axis=1)
attributes_spotify.drop('song_title', inplace=True, axis=1)
attributes_spotify.drop('artist', inplace=True, axis=1)

# modelos a comparar
names = [
    "KNN",
    "SVM",
    "Random Forest"
]

models = [
    KNeighborsClassifier(5),
    SVC(kernel="linear", C=0.025),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
]

data = {
    "Modelo": names,
    "TPR": [],
    "F1": [],
    "Precisión": [],
    "Exactitud": [],
}

# porcentajes para el train-validation-test
train_ratio = 0.60
validation_ratio = 0.20
test_ratio = 0.20

# split del dataset para entrenar
x_train, x_test, y_train, y_test = train_test_split(
    attributes_spotify, attributes_spotify.target, test_size=1-train_ratio)

# split del dataset para validar
x_val, x_test, y_val, y_test = train_test_split(
    x_test, y_test, test_size=test_ratio/(test_ratio + validation_ratio))

# ejecucion de los modelos
for name, model in zip(names, models):
    model.fit(x_val, y_val)
    predicted = model.predict(x_test)

    # visualizacion de datos
    data["TPR"].append(precision_score(y_test, predicted))
    data["F1"].append(f1_score(y_test, predicted))
    data["Precisión"].append(precision_score(y_test, predicted))
    data["Exactitud"].append(accuracy_score(y_test, predicted))

print(pd.DataFrame(data))

          Modelo       TPR        F1  Precisión  Exactitud
0            KNN  0.540284  0.554745   0.540284   0.547030
1            SVM  0.488525  0.590099   0.488525   0.487624
2  Random Forest  0.947368  0.968215   0.947368   0.967822
