# Análisis exploratorio

In [2]:
#%pip install seaborn

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import accuracy_score, recall_score, f1_score


In [None]:
# Mount Google Drive at /content/drive/ (google.colab is only importable inside Colab).
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
import os

# Show the entries of the "Colab Notebooks" folder on the mounted Drive.
os.listdir('/content/drive/My Drive/Colab Notebooks/')


In [None]:
import os

# Make the "Colab Notebooks" folder the working directory so relative paths resolve there.
os.chdir('/content/drive/My Drive/Colab Notebooks/')

In [None]:
# Load the dataset; the relative path is resolved against the current working directory.
data = pd.read_csv('mentalhealth_algorithms.csv')

In [None]:
# Print a summary of the frame: columns, dtypes and non-null counts.
data.info()

In [None]:
# Number of missing values in each column.
data.isnull().sum()

In [None]:
# Headline descriptive statistics of the respondents, printed in Spanish.

# Averages of the two numeric columns.
age_mean = data["Age"].mean()
print("La media de edad de las personas que han respondido las encuestas es: {:.2f}".format(age_mean))

hours_day_mean = data["Hours per day"].mean()
print("El número medio de horas al día que escuchan los participantes es: {:.2f}".format(hours_day_mean))

# Share (in %) of rows whose flag column equals 1.
while_working_porc = (data["While working"] == 1).mean() * 100
print("El {:.2f}% de los participantes escuchan música mientras trabajan o estudian".format(while_working_porc))

instrumentalist_porc = (data["Instrumentalist"] == 1).mean() * 100
print("El {:.2f}% de los participantes tocan algún instrumento".format(instrumentalist_porc))

composer_porc = (data["Composer"] == 1).mean() * 100
print("El {:.2f}% de los participantes componen música".format(composer_porc))

exploratory_porc = (data["Exploratory"] == 1).mean() * 100
print("El {:.2f}% de los participantes escuchan nuevos artistas".format(exploratory_porc))

language_porc = (data["Foreign languages"] == 1).mean() * 100
print("El {:.2f}% de los participantes escuchan música en otro idioma".format(language_porc))

# Share (in %) of each answer to the "Music effects" question.
music_effects = data["Music effects"]
effects_improve_porc = (music_effects == "Improve").mean() * 100
no_effects_porc = (music_effects == "No effect").mean() * 100
effects_worse_porc = (music_effects == "Worsen").mean() * 100

effects_template = (
    "El {:.2f}% de los participantes piensan que escuchar música afecta positivamente en la salud mental, mientras que el {:.2f}% piensan que no tiene ningún efecto. "
    "Tan sólo el {:.2f}% creen que tiene efectos negativos."
)
print(effects_template.format(effects_improve_porc, no_effects_porc, effects_worse_porc))


In [None]:
# Horizontal bar chart with the number of rows per primary streaming service.
streaming_service = data["Primary streaming service"].value_counts()

fig, ax = plt.subplots()
bars = ax.barh(streaming_service.index.values, streaming_service)
ax.set_xlabel('Counts')
ax.set_ylabel('Primary streaming service')
ax.set_title('Distribution primary streaming service')
# Write the count next to the end of every bar.
ax.bar_label(bars, label_type='edge', fontsize=7, padding=3)
plt.show()

In [None]:
# For every frequency level, plot how many respondents gave that answer for
# each genre (one subplot per level).
genres = ['Frequency [Classical]', 'Frequency [Country]', 'Frequency [EDM]',
'Frequency [Folk]','Frequency [Gospel]', 'Frequency [Hip hop]', 'Frequency [Jazz]',
'Frequency [K pop]', 'Frequency [Latin]', 'Frequency [Lofi]', 'Frequency [Metal]', 'Frequency [Pop]',
'Frequency [R&B]', 'Frequency [Rap]', 'Frequency [Rock]', 'Frequency [Video game music]']
# options[i] is the numeric code stored in the data; frequencies[i] is its label.
frequencies = ['Never', 'Rarely', 'Sometimes', 'Very frequently']
options = [0, 1, 2, 3]

# plt.subplots() creates its own figure, so no separate plt.figure() call is
# made beforehand (that would only leave an extra, empty figure behind).
fig, axs = plt.subplots(2, 2, figsize=(20,35))

# axs.flat walks the grid row by row: (0,0), (0,1), (1,0), (1,1).
for ax, option, frequency in zip(axs.flat, options, frequencies):
    counts = [(data[genero] == option).sum() for genero in genres]

    bars = ax.bar(genres, counts)
    ax.set_title(f'Frequency "{frequency}"')
    ax.set_xticks(range(len(genres)))
    ax.set_xticklabels(genres, rotation=90)
    # Label the y axis of every subplot, not just the last one drawn.
    ax.set_ylabel('Counts')
    ax.bar_label(bars, label_type='edge', fontsize=7, padding=3)

plt.show()

In [None]:
# One pie chart per genre showing the distribution of listening-frequency answers.
genres = ['Frequency [Classical]', 'Frequency [Country]', 'Frequency [EDM]',
'Frequency [Folk]','Frequency [Gospel]', 'Frequency [Hip hop]', 'Frequency [Jazz]',
'Frequency [K pop]', 'Frequency [Latin]', 'Frequency [Lofi]', 'Frequency [Metal]', 'Frequency [Pop]',
'Frequency [R&B]', 'Frequency [Rap]', 'Frequency [Rock]', 'Frequency [Video game music]']
labels = {0: 'Never', 1: 'Rarely', 2: 'Sometimes', 3: 'Very frequently'}
colors = {0: 'red', 1: 'orange', 2: 'green', 3: 'blue'}

# Derive the grid from the number of genres instead of hard-coding 6x3 / 18.
n_cols = 3
n_rows = -(-len(genres) // n_cols)  # ceiling division
# squeeze=False keeps axs two-dimensional even for a single row.
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15,15), squeeze=False)
axes = axs.flatten()

for ax, genre in zip(axes, genres):
    genre_data = data[genre].value_counts()
    codes = genre_data.index

    # Unknown codes fall back to the raw value / a neutral colour instead of
    # raising KeyError, matching the tolerant labels.get() lookup.
    ax.pie(genre_data,
           labels=[labels.get(code, code) for code in codes],
           autopct='%1.1f%%',
           colors=[colors.get(code, 'gray') for code in codes])
    ax.set_title(genre)

# Hide the unused axes at the end of the grid.
for ax in axes[len(genres):]:
    ax.axis('off')

# Adjust the layout so the subplots do not overlap.
plt.tight_layout()

plt.show()

In [None]:
# Bar chart of the value counts of each mental-health score, on a 2x2 grid.
diseases = ['Anxiety', 'Depression', 'Insomnia', 'OCD']

fig, axs = plt.subplots(2, 2, figsize=(15,15))

# axs.flat visits the grid row by row, matching the order of `diseases`.
for ax, disease in zip(axs.flat, diseases):
    disease_data = data[disease].value_counts()

    bars = ax.bar(disease_data.index.values, disease_data)
    ax.bar_label(bars, label_type='edge', fontsize=7, padding=3)
    ax.set_title(disease)

plt.tight_layout()
plt.show()

In [None]:
# Correlation between the demographic / listening-habit variables.
data_correlation_info = ['Age', 'Hours per day', 'While working',
'Instrumentalist', 'Composer', 'Exploratory', 'Foreign languages']
correlation_info = data[data_correlation_info].corr()

# Annotated heatmap of the correlation matrix.
fig, ax = plt.subplots(figsize=(15,7))
sns.heatmap(correlation_info, ax=ax, cmap='coolwarm', annot=True, fmt=".2f", linewidths=0.5)
plt.show()

In [None]:
# Correlation among the four mental-health scores.
data_correlation_health = ['Anxiety', 'Depression', 'Insomnia',
'OCD']
correlation_health = data[data_correlation_health].corr()

# Annotated heatmap of the correlation matrix.
fig, ax = plt.subplots(figsize=(15,7))
sns.heatmap(correlation_health, ax=ax, cmap='coolwarm', annot=True, fmt=".2f", linewidths=0.5)
plt.show()

In [None]:
# Correlation between genre listening frequencies and the four health scores.
# Note: despite its name, genres_list also contains the health columns.
genres_list = ["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
            "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
            "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
            "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
            "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
            "Frequency [Video game music]", "Anxiety", "Depression", "Insomnia", "OCD"]
correlation_genres = data[genres_list].corr()

# Annotated heatmap of the correlation matrix.
fig, ax = plt.subplots(figsize=(15,7))
sns.heatmap(correlation_genres, ax=ax, cmap='coolwarm', annot=True, fmt=".2f", linewidths=0.5)
plt.show()

# Algoritmos predictivos

## Búsqueda hiperparámetros

### Ansiedad

In [None]:
# Predictors: the 16 genre listening-frequency columns.
X = data[["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
          "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
          "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
          "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
          "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
          "Frequency [Video game music]"]]

# Target: the Anxiety score cast to int64 and used as the class label.
y = data["Anxiety"].astype('int64')

# 5-fold grid search over C (inverse regularisation strength).
lr = LogisticRegression()
param_lr = {'C': [0.1, 1, 10, 100]}
grid_lr = GridSearchCV(estimator=lr, param_grid=param_lr, cv=5)
grid_lr.fit(X, y)

best_params_lr = grid_lr.best_params_
print("Mejores parámetros lr:", best_params_lr)


In [None]:
# 5-fold grid search over the hidden-layer layout and alpha (L2 penalty strength).
param_mlp = {'hidden_layer_sizes': [(50,), (100,), (50,50)],
             'alpha': [0.0001, 0.001, 0.01]}
# Seed the network: weight initialisation and batch shuffling are random, so
# without a fixed seed the selected hyper-parameters can differ between runs.
mlp = MLPClassifier(random_state=42)
grid_mlp = GridSearchCV(mlp, param_mlp, cv=5)
grid_mlp.fit(X,y)
best_params_mlp = grid_mlp.best_params_
print("Mejores parámetros mlp:",best_params_mlp)

In [None]:
# 5-fold grid search over the number of neighbours of a k-NN classifier.
knn = KNeighborsClassifier()
param_knn = {'n_neighbors': [2, 3, 5, 7, 8, 10]}
grid_knn = GridSearchCV(estimator=knn, param_grid=param_knn, cv=5)
grid_knn.fit(X, y)

best_params_knn = grid_knn.best_params_
print("Mejores parámetros knn:", best_params_knn)

In [None]:
# 5-fold grid search over tree depth and the minimum samples needed to split a node.
dt = DecisionTreeClassifier()
param_dt = {
    'max_depth': [None, 5, 10, 15, 20],
    'min_samples_split': [2, 3, 5, 10],
}
grid_dt = GridSearchCV(estimator=dt, param_grid=param_dt, cv=5)
grid_dt.fit(X, y)

best_params_dt = grid_dt.best_params_
print("Mejores parámetros dt:", best_params_dt)

In [None]:
# 5-fold grid search over the number of trees and their maximum depth.
param_rf = {'n_estimators': [100, 200, 300], 'max_depth': [None, 10, 20]}
# Seed the forest: bootstrap sampling and feature selection are random, so
# without a fixed seed the selected hyper-parameters can differ between runs.
rf = RandomForestClassifier(random_state=42)
grid_rf = GridSearchCV(rf, param_rf, cv=5)
grid_rf.fit(X,y)
best_params_rf = grid_rf.best_params_
print("Mejores parámetros rf:",best_params_rf)

### Depresión

In [None]:
# Predictors: the 16 genre listening-frequency columns.
X = data[["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
            "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
            "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
            "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
            "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
            "Frequency [Video game music]"]]

# Target: the Depression score cast to int64 and used as the class label.
y = data["Depression"].astype('int64')

# 5-fold grid search over C (inverse regularisation strength) and penalty type.
param_lr = {'C':[0.1,1,10,100], 'penalty': ['l1', 'l2']}
# The default 'lbfgs' solver rejects penalty='l1', so every L1 candidate would
# fail to fit and never be scored. 'saga' accepts both penalties; max_iter is
# raised above the default of 100 to give it room to converge.
# NOTE: if an L1 model wins, the final estimator must also use solver='saga'.
lr = LogisticRegression(solver='saga', max_iter=5000)
grid_lr = GridSearchCV(lr, param_lr, cv=5)
grid_lr.fit(X,y)
best_params_lr = grid_lr.best_params_
print("Mejores parámetros lr:", best_params_lr)


In [None]:
# 5-fold grid search over the hidden-layer layout and alpha (L2 penalty strength).
param_mlp = {'hidden_layer_sizes': [(50,), (100,), (50,50)],
             'alpha': [0.0001, 0.001, 0.01]}
# Seed the network: weight initialisation and batch shuffling are random, so
# without a fixed seed the selected hyper-parameters can differ between runs.
mlp = MLPClassifier(random_state=42)
grid_mlp = GridSearchCV(mlp, param_mlp, cv=5)
grid_mlp.fit(X,y)
best_params_mlp = grid_mlp.best_params_
print("Mejores parámetros mlp:",best_params_mlp)

In [None]:
# 5-fold grid search over the number of neighbours of a k-NN classifier.
knn = KNeighborsClassifier()
param_knn = {'n_neighbors': [2, 3, 5, 7, 8, 10]}
grid_knn = GridSearchCV(estimator=knn, param_grid=param_knn, cv=5)
grid_knn.fit(X, y)

best_params_knn = grid_knn.best_params_
print("Mejores parámetros knn:", best_params_knn)

In [None]:
# 5-fold grid search over tree depth and the minimum samples needed to split a node.
dt = DecisionTreeClassifier()
param_dt = {
    'max_depth': [None, 5, 10, 15, 20],
    'min_samples_split': [2, 3, 5, 10],
}
grid_dt = GridSearchCV(estimator=dt, param_grid=param_dt, cv=5)
grid_dt.fit(X, y)

best_params_dt = grid_dt.best_params_
print("Mejores parámetros dt:", best_params_dt)

In [None]:
# 5-fold grid search over the number of trees and their maximum depth.
param_rf = {'n_estimators': [100, 200, 300], 'max_depth': [None, 10, 20]}
# Seed the forest: bootstrap sampling and feature selection are random, so
# without a fixed seed the selected hyper-parameters can differ between runs.
rf = RandomForestClassifier(random_state=42)
grid_rf = GridSearchCV(rf, param_rf, cv=5)
grid_rf.fit(X,y)
best_params_rf = grid_rf.best_params_
print("Mejores parámetros rf:",best_params_rf)

### Insomnio

In [None]:
# Predictors: the 16 genre listening-frequency columns.
X = data[["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
            "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
            "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
            "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
            "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
            "Frequency [Video game music]"]]

# Target: the Insomnia score cast to int64 and used as the class label.
y = data["Insomnia"].astype('int64')

# 5-fold grid search over C (inverse regularisation strength) and penalty type.
param_lr = {'C':[0.1,1,10,100], 'penalty': ['l1', 'l2']}
# The default 'lbfgs' solver rejects penalty='l1', so every L1 candidate would
# fail to fit and never be scored. 'saga' accepts both penalties; max_iter is
# raised above the default of 100 to give it room to converge.
# NOTE: if an L1 model wins, the final estimator must also use solver='saga'.
lr = LogisticRegression(solver='saga', max_iter=5000)
grid_lr = GridSearchCV(lr, param_lr, cv=5)
grid_lr.fit(X,y)
best_params_lr = grid_lr.best_params_
print("Mejores parámetros lr:", best_params_lr)

In [None]:
# 5-fold grid search over the hidden-layer layout and alpha (L2 penalty strength).
param_mlp = {'hidden_layer_sizes': [(50,), (100,), (50,50)],
             'alpha': [0.0001, 0.001, 0.01]}
# Seed the network: weight initialisation and batch shuffling are random, so
# without a fixed seed the selected hyper-parameters can differ between runs.
mlp = MLPClassifier(random_state=42)
grid_mlp = GridSearchCV(mlp, param_mlp, cv=5)
grid_mlp.fit(X,y)
best_params_mlp = grid_mlp.best_params_
print("Mejores parámetros mlp:",best_params_mlp)

In [None]:
# 5-fold grid search over the number of neighbours of a k-NN classifier.
knn = KNeighborsClassifier()
param_knn = {'n_neighbors': [2, 3, 5, 7, 8, 10]}
grid_knn = GridSearchCV(estimator=knn, param_grid=param_knn, cv=5)
grid_knn.fit(X, y)

best_params_knn = grid_knn.best_params_
print("Mejores parámetros knn:", best_params_knn)

In [None]:
# 5-fold grid search over tree depth and the minimum samples needed to split a node.
dt = DecisionTreeClassifier()
param_dt = {
    'max_depth': [None, 5, 10, 15, 20],
    'min_samples_split': [2, 3, 5, 10],
}
grid_dt = GridSearchCV(estimator=dt, param_grid=param_dt, cv=5)
grid_dt.fit(X, y)

best_params_dt = grid_dt.best_params_
print("Mejores parámetros dt:", best_params_dt)

In [None]:
# 5-fold grid search over the number of trees and their maximum depth.
param_rf = {'n_estimators': [100, 200, 300], 'max_depth': [None, 10, 20]}
# Seed the forest: bootstrap sampling and feature selection are random, so
# without a fixed seed the selected hyper-parameters can differ between runs.
rf = RandomForestClassifier(random_state=42)
grid_rf = GridSearchCV(rf, param_rf, cv=5)
grid_rf.fit(X,y)
best_params_rf = grid_rf.best_params_
print("Mejores parámetros rf:",best_params_rf)

### OCD

In [None]:
# Predictors: the 16 genre listening-frequency columns.
X = data[["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
            "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
            "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
            "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
            "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
            "Frequency [Video game music]"]]

# Target: the OCD score cast to int64 and used as the class label.
y = data["OCD"].astype('int64')

# 5-fold grid search over C (inverse regularisation strength) and penalty type.
param_lr = {'C':[0.1,1,10,100], 'penalty': ['l1', 'l2']}
# The default 'lbfgs' solver rejects penalty='l1', so every L1 candidate would
# fail to fit and never be scored. 'saga' accepts both penalties; max_iter is
# raised above the default of 100 to give it room to converge.
# NOTE: if an L1 model wins, the final estimator must also use solver='saga'.
lr = LogisticRegression(solver='saga', max_iter=5000)
grid_lr = GridSearchCV(lr, param_lr, cv=5)
grid_lr.fit(X,y)
best_params_lr = grid_lr.best_params_
print("Mejores parámetros lr:", best_params_lr)

In [None]:
# 5-fold grid search over the hidden-layer layout and alpha (L2 penalty strength).
param_mlp = {'hidden_layer_sizes': [(50,), (100,), (50,50)],
             'alpha': [0.0001, 0.001, 0.01]}
# Seed the network: weight initialisation and batch shuffling are random, so
# without a fixed seed the selected hyper-parameters can differ between runs.
mlp = MLPClassifier(random_state=42)
grid_mlp = GridSearchCV(mlp, param_mlp, cv=5)
grid_mlp.fit(X,y)
best_params_mlp = grid_mlp.best_params_
print("Mejores parámetros mlp:",best_params_mlp)

In [None]:
# 5-fold grid search over the number of neighbours of a k-NN classifier.
knn = KNeighborsClassifier()
param_knn = {'n_neighbors': [2, 3, 5, 7, 8, 10]}
grid_knn = GridSearchCV(estimator=knn, param_grid=param_knn, cv=5)
grid_knn.fit(X, y)

best_params_knn = grid_knn.best_params_
print("Mejores parámetros knn:", best_params_knn)

In [None]:
# 5-fold grid search over tree depth and the minimum samples needed to split a node.
dt = DecisionTreeClassifier()
param_dt = {
    'max_depth': [None, 5, 10, 15, 20],
    'min_samples_split': [2, 3, 5, 10],
}
grid_dt = GridSearchCV(estimator=dt, param_grid=param_dt, cv=5)
grid_dt.fit(X, y)

best_params_dt = grid_dt.best_params_
print("Mejores parámetros dt:", best_params_dt)

In [None]:
# 5-fold grid search over the number of trees and their maximum depth.
param_rf = {'n_estimators': [100, 200, 300], 'max_depth': [None, 10, 20]}
# Seed the forest: bootstrap sampling and feature selection are random, so
# without a fixed seed the selected hyper-parameters can differ between runs.
rf = RandomForestClassifier(random_state=42)
grid_rf = GridSearchCV(rf, param_rf, cv=5)
grid_rf.fit(X,y)
best_params_rf = grid_rf.best_params_
print("Mejores parámetros rf:",best_params_rf)

## Entrenamiento

### Ansiedad

In [None]:
# 5-fold cross-validation of a logistic regression (C=10) predicting Anxiety
# from the genre frequencies; reports mean train and test metrics.

# Rebuild a default 0..n-1 index in place.
data.reset_index(drop=True, inplace=True)

X = data[["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
          "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
          "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
          "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
          "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
          "Frequency [Video game music]"]]
y = data["Anxiety"].astype('int64')

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores, measured on the training part and on the held-out part.
scores_accuracy_train, scores_recall_train, scores_f1_train = [], [], []
scores_accuracy_test, scores_recall_test, scores_f1_test = [], [], []

for train_index, test_index in kf.split(X):
    X_train_kf, y_train_kf = X.iloc[train_index], y.iloc[train_index]
    X_test_kf, y_test_kf = X.iloc[test_index], y.iloc[test_index]

    # A fresh model is fitted on every fold.
    model = LogisticRegression(C=10)
    model.fit(X_train_kf, y_train_kf)

    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the folds.
mean_accuracy_train, mean_recall_train, mean_f1_train = (
    np.mean(scores_accuracy_train), np.mean(scores_recall_train), np.mean(scores_f1_train))
mean_accuracy_test, mean_recall_test, mean_f1_test = (
    np.mean(scores_accuracy_test), np.mean(scores_recall_test), np.mean(scores_f1_test))

# The "promedio" lines are the training-set means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



In [None]:
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import accuracy_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

# 5-fold cross-validation of an MLP (alpha=0.001, one hidden layer of 50 units)
# predicting Anxiety from the genre frequencies; reports mean train and test metrics.

# Rebuild a default 0..n-1 index in place.
data.reset_index(drop=True, inplace=True)

X = data[["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
          "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
          "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
          "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
          "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
          "Frequency [Video game music]"]]
y = data["Anxiety"].astype('int64')

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores, measured on the training part and on the held-out part.
scores_accuracy_train, scores_recall_train, scores_f1_train = [], [], []
scores_accuracy_test, scores_recall_test, scores_f1_test = [], [], []

for train_index, test_index in kf.split(X):
    X_train_kf, y_train_kf = X.iloc[train_index], y.iloc[train_index]
    X_test_kf, y_test_kf = X.iloc[test_index], y.iloc[test_index]

    # Seed the network as well as the folds; otherwise the random weight
    # initialisation makes the reported metrics change on every run.
    model = MLPClassifier(alpha=0.001, hidden_layer_sizes=(50,), random_state=42)
    model.fit(X_train_kf, y_train_kf)

    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-set means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



In [None]:
# 5-fold cross-validation of a k-NN classifier (10 neighbours) predicting
# Anxiety from the genre frequencies; reports mean train and test metrics.

# Rebuild a default 0..n-1 index in place.
data.reset_index(drop=True, inplace=True)

X = data[["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
          "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
          "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
          "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
          "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
          "Frequency [Video game music]"]]
y = data["Anxiety"].astype('int64')

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores, measured on the training part and on the held-out part.
scores_accuracy_train, scores_recall_train, scores_f1_train = [], [], []
scores_accuracy_test, scores_recall_test, scores_f1_test = [], [], []


def _macro_scores(y_true, y_pred):
    """Return (accuracy, macro-averaged recall, macro-averaged F1)."""
    return (accuracy_score(y_true, y_pred),
            recall_score(y_true, y_pred, average='macro'),
            f1_score(y_true, y_pred, average='macro'))


for train_index, test_index in kf.split(X):
    X_train_kf, X_test_kf = X.iloc[train_index], X.iloc[test_index]
    y_train_kf, y_test_kf = y.iloc[train_index], y.iloc[test_index]

    # A fresh model is fitted on every fold.
    model = KNeighborsClassifier(n_neighbors=10)
    model.fit(X_train_kf, y_train_kf)

    accuracy_fold, recall_fold, f1_fold = _macro_scores(y_train_kf, model.predict(X_train_kf))
    scores_accuracy_train.append(accuracy_fold)
    scores_recall_train.append(recall_fold)
    scores_f1_train.append(f1_fold)

    test_accuracy, test_recall, test_f1 = _macro_scores(y_test_kf, model.predict(X_test_kf))
    scores_accuracy_test.append(test_accuracy)
    scores_recall_test.append(test_recall)
    scores_f1_test.append(test_f1)

# Average each metric over the folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-set means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



In [None]:
# 5-fold cross-validation of a decision tree (max_depth=5, min_samples_split=2)
# predicting Anxiety from the genre frequencies; reports mean train and test metrics.

# Rebuild a default 0..n-1 index in place.
data.reset_index(drop=True, inplace=True)

X = data[["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
          "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
          "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
          "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
          "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
          "Frequency [Video game music]"]]
y = data["Anxiety"].astype('int64')

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores, measured on the training part and on the held-out part.
scores_accuracy_train, scores_recall_train, scores_f1_train = [], [], []
scores_accuracy_test, scores_recall_test, scores_f1_test = [], [], []


def _macro_scores(y_true, y_pred):
    """Return (accuracy, macro-averaged recall, macro-averaged F1)."""
    return (accuracy_score(y_true, y_pred),
            recall_score(y_true, y_pred, average='macro'),
            f1_score(y_true, y_pred, average='macro'))


for train_index, test_index in kf.split(X):
    X_train_kf, X_test_kf = X.iloc[train_index], X.iloc[test_index]
    y_train_kf, y_test_kf = y.iloc[train_index], y.iloc[test_index]

    # A fresh model is fitted on every fold.
    model = DecisionTreeClassifier(max_depth=5, min_samples_split=2)
    model.fit(X_train_kf, y_train_kf)

    accuracy_fold, recall_fold, f1_fold = _macro_scores(y_train_kf, model.predict(X_train_kf))
    scores_accuracy_train.append(accuracy_fold)
    scores_recall_train.append(recall_fold)
    scores_f1_train.append(f1_fold)

    test_accuracy, test_recall, test_f1 = _macro_scores(y_test_kf, model.predict(X_test_kf))
    scores_accuracy_test.append(test_accuracy)
    scores_recall_test.append(test_recall)
    scores_f1_test.append(test_f1)

# Average each metric over the folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-set means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



In [None]:
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import accuracy_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

# 5-fold cross-validation of a random forest (200 trees, max_depth=10)
# predicting Anxiety from the genre frequencies; reports mean train and test metrics.

# Rebuild a default 0..n-1 index in place.
data.reset_index(drop=True, inplace=True)

X = data[["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
          "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
          "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
          "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
          "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
          "Frequency [Video game music]"]]
y = data["Anxiety"].astype('int64')

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores, measured on the training part and on the held-out part.
scores_accuracy_train, scores_recall_train, scores_f1_train = [], [], []
scores_accuracy_test, scores_recall_test, scores_f1_test = [], [], []

for train_index, test_index in kf.split(X):
    X_train_kf, y_train_kf = X.iloc[train_index], y.iloc[train_index]
    X_test_kf, y_test_kf = X.iloc[test_index], y.iloc[test_index]

    # Seed the forest as well as the folds; otherwise bootstrap sampling makes
    # the reported metrics change on every run.
    model = RandomForestClassifier(max_depth=10, n_estimators=200, random_state=42)
    model.fit(X_train_kf, y_train_kf)

    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-set means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



### Depresión

In [None]:
# 5-fold cross-validation of a logistic regression (C=0.1) predicting Depression
# from the genre frequencies; reports mean train and test metrics.

# Rebuild a default 0..n-1 index in place.
data.reset_index(drop=True, inplace=True)

X = data[["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
          "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
          "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
          "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
          "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
          "Frequency [Video game music]"]]
y = data["Depression"].astype('int64')

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores, measured on the training part and on the held-out part.
scores_accuracy_train, scores_recall_train, scores_f1_train = [], [], []
scores_accuracy_test, scores_recall_test, scores_f1_test = [], [], []

for train_index, test_index in kf.split(X):
    X_train_kf, y_train_kf = X.iloc[train_index], y.iloc[train_index]
    X_test_kf, y_test_kf = X.iloc[test_index], y.iloc[test_index]

    # A fresh model is fitted on every fold.
    model = LogisticRegression(C=0.1)
    model.fit(X_train_kf, y_train_kf)

    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the folds.
mean_accuracy_train, mean_recall_train, mean_f1_train = (
    np.mean(scores_accuracy_train), np.mean(scores_recall_train), np.mean(scores_f1_train))
mean_accuracy_test, mean_recall_test, mean_f1_test = (
    np.mean(scores_accuracy_test), np.mean(scores_recall_test), np.mean(scores_f1_test))

# The "promedio" lines are the training-set means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



In [None]:
# 5-fold cross-validation of an MLP (alpha=0.001, one hidden layer of 50 units)
# predicting Depression from the genre frequencies; reports mean train and test metrics.

# Rebuild a default 0..n-1 index in place.
data.reset_index(drop=True, inplace=True)

X = data[["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
          "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
          "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
          "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
          "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
          "Frequency [Video game music]"]]
y = data["Depression"].astype('int64')

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores, measured on the training part and on the held-out part.
scores_accuracy_train, scores_recall_train, scores_f1_train = [], [], []
scores_accuracy_test, scores_recall_test, scores_f1_test = [], [], []

for train_index, test_index in kf.split(X):
    X_train_kf, y_train_kf = X.iloc[train_index], y.iloc[train_index]
    X_test_kf, y_test_kf = X.iloc[test_index], y.iloc[test_index]

    # Seed the network as well as the folds; otherwise the random weight
    # initialisation makes the reported metrics change on every run.
    model = MLPClassifier(alpha=0.001, hidden_layer_sizes=(50,), random_state=42)
    model.fit(X_train_kf, y_train_kf)

    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-set means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



In [None]:
# 5-fold cross-validation of a k-NN classifier (2 neighbours) predicting
# Depression from the genre frequencies; reports mean train and test metrics.

# Rebuild a default 0..n-1 index in place.
data.reset_index(drop=True, inplace=True)

X = data[["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
          "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
          "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
          "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
          "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
          "Frequency [Video game music]"]]
y = data["Depression"].astype('int64')

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores, measured on the training part and on the held-out part.
scores_accuracy_train, scores_recall_train, scores_f1_train = [], [], []
scores_accuracy_test, scores_recall_test, scores_f1_test = [], [], []


def _macro_scores(y_true, y_pred):
    """Return (accuracy, macro-averaged recall, macro-averaged F1)."""
    return (accuracy_score(y_true, y_pred),
            recall_score(y_true, y_pred, average='macro'),
            f1_score(y_true, y_pred, average='macro'))


for train_index, test_index in kf.split(X):
    X_train_kf, X_test_kf = X.iloc[train_index], X.iloc[test_index]
    y_train_kf, y_test_kf = y.iloc[train_index], y.iloc[test_index]

    # A fresh model is fitted on every fold.
    model = KNeighborsClassifier(n_neighbors=2)
    model.fit(X_train_kf, y_train_kf)

    accuracy_fold, recall_fold, f1_fold = _macro_scores(y_train_kf, model.predict(X_train_kf))
    scores_accuracy_train.append(accuracy_fold)
    scores_recall_train.append(recall_fold)
    scores_f1_train.append(f1_fold)

    test_accuracy, test_recall, test_f1 = _macro_scores(y_test_kf, model.predict(X_test_kf))
    scores_accuracy_test.append(test_accuracy)
    scores_recall_test.append(test_recall)
    scores_f1_test.append(test_f1)

# Average each metric over the folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-set means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



In [None]:
# 5-fold cross-validation of a decision tree (max_depth=5, min_samples_split=2)
# predicting Depression from the genre frequencies; reports mean train and test metrics.

# Rebuild a default 0..n-1 index in place.
data.reset_index(drop=True, inplace=True)

X = data[["Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
          "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
          "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
          "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
          "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
          "Frequency [Video game music]"]]
y = data["Depression"].astype('int64')

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores, measured on the training part and on the held-out part.
scores_accuracy_train, scores_recall_train, scores_f1_train = [], [], []
scores_accuracy_test, scores_recall_test, scores_f1_test = [], [], []


def _macro_scores(y_true, y_pred):
    """Return (accuracy, macro-averaged recall, macro-averaged F1)."""
    return (accuracy_score(y_true, y_pred),
            recall_score(y_true, y_pred, average='macro'),
            f1_score(y_true, y_pred, average='macro'))


for train_index, test_index in kf.split(X):
    X_train_kf, X_test_kf = X.iloc[train_index], X.iloc[test_index]
    y_train_kf, y_test_kf = y.iloc[train_index], y.iloc[test_index]

    # A fresh model is fitted on every fold.
    model = DecisionTreeClassifier(max_depth=5, min_samples_split=2)
    model.fit(X_train_kf, y_train_kf)

    accuracy_fold, recall_fold, f1_fold = _macro_scores(y_train_kf, model.predict(X_train_kf))
    scores_accuracy_train.append(accuracy_fold)
    scores_recall_train.append(recall_fold)
    scores_f1_train.append(f1_fold)

    test_accuracy, test_recall, test_f1 = _macro_scores(y_test_kf, model.predict(X_test_kf))
    scores_accuracy_test.append(test_accuracy)
    scores_recall_test.append(test_recall)
    scores_f1_test.append(test_f1)

# Average each metric over the folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-set means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



In [None]:
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import accuracy_score, recall_score, f1_score

# 5-fold cross-validation of a random forest on the "Depression" target.

# Positional indexing (.iloc) is used below, so start from a clean 0..n-1 index.
data.reset_index(drop=True, inplace=True)

# Features: the 16 per-genre listening-frequency columns.
X = data[[
    "Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
    "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
    "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
    "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
    "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
    "Frequency [Video game music]",
]]

y = data["Depression"].astype('int64')  # integer class labels

# One seed shared by the fold shuffling and the estimator so that re-running
# the cell reproduces exactly the same scores.
RANDOM_STATE = 42

kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

# Per-fold scores measured on the training part of each split.
scores_accuracy_train = []
scores_recall_train = []
scores_f1_train = []

# Per-fold scores measured on the held-out part of each split.
scores_accuracy_test = []
scores_recall_test = []
scores_f1_test = []

for train_index, test_index in kf.split(X):
    # Fix: the forest was unseeded, so its bootstrap samples and per-split
    # feature subsets (and therefore the reported scores) changed on every run.
    model = RandomForestClassifier(max_depth=10, n_estimators=100,
                                   random_state=RANDOM_STATE)

    X_train_kf, X_test_kf = X.iloc[train_index], X.iloc[test_index]
    y_train_kf, y_test_kf = y.iloc[train_index], y.iloc[test_index]

    model.fit(X_train_kf, y_train_kf)
    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    # average='macro' gives every class the same weight in recall and F1.
    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the five folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-split means; the "test" lines are the
# held-out means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



### Insomnio

In [None]:
# 5-fold cross-validation of a logistic regression on the "Insomnia" target.

# Positional indexing (.iloc) is used below, so start from a clean 0..n-1 index.
data.reset_index(drop=True, inplace=True)

# Features: the 16 per-genre listening-frequency columns.
feature_columns = [
    "Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
    "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
    "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
    "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
    "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
    "Frequency [Video game music]",
]
X = data[feature_columns]
y = data["Insomnia"].astype('int64')  # integer class labels

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores: first on the training part, then on the held-out part.
scores_accuracy_train, scores_recall_train, scores_f1_train = [], [], []
scores_accuracy_test, scores_recall_test, scores_f1_test = [], [], []

for train_index, test_index in kf.split(X):
    X_train_kf, y_train_kf = X.iloc[train_index], y.iloc[train_index]
    X_test_kf, y_test_kf = X.iloc[test_index], y.iloc[test_index]

    # A fresh estimator per fold so nothing carries over between splits.
    model = LogisticRegression(C=0.1)
    model.fit(X_train_kf, y_train_kf)

    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    # average='macro' gives every class the same weight in recall and F1.
    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the five folds.
mean_accuracy_train, mean_recall_train, mean_f1_train = (
    np.mean(scores_accuracy_train),
    np.mean(scores_recall_train),
    np.mean(scores_f1_train),
)
mean_accuracy_test, mean_recall_test, mean_f1_test = (
    np.mean(scores_accuracy_test),
    np.mean(scores_recall_test),
    np.mean(scores_f1_test),
)

# The "promedio" lines are the training-split means; the "test" lines are the
# held-out means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



In [None]:
# 5-fold cross-validation of a one-hidden-layer MLP on the "Insomnia" target.

# Positional indexing (.iloc) is used below, so start from a clean 0..n-1 index.
data.reset_index(drop=True, inplace=True)

# Features: the 16 per-genre listening-frequency columns.
X = data[[
    "Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
    "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
    "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
    "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
    "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
    "Frequency [Video game music]",
]]

y = data["Insomnia"].astype('int64')  # integer class labels

# One seed shared by the fold shuffling and the estimator so that re-running
# the cell reproduces exactly the same scores.
RANDOM_STATE = 42

kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

# Per-fold scores measured on the training part of each split.
scores_accuracy_train = []
scores_recall_train = []
scores_f1_train = []

# Per-fold scores measured on the held-out part of each split.
scores_accuracy_test = []
scores_recall_test = []
scores_f1_test = []

for train_index, test_index in kf.split(X):
    # Fix: the network was unseeded, so its initial weights and batch sampling
    # (and therefore the reported scores) changed on every run.
    model = MLPClassifier(alpha=0.001, hidden_layer_sizes=(50,),
                          random_state=RANDOM_STATE)

    X_train_kf, X_test_kf = X.iloc[train_index], X.iloc[test_index]
    y_train_kf, y_test_kf = y.iloc[train_index], y.iloc[test_index]

    model.fit(X_train_kf, y_train_kf)
    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    # average='macro' gives every class the same weight in recall and F1.
    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the five folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-split means; the "test" lines are the
# held-out means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



In [None]:
# 5-fold cross-validation of a 2-nearest-neighbours classifier on "Insomnia".

# Positional indexing (.iloc) is used below, so start from a clean 0..n-1 index.
data.reset_index(drop=True, inplace=True)

# Features: the 16 per-genre listening-frequency columns.
feature_columns = [
    "Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
    "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
    "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
    "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
    "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
    "Frequency [Video game music]",
]
X = data[feature_columns]
y = data["Insomnia"].astype('int64')  # integer class labels

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores: first on the training part, then on the held-out part.
scores_accuracy_train, scores_recall_train, scores_f1_train = [], [], []
scores_accuracy_test, scores_recall_test, scores_f1_test = [], [], []

for train_index, test_index in kf.split(X):
    X_train_kf, y_train_kf = X.iloc[train_index], y.iloc[train_index]
    X_test_kf, y_test_kf = X.iloc[test_index], y.iloc[test_index]

    # A fresh estimator per fold so nothing carries over between splits.
    model = KNeighborsClassifier(n_neighbors=2)
    model.fit(X_train_kf, y_train_kf)

    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    # average='macro' gives every class the same weight in recall and F1.
    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the five folds.
mean_accuracy_train, mean_recall_train, mean_f1_train = (
    np.mean(scores_accuracy_train),
    np.mean(scores_recall_train),
    np.mean(scores_f1_train),
)
mean_accuracy_test, mean_recall_test, mean_f1_test = (
    np.mean(scores_accuracy_test),
    np.mean(scores_recall_test),
    np.mean(scores_f1_test),
)

# The "promedio" lines are the training-split means; the "test" lines are the
# held-out means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



In [None]:
# 5-fold cross-validation of a decision tree on the "Insomnia" target.

# Positional indexing (.iloc) is used below, so start from a clean 0..n-1 index.
data.reset_index(drop=True, inplace=True)

# Features: the 16 per-genre listening-frequency columns.
X = data[[
    "Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
    "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
    "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
    "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
    "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
    "Frequency [Video game music]",
]]

y = data["Insomnia"].astype('int64')  # integer class labels

# One seed shared by the fold shuffling and the estimator so that re-running
# the cell reproduces exactly the same scores.
RANDOM_STATE = 42

kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

# Per-fold scores measured on the training part of each split.
scores_accuracy_train = []
scores_recall_train = []
scores_f1_train = []

# Per-fold scores measured on the held-out part of each split.
scores_accuracy_test = []
scores_recall_test = []
scores_f1_test = []

for train_index, test_index in kf.split(X):
    # Fix: the tree was unseeded. scikit-learn permutes candidate features at
    # every split, so equally good splits could be chosen differently per run.
    model = DecisionTreeClassifier(max_depth=5, min_samples_split=2,
                                   random_state=RANDOM_STATE)

    X_train_kf, X_test_kf = X.iloc[train_index], X.iloc[test_index]
    y_train_kf, y_test_kf = y.iloc[train_index], y.iloc[test_index]

    model.fit(X_train_kf, y_train_kf)
    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    # average='macro' gives every class the same weight in recall and F1.
    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the five folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-split means; the "test" lines are the
# held-out means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



In [None]:
# 5-fold cross-validation of a 300-tree random forest on the "Insomnia" target.

# Positional indexing (.iloc) is used below, so start from a clean 0..n-1 index.
data.reset_index(drop=True, inplace=True)

# Features: the 16 per-genre listening-frequency columns.
X = data[[
    "Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
    "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
    "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
    "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
    "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
    "Frequency [Video game music]",
]]

y = data["Insomnia"].astype('int64')  # integer class labels

# One seed shared by the fold shuffling and the estimator so that re-running
# the cell reproduces exactly the same scores.
RANDOM_STATE = 42

kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

# Per-fold scores measured on the training part of each split.
scores_accuracy_train = []
scores_recall_train = []
scores_f1_train = []

# Per-fold scores measured on the held-out part of each split.
scores_accuracy_test = []
scores_recall_test = []
scores_f1_test = []

for train_index, test_index in kf.split(X):
    # Fix: the forest was unseeded, so its bootstrap samples and per-split
    # feature subsets (and therefore the reported scores) changed on every run.
    model = RandomForestClassifier(max_depth=10, n_estimators=300,
                                   random_state=RANDOM_STATE)

    X_train_kf, X_test_kf = X.iloc[train_index], X.iloc[test_index]
    y_train_kf, y_test_kf = y.iloc[train_index], y.iloc[test_index]

    model.fit(X_train_kf, y_train_kf)
    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    # average='macro' gives every class the same weight in recall and F1.
    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the five folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-split means; the "test" lines are the
# held-out means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')



### OCD

In [None]:
# 5-fold cross-validation of a logistic regression on the "OCD" target.

# Positional indexing (.iloc) is used below, so start from a clean 0..n-1 index.
data.reset_index(drop=True, inplace=True)

# Features: the 16 per-genre listening-frequency columns.
feature_columns = [
    "Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
    "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
    "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
    "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
    "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
    "Frequency [Video game music]",
]
X = data[feature_columns]
y = data["OCD"].astype('int64')  # integer class labels

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores: first on the training part, then on the held-out part.
scores_accuracy_train, scores_recall_train, scores_f1_train = [], [], []
scores_accuracy_test, scores_recall_test, scores_f1_test = [], [], []

for train_index, test_index in kf.split(X):
    X_train_kf, y_train_kf = X.iloc[train_index], y.iloc[train_index]
    X_test_kf, y_test_kf = X.iloc[test_index], y.iloc[test_index]

    # A fresh estimator per fold so nothing carries over between splits.
    model = LogisticRegression(C=0.1)
    model.fit(X_train_kf, y_train_kf)

    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    # average='macro' gives every class the same weight in recall and F1.
    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the five folds.
mean_accuracy_train, mean_recall_train, mean_f1_train = (
    np.mean(scores_accuracy_train),
    np.mean(scores_recall_train),
    np.mean(scores_f1_train),
)
mean_accuracy_test, mean_recall_test, mean_f1_test = (
    np.mean(scores_accuracy_test),
    np.mean(scores_recall_test),
    np.mean(scores_f1_test),
)

# The "promedio" lines are the training-split means; the "test" lines are the
# held-out means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')


In [None]:
# 5-fold cross-validation of a one-hidden-layer MLP on the "OCD" target.

# Positional indexing (.iloc) is used below, so start from a clean 0..n-1 index.
data.reset_index(drop=True, inplace=True)

# Features: the 16 per-genre listening-frequency columns.
X = data[[
    "Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
    "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
    "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
    "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
    "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
    "Frequency [Video game music]",
]]

y = data["OCD"].astype('int64')  # integer class labels

# One seed shared by the fold shuffling and the estimator so that re-running
# the cell reproduces exactly the same scores.
RANDOM_STATE = 42

kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

# Per-fold scores measured on the training part of each split.
scores_accuracy_train = []
scores_recall_train = []
scores_f1_train = []

# Per-fold scores measured on the held-out part of each split.
scores_accuracy_test = []
scores_recall_test = []
scores_f1_test = []

for train_index, test_index in kf.split(X):
    # Fix: the network was unseeded, so its initial weights and batch sampling
    # (and therefore the reported scores) changed on every run.
    model = MLPClassifier(alpha=0.001, hidden_layer_sizes=(50,),
                          random_state=RANDOM_STATE)

    X_train_kf, X_test_kf = X.iloc[train_index], X.iloc[test_index]
    y_train_kf, y_test_kf = y.iloc[train_index], y.iloc[test_index]

    model.fit(X_train_kf, y_train_kf)
    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    # average='macro' gives every class the same weight in recall and F1.
    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the five folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-split means; the "test" lines are the
# held-out means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')


In [None]:
# 5-fold cross-validation of a 2-nearest-neighbours classifier on "OCD".

# Positional indexing (.iloc) is used below, so start from a clean 0..n-1 index.
data.reset_index(drop=True, inplace=True)

# Features: the 16 per-genre listening-frequency columns.
feature_columns = [
    "Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
    "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
    "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
    "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
    "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
    "Frequency [Video game music]",
]
X = data[feature_columns]
y = data["OCD"].astype('int64')  # integer class labels

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores: first on the training part, then on the held-out part.
scores_accuracy_train, scores_recall_train, scores_f1_train = [], [], []
scores_accuracy_test, scores_recall_test, scores_f1_test = [], [], []

for train_index, test_index in kf.split(X):
    X_train_kf, y_train_kf = X.iloc[train_index], y.iloc[train_index]
    X_test_kf, y_test_kf = X.iloc[test_index], y.iloc[test_index]

    # A fresh estimator per fold so nothing carries over between splits.
    model = KNeighborsClassifier(n_neighbors=2)
    model.fit(X_train_kf, y_train_kf)

    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    # average='macro' gives every class the same weight in recall and F1.
    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the five folds.
mean_accuracy_train, mean_recall_train, mean_f1_train = (
    np.mean(scores_accuracy_train),
    np.mean(scores_recall_train),
    np.mean(scores_f1_train),
)
mean_accuracy_test, mean_recall_test, mean_f1_test = (
    np.mean(scores_accuracy_test),
    np.mean(scores_recall_test),
    np.mean(scores_f1_test),
)

# The "promedio" lines are the training-split means; the "test" lines are the
# held-out means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')


In [None]:
# 5-fold cross-validation of a decision tree on the "OCD" target.

# Positional indexing (.iloc) is used below, so start from a clean 0..n-1 index.
data.reset_index(drop=True, inplace=True)

# Features: the 16 per-genre listening-frequency columns.
X = data[[
    "Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
    "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
    "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
    "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
    "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
    "Frequency [Video game music]",
]]

y = data["OCD"].astype('int64')  # integer class labels

# One seed shared by the fold shuffling and the estimator so that re-running
# the cell reproduces exactly the same scores.
RANDOM_STATE = 42

kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

# Per-fold scores measured on the training part of each split.
scores_accuracy_train = []
scores_recall_train = []
scores_f1_train = []

# Per-fold scores measured on the held-out part of each split.
scores_accuracy_test = []
scores_recall_test = []
scores_f1_test = []

for train_index, test_index in kf.split(X):
    # Fix: the tree was unseeded. scikit-learn permutes candidate features at
    # every split, so equally good splits could be chosen differently per run.
    model = DecisionTreeClassifier(max_depth=5, min_samples_split=3,
                                   random_state=RANDOM_STATE)

    X_train_kf, X_test_kf = X.iloc[train_index], X.iloc[test_index]
    y_train_kf, y_test_kf = y.iloc[train_index], y.iloc[test_index]

    model.fit(X_train_kf, y_train_kf)
    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    # average='macro' gives every class the same weight in recall and F1.
    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the five folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-split means; the "test" lines are the
# held-out means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')


In [None]:
# 5-fold cross-validation of a 200-tree random forest on the "OCD" target.

# Positional indexing (.iloc) is used below, so start from a clean 0..n-1 index.
data.reset_index(drop=True, inplace=True)

# Features: the 16 per-genre listening-frequency columns.
X = data[[
    "Frequency [Classical]", "Frequency [Country]", "Frequency [EDM]",
    "Frequency [Folk]", "Frequency [Gospel]", "Frequency [Hip hop]",
    "Frequency [Jazz]", "Frequency [K pop]", "Frequency [Latin]",
    "Frequency [Lofi]", "Frequency [Metal]", "Frequency [Pop]",
    "Frequency [R&B]", "Frequency [Rap]", "Frequency [Rock]",
    "Frequency [Video game music]",
]]

y = data["OCD"].astype('int64')  # integer class labels

# One seed shared by the fold shuffling and the estimator so that re-running
# the cell reproduces exactly the same scores.
RANDOM_STATE = 42

kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

# Per-fold scores measured on the training part of each split.
scores_accuracy_train = []
scores_recall_train = []
scores_f1_train = []

# Per-fold scores measured on the held-out part of each split.
scores_accuracy_test = []
scores_recall_test = []
scores_f1_test = []

for train_index, test_index in kf.split(X):
    # Fix: the forest was unseeded, so its bootstrap samples and per-split
    # feature subsets (and therefore the reported scores) changed on every run.
    model = RandomForestClassifier(max_depth=10, n_estimators=200,
                                   random_state=RANDOM_STATE)

    X_train_kf, X_test_kf = X.iloc[train_index], X.iloc[test_index]
    y_train_kf, y_test_kf = y.iloc[train_index], y.iloc[test_index]

    model.fit(X_train_kf, y_train_kf)
    y_pred_train = model.predict(X_train_kf)
    y_pred_test = model.predict(X_test_kf)

    # average='macro' gives every class the same weight in recall and F1.
    scores_accuracy_train.append(accuracy_score(y_train_kf, y_pred_train))
    scores_recall_train.append(recall_score(y_train_kf, y_pred_train, average='macro'))
    scores_f1_train.append(f1_score(y_train_kf, y_pred_train, average='macro'))

    scores_accuracy_test.append(accuracy_score(y_test_kf, y_pred_test))
    scores_recall_test.append(recall_score(y_test_kf, y_pred_test, average='macro'))
    scores_f1_test.append(f1_score(y_test_kf, y_pred_test, average='macro'))

# Average each metric over the five folds.
mean_accuracy_train = np.mean(scores_accuracy_train)
mean_recall_train = np.mean(scores_recall_train)
mean_f1_train = np.mean(scores_f1_train)

mean_accuracy_test = np.mean(scores_accuracy_test)
mean_recall_test = np.mean(scores_recall_test)
mean_f1_test = np.mean(scores_f1_test)

# The "promedio" lines are the training-split means; the "test" lines are the
# held-out means.
print(f'Accuracy promedio: {mean_accuracy_train}')
print(f'Recall promedio: {mean_recall_train}')
print(f'F1 promedio: {mean_f1_train}')

print(f'Accuracy test: {mean_accuracy_test}')
print(f'Recall test: {mean_recall_test}')
print(f'F1 test: {mean_f1_test}')
