In [109]:
from utils import *
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
import pandas as pd
import numpy as np
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import SimpleImputer
import xgboost as xgb
import optuna

from utils import *
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# ! pip install openpyxl


En este notebook voy a aplicar random forest con optimización de parámetros a : 
 
 * Behavioral-features-per mimnute
 * concatenación de ambos

# 1. Estudio Behavioral-features-per minute

## Carga dataset

In [15]:
df_Stress = pd.read_excel("Copia de Behavioral-features - per minute.xlsx")

## 1.1 primera prueba
* Regresor
* Stratify = codition
* Imputer = mediana
* Escalado StandardScaler
* Eliminadas columnas correlacionadas > 0.8

In [108]:
# Hago una copia del dataframe para no modificar el original
df_stress = df_Stress.copy()
#Eliminamos columnas que no son compatibles con nuestro problema ya que no tendremos esas features en nuestro datos de entrada de nuestra APP
columnas_a_eliminar = ['PP','timestamp','HR','RMSSD', 'RMSSD','SCL','Valence_rc','Arousal_rc',
                       'Dominance','MentalEffort','MentalDemand','PhysicalDemand','TemporalDemand',
                       'Effort','Performance_rc','Frustration','NasaTLX','Squality','Sneutral','Shappy',
                       'Ssad','Sangry','Ssurprised','Sscared','Sdisgusted','Svalence']
df_stress = df_stress.drop(columnas_a_eliminar, axis=1)
#Filtramos filas que tengan condición diferente a 'R'
df_stress = df_stress[df_stress['Condition'] != 'R']
#Dividimos los datos : voy a estrtificar por condition
X = df_stress.drop('Stress', axis=1)
y = df_stress['Stress']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42, stratify=df_stress['Condition']) # probar con stratify = 'Blok'
# Variables categóricas
categorical_features = ['Condition','Blok']
#convertir en categoricas
X_train[categorical_features] = X_train[categorical_features].astype('category')
#Aplicamos one hot encoding
X_train = pd.get_dummies(X_train, columns=categorical_features)
# Variables numéricas
numeric_features = X_train.select_dtypes(include=[np.number]).columns.tolist()
# Imputación de valores nulos
imputer = SimpleImputer(strategy='most_frequent') # probar con media
#Entrenamos el imputer
imputer.fit(X_train[numeric_features])
#Aplicamos la transformación
X_train[numeric_features] = imputer.transform(X_train[numeric_features])
# Normalización
scaler = StandardScaler()
scaler.fit(X_train[numeric_features])
X_train[numeric_features] = scaler.transform(X_train[numeric_features])
# matriz de correlación
corr = X_train.corr().abs()
# Seleccionar las columnas con correlación > 0.8 (excepto la diagonal)
high_corr_cols = set()
for i in range(len(corr.columns)):
    for j in range(i):
        if corr.iloc[i, j] > 0.8:
            colname = corr.columns[i]
            high_corr_cols.add(colname)
# Eliminar las columnas redundantes en X_train
X_train.drop(columns=high_corr_cols, inplace=True)
print(f"Eliminadas {len(high_corr_cols)} columnas altamente correlacionadas entre sí (sin considerar 'Stress').")
print(f"Columnas eliminadas: {list(high_corr_cols)}")

# Aplicamos las mismas transformaciones a X_test
X_test[categorical_features] = X_test[categorical_features].astype('category')
X_test = pd.get_dummies(X_test, columns=categorical_features)
X_test[numeric_features] = imputer.transform(X_test[numeric_features])
X_test[numeric_features] = scaler.transform(X_test[numeric_features])
X_test.drop(columns=high_corr_cols, inplace=True)

# OPTIMIZACIÓN DE HIPERPARÁMETROS CON OPTUNA
def objective(trial):
    # Parámetros a optimizar
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 250, 300, step=50), # probado con 100, 200, 300
        #'max_depth': trial.suggest_int('max_depth', 5, 50),
        #'min_samples_split': trial.suggest_int('min_samples_split', 2, 20), # min_samples_split=2 es el default, no tiene sentido optimizarlo
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 2), 
        #'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2']), # esto no optimiza nada
        #'criterion': trial.suggest_categorical('criterion', ['squared_error', 'absolute_error', 'poisson']),
        #'max_samples': trial.suggest_float('max_samples', 0.5, 1.0, step=0.1),
    }
    
    # Modelo
    model = RandomForestRegressor(**params, random_state=42)
    
    # Validación cruzada
    score = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
    
    return -score.mean()

# Crear estudio
study = optuna.create_study(direction='minimize')

# Optimización
study.optimize(objective, n_trials=10)

# Mejor resultado encontrado
print("Mejores parámetros:", study.best_params)
 
# mejor metrica
best_value = study.best_value
print(f"Mejor valor: {best_value}")
#Raiz cuadrada del mejor valor
best_value = np.sqrt(best_value)
print(f"Raíz cuadrada del mejor valor: {best_value}")


[I 2025-03-20 07:51:55,383] A new study created in memory with name: no-name-97357304-f592-466f-88a6-f4f9e1fe28ea


Eliminadas 43 columnas altamente correlacionadas entre sí (sin considerar 'Stress').
Columnas eliminadas: ['HipCenter_SpinePlaneYZAxisZavg', 'ShoulderCenter_ShoulderRightPlaneYZAxisZstdv', 'SnChars', 'ShoulderCenter_ShoulderRightPlaneYZAxisZavg', 'Spine_ShoulderCenterShoulderCenter_ShoulderRightavg', 'Spine_ShoulderCenterPlaneYZAxisZstdv', 'WristLeft_HandLeftPlaneXYAxisYavg', 'Spine_ShoulderCenterShoulderCenter_ShoulderLeftavg', 'Blok_1', 'Spine_ShoulderCenterShoulderCenter_Headavg', 'ShoulderCenter_ShoulderLeftPlaneXYAxisYstdv', 'Spine_ShoulderCenterPlaneXYAxisYstdv', 'ShoulderCenter_ShoulderLeftShoulderLeft_ElbowLeftavg', 'leanAngleavg', 'SnDragged', 'SnSpecialKeys', 'ShoulderCenter_HeadPlaneXYAxisYavg', 'SrightEyeClosed', 'Spine_ShoulderCenterPlaneXYAxisYavg', 'ShoulderCenter_ShoulderRightPlaneZXAxisXavg', 'SAu43_EyesClosed', 'ElbowLeft_WristLeftWristLeft_HandLeftavg', 'HipCenter_SpinePlaneYZAxisZstdv', 'ShoulderCenter_ShoulderRightPlaneXYAxisYstdv', 'SnSpaces', 'ShoulderRight_Elbow

[I 2025-03-20 07:53:35,397] Trial 0 finished with value: 0.9690722359186893 and parameters: {'n_estimators': 250, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9690722359186893.
[I 2025-03-20 07:55:21,737] Trial 1 finished with value: 0.9594231337674411 and parameters: {'n_estimators': 250, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.9594231337674411.
[I 2025-03-20 07:57:02,054] Trial 2 finished with value: 0.9690722359186893 and parameters: {'n_estimators': 250, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.9594231337674411.
[I 2025-03-20 07:59:09,577] Trial 3 finished with value: 0.9594423890956054 and parameters: {'n_estimators': 300, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.9594231337674411.
[I 2025-03-20 08:00:49,571] Trial 4 finished with value: 0.9690722359186893 and parameters: {'n_estimators': 250, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.9594231337674411.
[I 2025-03-20 08:02:35,571] Trial 5 finished with value: 0.95942313376

Mejores parámetros: {'n_estimators': 250, 'min_samples_leaf': 1}
Mejor valor: 0.9594231337674411
Raíz cuadrada del mejor valor: 0.9795014720598643


## 1.2 Segunda prueba
* Clasificador
* Stratify = codition
* Imputer = mediana
* Sin escalar
* Eliminadas columnas correlacionadas > 0.8


In [111]:
# Hago una copia del dataframe para no modificar el original
df_stress = df_Stress.copy()
#Eliminamos columnas que no son compatibles con nuestro problema ya que no tendremos esas features en nuestro datos de entrada de nuestra APP
columnas_a_eliminar = ['PP','timestamp','HR','RMSSD', 'RMSSD','SCL','Valence_rc','Arousal_rc',
                       'Dominance','MentalEffort','MentalDemand','PhysicalDemand','TemporalDemand',
                       'Effort','Performance_rc','Frustration','NasaTLX','Squality','Sneutral','Shappy',
                       'Ssad','Sangry','Ssurprised','Sscared','Sdisgusted','Svalence']
df_stress = df_stress.drop(columnas_a_eliminar, axis=1)
#Filtramos filas que tengan condición diferente a 'R'
df_stress = df_stress[df_stress['Condition'] != 'R']
#Convierto variable Stress a int
df_stress['Stress'] = df_stress['Stress'].astype(int)
#Dividimos los datos : voy a estrtificar por condition
X = df_stress.drop('Stress', axis=1)
y = df_stress['Stress']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42, stratify=df_stress['Condition']) # probar con stratify = 'Blok'
# Variables categóricas
categorical_features = ['Condition','Blok']
#convertir en categoricas
X_train[categorical_features] = X_train[categorical_features].astype('category')
#Aplicamos one hot encoding
X_train = pd.get_dummies(X_train, columns=categorical_features)
# Variables numéricas
numeric_features = X_train.select_dtypes(include=[np.number]).columns.tolist()
# Imputación de valores nulos
imputer = SimpleImputer(strategy='most_frequent') # probar con media
#Entrenamos el imputer
imputer.fit(X_train[numeric_features])
#Aplicamos la transformación
X_train[numeric_features] = imputer.transform(X_train[numeric_features])
# Normalización
#scaler = StandardScaler()
#scaler.fit(X_train[numeric_features])
#X_train[numeric_features] = scaler.transform(X_train[numeric_features])
# matriz de correlación
corr = X_train.corr().abs()
# Seleccionar las columnas con correlación > 0.8 (excepto la diagonal)
high_corr_cols = set()
for i in range(len(corr.columns)):
    for j in range(i):
        if corr.iloc[i, j] > 0.8:
            colname = corr.columns[i]
            high_corr_cols.add(colname)
# Eliminar las columnas redundantes en X_train
X_train.drop(columns=high_corr_cols, inplace=True)
print(f"Eliminadas {len(high_corr_cols)} columnas altamente correlacionadas entre sí (sin considerar 'Stress').")
print(f"Columnas eliminadas: {list(high_corr_cols)}")

# Aplicamos las mismas transformaciones a X_test
X_test[categorical_features] = X_test[categorical_features].astype('category')
X_test = pd.get_dummies(X_test, columns=categorical_features)
#X_test[numeric_features] = imputer.transform(X_test[numeric_features])
#X_test[numeric_features] = scaler.transform(X_test[numeric_features])
X_test.drop(columns=high_corr_cols, inplace=True)

# OPTIMIZACIÓN DE HIPERPARÁMETROS CON OPTUNA
def objective(trial):
    # Parámetros a optimizar
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 250, 300, step=50), # probado con 100, 200, 300
        #'max_depth': trial.suggest_int('max_depth', 5, 50),
        #'min_samples_split': trial.suggest_int('min_samples_split', 2, 20), # min_samples_split=2 es el default, no tiene sentido optimizarlo
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 2), 
        'class_weight': trial.suggest_categorical('class_weight', ['balanced', 'balanced_subsample', None]),
        #'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2']), # esto no optimiza nada
        #'criterion': trial.suggest_categorical('criterion', ['squared_error', 'absolute_error', 'poisson']),
        #'max_samples': trial.suggest_float('max_samples', 0.5, 1.0, step=0.1),
    }
    
    # Modelo
    model = RandomForestClassifier(**params, random_state=42)
    
    # Validación cruzada
    score = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
    
    return -score.mean()

# Crear estudio
study = optuna.create_study(direction='minimize')

# Optimización
study.optimize(objective, n_trials=10)

# Mejor resultado encontrado
print("Mejores parámetros:", study.best_params)
 
# mejor metrica
best_value = study.best_value
print(f"Mejor valor: {best_value}")
#Raiz cuadrada del mejor valor
best_value = np.sqrt(best_value)
print(f"Raíz cuadrada del mejor valor: {best_value}")

[I 2025-03-20 08:11:44,677] A new study created in memory with name: no-name-2f6be917-111a-4790-a497-eaf08a1bd1b1


Eliminadas 43 columnas altamente correlacionadas entre sí (sin considerar 'Stress').
Columnas eliminadas: ['HipCenter_SpinePlaneYZAxisZavg', 'ShoulderCenter_ShoulderRightPlaneYZAxisZstdv', 'SnChars', 'ShoulderCenter_ShoulderRightPlaneYZAxisZavg', 'Spine_ShoulderCenterShoulderCenter_ShoulderRightavg', 'Spine_ShoulderCenterPlaneYZAxisZstdv', 'WristLeft_HandLeftPlaneXYAxisYavg', 'Spine_ShoulderCenterShoulderCenter_ShoulderLeftavg', 'Blok_1', 'Spine_ShoulderCenterShoulderCenter_Headavg', 'ShoulderCenter_ShoulderLeftPlaneXYAxisYstdv', 'Spine_ShoulderCenterPlaneXYAxisYstdv', 'ShoulderCenter_ShoulderLeftShoulderLeft_ElbowLeftavg', 'leanAngleavg', 'SnDragged', 'SnSpecialKeys', 'ShoulderCenter_HeadPlaneXYAxisYavg', 'SrightEyeClosed', 'Spine_ShoulderCenterPlaneXYAxisYavg', 'ShoulderCenter_ShoulderRightPlaneZXAxisXavg', 'SAu43_EyesClosed', 'ElbowLeft_WristLeftWristLeft_HandLeftavg', 'HipCenter_SpinePlaneYZAxisZstdv', 'ShoulderCenter_ShoulderRightPlaneXYAxisYstdv', 'SnSpaces', 'ShoulderRight_Elbow

[I 2025-03-20 08:12:02,895] Trial 0 finished with value: 0.4586046511627907 and parameters: {'n_estimators': 300, 'min_samples_leaf': 1, 'class_weight': 'balanced_subsample'}. Best is trial 0 with value: 0.4586046511627907.
[I 2025-03-20 08:12:16,628] Trial 1 finished with value: 0.3483720930232558 and parameters: {'n_estimators': 250, 'min_samples_leaf': 2, 'class_weight': 'balanced'}. Best is trial 1 with value: 0.3483720930232558.
[I 2025-03-20 08:12:34,048] Trial 2 finished with value: 0.3511627906976744 and parameters: {'n_estimators': 300, 'min_samples_leaf': 2, 'class_weight': 'balanced_subsample'}. Best is trial 1 with value: 0.3483720930232558.
[I 2025-03-20 08:12:47,746] Trial 3 finished with value: 0.3483720930232558 and parameters: {'n_estimators': 250, 'min_samples_leaf': 2, 'class_weight': 'balanced'}. Best is trial 1 with value: 0.3483720930232558.
[I 2025-03-20 08:13:05,013] Trial 4 finished with value: 0.43162790697674425 and parameters: {'n_estimators': 300, 'min_samp

Mejores parámetros: {'n_estimators': 250, 'min_samples_leaf': 2, 'class_weight': 'balanced'}
Mejor valor: 0.3483720930232558
Raíz cuadrada del mejor valor: 0.5902305422656945


In [114]:
#Entrenar Modelo con los Mejores Parámetros
# Modelo
model = RandomForestClassifier(**study.best_params, random_state=42)
# Entrenamiento
model.fit(X_train, y_train)
# Predicción
y_pred_train = model.predict(X_train)
# Métricas
mse = mean_squared_error(y_train, y_pred_train)
r2 = r2_score(y_train, y_pred_train)
mae = mean_absolute_error(y_train, y_pred_train)
print(f"Mean Squared Error: {mse}")
print(f"R^2: {r2}")
print(f"Mean Absolute Error: {mae}")



Mean Squared Error: 0.0
R^2: 1.0
Mean Absolute Error: 0.0


In [115]:
#Entrenar Modelo con los Mejores Parámetros
# Modelo
model = RandomForestClassifier(**study.best_params, random_state=42)
# Entrenamiento
model.fit(X_test, y_test)
# Predicción
y_pred_test = model.predict(X_test)
# Métricas
mse = mean_squared_error(y_test, y_pred_test)
r2 = r2_score(y_test, y_pred_test)
mae = mean_absolute_error(y_test, y_pred_test)
print(f"Mean Squared Error: {mse}")
print(f"R^2: {r2}")
print(f"Mean Absolute Error: {mae}")

Mean Squared Error: 0.016728624535315983
R^2: 0.9964214156323976
Mean Absolute Error: 0.0055762081784386614


# Notebook sin tocar

In [2]:
df_full_dataset = cargar_datos_csv("full_dataset.csv")

# Elimino las filas donde Condition = 'R'
df_full_dataset = df_full_dataset.loc[df_full_dataset['Condition'] != 'R']

Datos cargados correctamente: full_dataset.csv


In [3]:
# Lectura archivo excel para extraer característica estrés
df_Stress = pd.read_excel("Copia de Behavioral-features - per minute.xlsx", usecols=["PP", "Blok", "Condition", "Stress"])  # Carga solo columnas indicadas
# Elimino las filas donde Condition = 'R'
df_Stress = df_Stress.loc[df_Stress['Condition'] != 'R']


In [6]:
# Verificar si las columnas son iguales
iguales = df_full_dataset['PP'].equals(df_Stress['PP']) and df_full_dataset['Condition'].equals(df_Stress['Condition']) and df_full_dataset['Blok'].equals(df_Stress['Blok'])

if iguales:
    print("Las colunas son idénticos")
else:
    print("Las columnas tienen diferencias entre los datasets.")

Las colunas son idénticos


# Importancia de las caracteríticas sin escalado

In [7]:
pd.set_option('display.max_rows', None)  # Todas las filas

In [8]:

# Definimos la variable objetivo (target)
target = df_Stress['Stress'].astype(int)  # Convertimos a entero para asegurar que sea categórica

# Seleccionamos solo las columnas numéricas y eliminamos la variable objetivo
df_full_dataset_num = df_full_dataset.select_dtypes(include=[np.number])

# Manejo de valores NaN (rellenamos con la mediana de cada columna)
X = df_full_dataset_num.fillna(df_full_dataset_num.median())
y = target

# Creación del modelo Random Forest con balanceo de clases
rnd_clf = RandomForestClassifier(n_estimators=200, random_state=42, class_weight='balanced')
rnd_clf.fit(X, y)

# Obtener la importancia de las características
feature_importances = rnd_clf.feature_importances_

# Mostrar resultados ordenados
feature_importance_df_full = pd.DataFrame({'Feature': X.columns, 'Importance': feature_importances})
feature_importance_df_full = feature_importance_df_full.sort_values(by='Importance', ascending=False)

# Imprimir la importancia de las características

for i, (feature, importance) in enumerate(zip(feature_importance_df_full['Feature'], feature_importance_df_full['Importance']), 1):
    print(f"{i}. {feature}: {round(importance, 4)}")
    

1. SzHeadOrientation: 0.0281
2. avgDepth(avg): 0.0281
3. Blok: 0.0215
4. Squality: 0.0206
5. WristLeft_HandLeft-PlaneZXAxisX(avg): 0.019
6. ShoulderCenter_Head-PlaneYZAxisZ(avg): 0.0174
7. SAu01_InnerBrowRaiser: 0.0159
8. avgDepth(stdv): 0.0157
9. SyHeadOrientation: 0.0157
10. Ssurprised: 0.015
11. WristLeft_HandLeft-PlaneXYAxisY(avg): 0.015
12. WristLeft_HandLeft-PlaneZXAxisX(stdv): 0.0149
13. ShoulderLeft_ElbowLeft-ElbowLeft_WristLeft(avg): 0.0146
14. HipCenter_Spine-Spine_ShoulderCenter(avg): 0.0142
15. ShoulderCenter_ShoulderLeft-PlaneXYAxisY(avg): 0.014
16. WristLeft_HandLeft-PlaneYZAxisZ(avg): 0.0138
17. ShoulderCenter_Head-PlaneZXAxisX(avg): 0.0137
18. leftShoulderAngle(avg): 0.0133
19. Spine_ShoulderCenter-ShoulderCenter_ShoulderLeft(avg): 0.0127
20. ElbowLeft_WristLeft-PlaneYZAxisZ(avg): 0.0127
21. SAu10_UpperLipRaiser: 0.0124
22. SAu06_CheekRaiser: 0.0123
23. SrightEyebrowLowered: 0.0119
24. ShoulderLeft_ElbowLeft-PlaneYZAxisZ(avg): 0.0116
25. Sscared: 0.0113
26. Sangry: 0.01

In [9]:
feature_importance_df_full[0:10]

Unnamed: 0,Feature,Importance
28,SzHeadOrientation,0.028093
58,avgDepth(avg),0.028075
0,Blok,0.021539
17,Squality,0.020639
93,WristLeft_HandLeft-PlaneZXAxisX(avg),0.019006
80,ShoulderCenter_Head-PlaneYZAxisZ(avg),0.017383
39,SAu01_InnerBrowRaiser,0.015911
105,avgDepth(stdv),0.015728
26,SyHeadOrientation,0.015722
22,Ssurprised,0.015021


Los resultados no son satisfactoruios en nungún caso. importancia de características están entre 0% y 2%. Quizá la variable 'Stress' está definiida principalmete por el cuetionario final de cda participante. 
Vamos aprobar en el dataset Behavioral-features, donde está esta variable

Importancia de las características en dataset de variable Stress

In [10]:


# Lectura del archivo Excel
df_Stress = pd.read_excel("Copia de Behavioral-features - per minute.xlsx")

# Eliminamos las filas donde 'Condition' es 'R'
df_Stress = df_Stress.loc[df_Stress['Condition'] != 'R']
print(df_Stress['Condition'].value_counts())

# Definimos la variable objetivo (target)
target = df_Stress['Stress'].astype(int)  # Convertimos a entero para asegurar que sea categórica

# Seleccionamos solo las columnas numéricas y eliminamos la variable objetivo
df_numericas_Stress = df_Stress.select_dtypes(include=['number']).drop(columns=['Stress'], errors='ignore')

# Manejo de valores NaN (rellenamos con la mediana de cada columna)
X = df_numericas_Stress.fillna(df_numericas_Stress.median())
y = target

# Creación del modelo Random Forest con balanceo de clases
rnd_clf = RandomForestClassifier(n_estimators=500, random_state=42, class_weight='balanced')
rnd_clf.fit(X, y)

# Obtener la importancia de las características
feature_importances = rnd_clf.feature_importances_

# Mostrar resultados ordenados
feature_importance_df_Stress = pd.DataFrame({'Feature': X.columns, 'Importance': feature_importances})
feature_importance_df_Stress = feature_importance_df_Stress.sort_values(by='Importance', ascending=False)

# Imprimir la importancia de las características
for i, (feature, importance) in enumerate(zip(feature_importance_df_Stress['Feature'], feature_importance_df_Stress['Importance']), 1):
    print(f"{i}. {feature}: {round(importance, 4)}")



Condition
N    1028
I     996
T     664
Name: count, dtype: int64
1. NasaTLX: 0.0547
2. TemporalDemand: 0.0538
3. Frustration: 0.0533
4. MentalDemand: 0.049
5. Performance_rc: 0.0471
6. Effort: 0.0465
7. MentalEffort: 0.0451
8. Arousal_rc: 0.0353
9. PhysicalDemand: 0.034
10. Valence_rc: 0.032
11. SCL: 0.0245
12. Dominance: 0.0194
13. avgDepthavg: 0.0171
14. Blok: 0.0121
15. SzHeadOrientation: 0.0115
16. ShoulderCenter_HeadPlaneYZAxisZavg: 0.0109
17. Squality: 0.0095
18. Ssurprised: 0.0093
19. SAu01_InnerBrowRaiser: 0.009
20. ShoulderCenter_HeadPlaneZXAxisXavg: 0.0086
21. HipCenter_SpineSpine_ShoulderCenteravg: 0.0085
22. WristLeft_HandLeftPlaneZXAxisXavg: 0.0076
23. leftShoulderAngleavg: 0.0075
24. avgDepthstdv: 0.0075
25. WristLeft_HandLeftPlaneZXAxisXstdv: 0.0073
26. Sdisgusted: 0.0073
27. Spine_ShoulderCenterShoulderCenter_ShoulderLeftavg: 0.0071
28. ShoulderCenter_ShoulderLeftPlaneXYAxisYavg: 0.007
29. WristLeft_HandLeftPlaneYZAxisZavg: 0.007
30. ShoulderLeft_ElbowLeftElbowLeft_Wri

# Resumen

In [11]:
# Imprimir solo los 10 primeros más importantes
print("Top ten feature_importance_df_full : \n")
for i, (feature, importance) in enumerate(zip(feature_importance_df_full['Feature'], feature_importance_df_full['Importance'])):
    if i < 10:  # Solo mostrar los primeros 10
        print(f"{i+1}. {feature}: {round(importance, 4)}")
    else:
        break
print()
print('-'*100)
print()

# Imprimir solo los 10 primeros más importantes
print("Top ten feature_importance_df_Stress : \n")
for i, (feature, importance) in enumerate(zip(feature_importance_df_Stress['Feature'], feature_importance_df_Stress['Importance'])):
    if i < 10:  # Solo mostrar los primeros 10
        print(f"{i+1}. {feature}: {round(importance, 4)}")
    else:
        break
    

Top ten feature_importance_df_full : 

1. SzHeadOrientation: 0.0281
2. avgDepth(avg): 0.0281
3. Blok: 0.0215
4. Squality: 0.0206
5. WristLeft_HandLeft-PlaneZXAxisX(avg): 0.019
6. ShoulderCenter_Head-PlaneYZAxisZ(avg): 0.0174
7. SAu01_InnerBrowRaiser: 0.0159
8. avgDepth(stdv): 0.0157
9. SyHeadOrientation: 0.0157
10. Ssurprised: 0.015

----------------------------------------------------------------------------------------------------

Top ten feature_importance_df_Stress : 

1. NasaTLX: 0.0547
2. TemporalDemand: 0.0538
3. Frustration: 0.0533
4. MentalDemand: 0.049
5. Performance_rc: 0.0471
6. Effort: 0.0465
7. MentalEffort: 0.0451
8. Arousal_rc: 0.0353
9. PhysicalDemand: 0.034
10. Valence_rc: 0.032


In [12]:
# Lectura del archivo Excel
df_Stress = pd.read_excel("Copia de Behavioral-features - per minute.xlsx")
df_numericas_Stress = df_Stress.select_dtypes(include=['number'])
corr_matrix = df_numericas_Stress.corr()
corr_matrix['Stress'].sort_values(ascending=False)

Stress                                                      1.000000
Frustration                                                 0.545560
NasaTLX                                                     0.536578
TemporalDemand                                              0.494337
MentalDemand                                                0.460830
Arousal_rc                                                  0.382362
MentalEffort                                                0.371497
PhysicalDemand                                              0.277444
SCL                                                         0.234045
Effort                                                      0.206962
SAu06_CheekRaiser                                           0.202692
SleftEyebrowLowered                                         0.180202
SAu43_EyesClosed                                            0.180161
avgDepthstdv                                                0.159818
SAu10_UpperLipRaiser              