# Explicación de variables por gesto


<div style="text-align: center;">
<img src="../Imagenes/gestures.png" width="500"/>
</div>

In [24]:
# importar librerias
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
import functions as fn
import numpy as np

# scikit-learn (ML en python)
## Procesar el dataset
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import LeaveOneGroupOut # LeavePGroupsOut
## Modelos ML
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
## Evaluación de los modelos
from sklearn.metrics import *
from sklearn.model_selection import cross_val_score
## Hiperparametrizacion
from sklearn.model_selection import RandomizedSearchCV

# Class imbalance
from imblearn.under_sampling import NearMiss, EditedNearestNeighbours
from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.combine import SMOTETomek, SMOTEENN
from collections import Counter

## Seleccion de variables
from sklearn.feature_selection import SelectKBest, f_classif

# Pipeline
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn.compose import ColumnTransformer

import warnings
from sklearn.exceptions import InconsistentVersionWarning

# Suppress the specific warning
warnings.filterwarnings("ignore", category=InconsistentVersionWarning)

******
## Cálculo de distancia entre puntos y caracterización previa de los movimientos

Para caracterizar cada gesto solo se calculan las variables de las articulaciones implicadas. Por ejemplo, para corregir una elevación lateral de hombro no es necesario calcular los ángulos de las piernas ni los del brazo contrario, ya que pueden introducir ruido.
Además, las distancias entre puntos permiten controlar la postura del paciente: mientras ejecuta el movimiento, la distancia entre hombros (por ejemplo) debería ser constante; si no lo es, significa que el paciente se está curvando. Queda por comprobar si estas variables aportan alguna mejora al modelo.

In [25]:
# DataFrame with the raw data (one row per recorded joint position).
# dtype=object keeps every column as read from the CSV (strings), so numeric
# columns such as 3D_X/3D_Y/3D_Z must be converted before doing arithmetic.
df_raw = pd.read_csv('../Resultados/raw_pacientes.csv', dtype=object)
df_raw

Unnamed: 0,SubjectID,GestureLabel,GestureName,RepetitionNumber,CorrectLabel,Position,JointName,3D_X,3D_Y,3D_Z
0,102,0,EFL,10,1,chair,SpineBase,-0.08088344,-0.2248836,2.661578
1,102,0,EFL,10,1,chair,SpineMid,-0.06806522,0.04492111,2.628779
2,102,0,EFL,10,1,chair,Neck,-0.055614,0.3082771,2.583972
3,102,0,EFL,10,1,chair,Head,-0.04478608,0.4328104,2.593495
4,102,0,EFL,10,1,chair,ShoulderLeft,-0.2232155,0.2021449,2.549825
...,...,...,...,...,...,...,...,...,...,...
5500495,307,8,STR,9,1,stand,SpineShoulder,-0.05799517,0.5291457,2.422904
5500496,307,8,STR,9,1,stand,HandTipLeft,-0.302538,-0.1131345,2.284269
5500497,307,8,STR,9,1,stand,ThumbLeft,-0.2783904,-0.06298634,2.269769
5500498,307,8,STR,9,1,stand,HandTipRight,0.1525867,-0.136378,2.45287


### Funciones creadas

In [26]:
def calcular_distancia(df: pd.DataFrame, joint_a: str, joint_b: str):
    """
    Euclidean distance between two joints.

    Parameters
    ----------
    df : pd.DataFrame
        Rows with a 'JointName' column and the coordinate columns
        '3D_X', '3D_Y' and '3D_Z' (values may be numeric strings).
    joint_a : str
        Name of the first joint.
    joint_b : str
        Name of the second joint.

    Returns
    -------
    float
        Length of the vector that goes from joint_a to joint_b.
    """
    # Look both joints up by name, in the order (joint_a, joint_b), and make
    # sure the coordinates are numeric.
    coords = (
        df.set_index('JointName')
        .loc[[joint_a, joint_b], ['3D_X', '3D_Y', '3D_Z']]
        .apply(pd.to_numeric)
    )

    # The first two rows are the positions of joint_a and joint_b.
    origin, tip = coords.to_numpy()[:2]

    return np.linalg.norm(tip - origin)


In [27]:
# Split df_gestures into train and test sets
def split_df_gestures(df: pd.DataFrame, target: str,
                      test_subjects=(102, 105, 201, 202, 205, 211, 301, 302)):
    """
    Split a DataFrame into train and test sets according to 'SubjectID'.

    All rows of a subject go to the same side of the split, so no subject
    appears in both sets.

    Parameters
    ----------
    df : pd.DataFrame
        Full data set, including the feature columns, the 'SubjectID' column
        and the target column.
    target : str
        Name of the column that holds the class labels.
    test_subjects : iterable of int, optional
        Numeric subject IDs assigned to the test set. Defaults to the eight
        subjects that were previously hard-coded in this function.

    Returns
    -------
    X_train : pd.DataFrame
        Training rows without the target column.
    X_test : pd.DataFrame
        Test rows without the target column.
    y_train : np.ndarray
        1-D array with the training labels.
    y_test : np.ndarray
        1-D array with the test labels.
    """
    # Compare IDs numerically: when the CSV is read with dtype=object the IDs
    # are strings ('102'), and comparing them with the integer 102 never
    # matches, which would silently leave the test set empty.
    subject_ids = pd.to_numeric(df['SubjectID'], errors='coerce')
    is_test = subject_ids.isin(list(test_subjects))

    # One mask defines both sides, so the two sets are always complementary.
    test_df = df.loc[is_test]
    train_df = df.loc[~is_test]

    # Features: everything except the target column.
    X_train = train_df.drop([target], axis=1)
    X_test = test_df.drop([target], axis=1)

    # Labels as flat 1-D arrays.
    y_train = train_df[[target]].values.ravel()
    y_test = test_df[[target]].values.ravel()

    return X_train, X_test, y_train, y_test

In [28]:
# Hyper-parameter search space: one dictionary per candidate classifier.
# Every dictionary shares the same resampling ('balance_data') and feature
# selection ('select_features') options and then adds the options that are
# specific to its classifier. The 'step__param' keys address the parameters of
# the pipeline steps with those names.
# NOTE(review): 'balance_data__smote__k_neighbors' replaces the k_neighbors=2
# given to the SMOTE constructors, so only 3 and 4 are explored - confirm that
# this is intended.
param_grid = [
    {
        'balance_data': [SMOTETomek(smote=SMOTE(k_neighbors=2, sampling_strategy=1.0)),
                         SMOTEENN(smote=SMOTE(k_neighbors=2, sampling_strategy=1.0))],
        'balance_data__smote__k_neighbors': [3, 4],
        'select_features__k': list(range(5, 15)),
        'classifier': [estimator],
        **estimator_space,
    }
    for estimator, estimator_space in (
        (KNeighborsClassifier(), {
            'classifier__n_neighbors': [2, 3, 5],
            'classifier__weights': ['uniform', 'distance'],
            'classifier__algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
        }),
        (DecisionTreeClassifier(), {
            'classifier__criterion': ['gini', 'entropy'],
            'classifier__max_depth': [None, 10, 20, 30],
        }),
        (GaussianNB(), {
            'classifier__var_smoothing': np.logspace(0, -8, num=100),
        }),
        (SVC(probability=True), {
            'classifier__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
            'classifier__C': [0.1, 1, 10, 100],
        }),
        (RandomForestClassifier(), {
            'classifier__n_estimators': [200, 300],
            'classifier__max_features': ['sqrt', 'log2'],
        }),
        (LogisticRegression(max_iter=500), {
            'classifier__C': [0.01, 0.1, 1, 10, 100],
        }),
    )
]

In [29]:
# Number of consecutive rows of df_raw that are treated as one frame
# (presumably the number of joints recorded per frame - confirm against the
# raw file).
JOINTS_PER_FRAME = 25

# Metadata copied from the first row of every frame. These are the leading
# columns of df_angles; every column after them is a computed measure.
META_COLUMNS = ['SubjectID', 'GestureLabel', 'GestureName',
                'RepetitionNumber', 'CorrectLabel', 'Position']

# Posture distances: output column -> (joint_a, joint_b).
# NOTE: 'ElbowsDistance' is measured between the two shoulders; the column
# name is kept unchanged so existing result files and models keep working.
POSTURE_DISTANCES = {
    'ElbowsDistance': ('ShoulderLeft', 'ShoulderRight'),
    'HipsDistance': ('HipLeft', 'HipRight'),
    'FootsDistance': ('FootLeft', 'FootRight'),
    'HeadDistance': ('SpineShoulder', 'Head'),
}


def _arm_features(side: str):
    """Return (angle_joints, distance_joints) for an arm gesture on `side` ('Left' or 'Right')."""
    angle_joints = {
        f'ElbowAngle{side}': (f'Shoulder{side}', f'Elbow{side}', f'Wrist{side}'),
        f'ShoulderAngle{side}': (f'Shoulder{side}', 'SpineShoulder', f'Elbow{side}'),
        f'WristAngle{side}': (f'Elbow{side}', f'Wrist{side}', f'Hand{side}'),
        f'{side}ArmAngle': (f'Hip{side}', f'Shoulder{side}', f'Elbow{side}'),
        'ArmsTogetherAngle': ('SpineBase', 'SpineShoulder', f'Wrist{side}'),
    }
    return angle_joints, dict(POSTURE_DISTANCES)


def _leg_features(side: str):
    """Return (angle_joints, distance_joints) for a leg gesture on `side` ('Left' or 'Right')."""
    angle_joints = {
        f'HipAngle{side}': (f'Hip{side}', 'SpineBase', f'Knee{side}'),
        f'KneeAngle{side}': (f'Hip{side}', f'Knee{side}', f'Ankle{side}'),
        f'AnkleAngle{side}': (f'Knee{side}', f'Ankle{side}', f'Foot{side}'),
    }
    distance_joints = {name: POSTURE_DISTANCES[name]
                       for name in ('ElbowsDistance', 'HeadDistance')}
    return angle_joints, distance_joints


def _frame_features(frame: pd.DataFrame, angle_joints: dict, distance_joints: dict) -> dict:
    """Build one df_angles row: frame metadata followed by its angles and distances."""
    row = {**frame.iloc[0][META_COLUMNS]}
    for name, joints in angle_joints.items():
        row[name] = fn.calculate_angle(frame, *joints)
    for name, joints in distance_joints.items():
        row[name] = calcular_distancia(frame, *joints)
    return row


# Gesture name -> measures computed for it. Only the joints involved in the
# gesture are used.
GESTURE_FEATURES = {
    **dict.fromkeys(('EFL', 'SFL', 'SAL', 'SFE'), _arm_features('Left')),
    **dict.fromkeys(('EFR', 'SFR', 'SAR'), _arm_features('Right')),
    'STL': _leg_features('Left'),
    'STR': _leg_features('Right'),
}

# Statistics stored per repetition; each one holds a Series (one value per
# measure) that is expanded into individual columns afterwards.
columnas = ['standardDeviation', 'Maximum', 'Minimum', 'Mean', 'Range',
            'Variance', 'CoV', 'Skewness', 'Kurtosis']

gestures = df_raw['GestureName'].unique()

for gesture in gestures:
    #------- Compute angles and distances for this gesture ---------
    print(f'Calculando para gesto {gesture}')
    if gesture not in GESTURE_FEATURES:
        # Without a feature definition there is nothing to compute; skip the
        # gesture explicitly instead of failing later on an empty DataFrame.
        print(f'Gesto {gesture} sin variables definidas; se omite')
        continue
    angle_joints, distance_joints = GESTURE_FEATURES[gesture]

    df_gesture = df_raw[df_raw['GestureName'] == gesture]

    # Consecutive blocks of JOINTS_PER_FRAME rows form one frame.
    frame_ids = np.arange(len(df_gesture)) // JOINTS_PER_FRAME

    # Every frame now computes all of its own measures. Previously the
    # right-arm gestures stored 'HeadDistance' without computing it, so the
    # value was stale (left over from another gesture) or raised NameError.
    frame_rows = [
        _frame_features(frame, angle_joints, distance_joints)
        for _, frame in df_gesture.groupby(frame_ids)
    ]

    # One row per frame
    df_angles = pd.DataFrame(frame_rows)
    print(df_angles)

    # ---------- Aggregate per repetition ----------
    repetition_rows = []
    for (subject_id, repetition_number), repetition in df_angles.groupby(["SubjectID", "RepetitionNumber"]):
        # Keep only the measure columns (everything after the metadata)
        measures = repetition.iloc[:, len(META_COLUMNS):]

        means = measures.mean()
        std_devs = measures.std()

        repetition_rows.append({
            'SubjectID': subject_id,
            'RepetitionNumber': repetition_number,
            'Position': repetition['Position'].iloc[0],
            'Duration': len(repetition),  # duration in number of frames
            'standardDeviation': std_devs,
            'Maximum': measures.max(),
            'Minimum': measures.min(),
            'Mean': means,
            'Range': measures.max() - measures.min(),
            'Variance': measures.var(),
            'CoV': std_devs / means,  # coefficient of variation
            'Skewness': measures.skew(),
            'Kurtosis': measures.kurtosis()
        })

    # One row per repetition, sorted by repetition number
    df_stats = pd.DataFrame(repetition_rows)
    df_stats['RepetitionNumber'] = pd.to_numeric(df_stats['RepetitionNumber'], errors='coerce')
    df_stats = df_stats.sort_values(['RepetitionNumber'])

    # Replace every Series-valued statistic column with the columns that
    # fn.formatear_columnas builds from it.
    nuevas_columnas = pd.concat([fn.formatear_columnas(df_stats[col], col) for col in columnas], axis=1)
    df_stats = pd.concat([df_stats, nuevas_columnas], axis=1)
    df_stats = df_stats.drop(columnas, axis=1)

    print(df_stats)

    # --------- Copy of phase 2 (placeholder, not implemented here) ---------
    

Calculando para gesto EFL


KeyboardInterrupt: 

*****
## Archivo variables seleccionadas y gráficas de correlaciones

In [58]:
#--------- Load the data -----------
# Measures computed per repetition. dtype=object keeps every column as read
# from the CSV (strings).
df = pd.read_csv('../Resultados/medidasPerRepetition.csv', dtype=object)

# Encode the categorical 'Position' column as ordinal codes. The encoder
# infers the categories itself, which gives a sorted and therefore
# reproducible mapping; building the list from set() made the code assigned to
# each position depend on hash ordering, which can change between kernel runs.
encoder = OrdinalEncoder()
df["Position"] = np.asarray(encoder.fit_transform(df[["Position"]])).ravel()

# Merge "poorly executed" (3) into "incorrectly executed" (2).
# The labels are strings because of dtype=object, so they are converted to
# numbers first: comparing the raw strings with the integer 3 never matched
# and left the labels untouched.
df["CorrectLabel"] = pd.to_numeric(df["CorrectLabel"]).replace(3, 2)

In [None]:
gestures = df['GestureName'].unique()

# For every gesture: load its saved model, recover the features kept by the
# 'select_features' step, plot their correlation with the target and write the
# feature names to a text file.
with open('../Resultados/fn_x_gesture.txt', 'w', encoding='utf-8') as file:
    for gesture in gestures:

        df_gesture = df[df['GestureName'] == gesture]
        X = df_gesture.drop(['CorrectLabel'], axis=1)
        y = pd.DataFrame(df_gesture['CorrectLabel'])

        # NOTE(review): joblib.load unpickles arbitrary objects; only load
        # model files that come from a trusted source.
        modelo_gesto_path = f'../Resultados/modelo_{gesture}.sav'
        best_pipeline, expected_columns = joblib.load(modelo_gesto_path)

        # Step 1: feature names produced by the 'preprocessing' step
        feature_names = best_pipeline.named_steps['preprocessing'].get_feature_names_out()

        # Step 2: boolean mask of the features kept by the 'select_features' step
        selected_features_mask = best_pipeline.named_steps['select_features'].get_support()

        # Strip the transformer prefixes so the names match the columns of X
        selected_features = [
            name.replace('remainder__', '').replace('encoder__', '')
            for name in feature_names[selected_features_mask]
        ]

        # Selected features plus the target, converted to numeric explicitly:
        # the CSV was read with dtype=object, and a correlation over string
        # columns either fails or drops them, depending on the pandas version.
        selected_data = X[selected_features].apply(pd.to_numeric)
        selected_data['Target'] = pd.to_numeric(y['CorrectLabel'])

        # Correlation of every selected feature with the target
        # (the target's correlation with itself is dropped)
        correlations = selected_data.corr()['Target'].drop('Target')

        # Bar chart of the correlations
        fig, ax = plt.subplots(figsize=(10, 8))
        correlations.sort_values(ascending=False).plot(kind='bar', ax=ax)
        ax.set_title(f'Correlación de características seleccionadas con el target para {gesture}')
        ax.set_ylabel('Correlación')
        ax.set_xlabel('Características')
        ax.set_ylim(-1, 1)
        plt.show()
        # Release the figure so that figures do not pile up across gestures
        plt.close(fig)

        file.write(f'\n------------ VARIABLES GESTO {gesture} -----------\n')

        # One selected feature name per line
        for feature_name in selected_features:
            file.write(f'{feature_name}\n')
