## <a id = '0'> Índice </a>

* [**Entorno**](#1)  
   * [Librerías](#1d1)  
   * [Funciones](#1d2)  
   * [Constantes](#1d3)

* [**Lectura de datos**](#2)


## <a id = '1'> Entorno </a>
[índice](#0)

### <a id = '1d1'> Librerías </a>

In [2]:
import os
import pandas as pd
import numpy as np

from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


from statsmodels.tsa.stattools import adfuller
from statsmodels.stats.diagnostic import acorr_ljungbox

from statsmodels.tsa.arima.model import ARIMA

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX

import matplotlib.pyplot as plt

import math
import itertools

In [3]:
# Move to the project root, where the `src` and `config` packages imported
# below live. The guard makes the cell idempotent: an unconditional relative
# chdir would climb one more directory on every re-run of this cell.
if not os.path.isdir("src"):
    os.chdir("../")

### <a id = '1d2'> Funciones </a>

In [4]:
from src.utils import *

### <a id = '1d3'> Constantes </a>

In [5]:
from config.cons import data_folder, window_jason, cara_lateral, cara_anterior, cara_interior, cara_sepal, list_signals

## <a id = '2'> Lectura de datos </a>
[índice](#0)

In [6]:
# Absolute location of the data directory: current working directory
# followed by the `data_folder` suffix taken from config.cons.
proyect_path = os.getcwd()
data_path = f"{proyect_path}{data_folder}"

In [7]:
# Every unordered pair of entries within each group of config.cons
# (lateral, anterior, interior, sepal), materialised as lists of 2-tuples.
(
    lateral_combinaciones,
    anterior_combinaciones,
    interior_combinaciones,
    sepal_combinaciones,
) = (
    list(itertools.combinations(cara, 2))
    for cara in (cara_lateral, cara_anterior, cara_interior, cara_sepal)
)

In [8]:
# Read everything under data_path into a single mapping; it is indexed by
# label key ("mi", "sttc_mi", "sttc", "other") in the following cell.
# NOTE(review): presumably one entry per sub-folder — confirm in src.utils.
df_all = lectura_carpetas_dict(data_path)

In [9]:
# Pull the four per-label collections out of df_all into their own names.
df_mi, df_sttc_mi, df_sttc, df_other = (
    df_all[label_key] for label_key in ("mi", "sttc_mi", "sttc", "other")
)

In [10]:
# Load the window settings (keyed by label: "MI", "STTC MI", "STTC", "OTHER")
# that are passed as `dict_window` to the smoothing step.
import json

# JSON is UTF-8 by specification; an explicit encoding keeps the read from
# depending on the platform's locale default.
with open(window_jason, "r", encoding="utf-8") as file:
    dict_data_jump = json.load(file)

In [11]:
# Run dict_apply_smooth on each label group, pairing the group with its own
# entry of dict_data_jump as the `dict_window` argument.
(
    df_mi_smooth_part,
    df_sttc_mi_smooth_part,
    df_sttc_smooth_part,
    df_other_smooth_part,
) = (
    dict_apply_smooth(group, list_signals, dict_window=dict_data_jump[window_key])
    for group, window_key in (
        (df_mi, "MI"),
        (df_sttc_mi, "STTC MI"),
        (df_sttc, "STTC"),
        (df_other, "OTHER"),
    )
)

In [12]:
# ACF/PACF lag features computed from the unsmoothed signals, one DataFrame
# per label group.
# NOTE(review): apply_diff=True presumably differences each signal before the
# ACF/PACF is taken — confirm in src.utils.
(
    df_mi_acf_pacf,
    df_sttc_mi_acf_pacf,
    df_sttc_acf_pacf,
    df_other_acf_pacf,
) = (
    dict_to_dataframe(genera_df_acf_pacf(group, list_signals, apply_diff=True))
    for group in (df_mi, df_sttc_mi, df_sttc, df_other)
)

In [13]:
# Same ACF/PACF feature extraction, this time fed with the outputs of
# dict_apply_smooth instead of the original per-label groups.
(
    df_mi_s_acf_pacf,
    df_sttc_mi_s_acf_pacf,
    df_sttc_s_acf_pacf,
    df_other_s_acf_pacf,
) = (
    dict_to_dataframe(genera_df_acf_pacf(group, list_signals, apply_diff=True))
    for group in (
        df_mi_smooth_part,
        df_sttc_mi_smooth_part,
        df_sttc_smooth_part,
        df_other_smooth_part,
    )
)

In [14]:
# Stack the four per-label feature tables row-wise into one frame with a
# fresh 0..n-1 index: first for the unsmoothed features, then for the
# smoothed ones.
df_acf_pacf = pd.concat(
    (df_mi_acf_pacf, df_sttc_mi_acf_pacf, df_sttc_acf_pacf, df_other_acf_pacf),
    axis=0,
    ignore_index=True,
)

df_smooth_acf_pacf = pd.concat(
    (df_mi_s_acf_pacf, df_sttc_mi_s_acf_pacf, df_sttc_s_acf_pacf, df_other_s_acf_pacf),
    axis=0,
    ignore_index=True,
)

In [15]:
# Preview the first five rows of the combined feature table.
df_acf_pacf.head(n=5)

Unnamed: 0,acf_AVL_lag_1,acf_AVL_lag_2,acf_AVL_lag_3,acf_AVL_lag_4,acf_AVL_lag_5,pacf_AVL_lag_1,pacf_AVL_lag_2,pacf_AVL_lag_3,pacf_AVL_lag_4,pacf_AVL_lag_5,...,acf_I_lag_3,acf_I_lag_4,acf_I_lag_5,pacf_I_lag_1,pacf_I_lag_2,pacf_I_lag_3,pacf_I_lag_4,pacf_I_lag_5,patient_id,label
0,0.401996,-0.363673,-0.454023,-0.176922,0.06264,0.402398,-0.628019,0.071025,-0.264127,0.003779,...,-0.489441,-0.186072,0.059897,0.445882,-0.684347,0.180036,-0.301687,-0.00116,patient_2415,mi
1,0.595239,0.182054,-0.041543,-0.235254,-0.18599,0.595836,-0.267607,-0.033394,-0.235717,0.158385,...,0.02425,-0.187993,-0.155805,0.51311,-0.185329,0.059501,-0.290966,0.137827,patient_3086,mi
2,0.640743,0.258573,0.105409,-0.048373,-0.097295,0.641385,-0.258708,0.113775,-0.219842,0.10301,...,-0.18684,-0.149321,-0.122184,0.480558,-0.461351,0.200363,-0.295759,0.103905,patient_2171,mi
3,0.62984,0.041866,-0.284973,-0.376588,-0.307821,0.630471,-0.590107,0.112449,-0.294009,0.041728,...,-0.21417,-0.390316,-0.38201,0.684529,-0.589648,0.057414,-0.257395,0.024389,patient_1450,mi
4,0.498792,-0.222485,-0.371463,-0.191408,-0.072291,0.499292,-0.629038,0.270723,-0.357211,0.061542,...,-0.330432,-0.109626,-0.028044,0.431913,-0.599114,0.259065,-0.361061,0.118161,patient_944,mi


In [14]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

In [15]:
# Work on an independent copy so the clustering column added later does not
# modify df_acf_pacf itself.
df_acf_pacf_test = df_acf_pacf.copy(deep=True)

In [16]:
# ACF columns followed by PACF columns of signal "II", lags 1 through 5.
test_columns = [
    f"{measure}_II_lag_{lag}"
    for measure in ("acf", "pacf")
    for lag in range(1, 6)
]

In [21]:
# Cluster the rows into 4 groups using their autocorrelation features.
# `like='acf'` is a substring match on the column labels, so it keeps the
# pacf_* columns as well as the acf_* ones and leaves out patient_id / label.
acf_pacf_features = df_acf_pacf_test.filter(like='acf', axis="columns")
kmeans = KMeans(n_clusters=4, random_state=42)  # fixed seed for repeatable runs
df_acf_pacf_test['cluster'] = kmeans.fit_predict(acf_pacf_features)


In [22]:
# Number of patients in each (label, cluster) pair. The column is selected
# before counting so only patient_id is aggregated, instead of counting every
# feature column and then discarding all but one.
df_acf_pacf_test.groupby(["label", "cluster"])["patient_id"].count()

label    cluster
mi       0          190
         1           49
         2          150
         3          211
other    0          243
         1           21
         2           37
         3          299
sttc     0          209
         1           18
         2          112
         3          261
sttc_mi  0          145
         1           65
         2          215
         3          175
Name: patient_id, dtype: int64