## <a id = '0'> Índice </a>

* [**Entorno**](#1)  
   * [Librerías](#1d1)  
   * [Funciones](#1d2)  
   * [Constantes](#1d3)

* [**Lectura de datos**](#2)


## <a id = '1'> Entorno </a>
[índice](#0)

### <a id = '1d1'> Librerías </a>

In [1]:
import os
import pandas as pd
import numpy as np

from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


from statsmodels.tsa.stattools import adfuller
from statsmodels.stats.diagnostic import acorr_ljungbox

from statsmodels.tsa.arima.model import ARIMA

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX

import matplotlib.pyplot as plt

import math
import itertools
# from config import data_folder

from scipy.stats import kurtosis, trim_mean


In [2]:
# Move the working directory one level up so that the relative imports and paths
# used by the following cells (`src`, `config`, "output/...") resolve.
# The move is guarded on the presence of the `src` directory: without the guard,
# re-running this cell climbs one more directory each time and breaks every
# relative path below.
if not os.path.isdir("src"):
    os.chdir("../")

### <a id = '1d2'> Funciones </a>

In [3]:
from src.utils import *

### <a id = '1d3'> Constantes </a>

In [4]:
from config.cons import data_folder, window_jason, cara_lateral, cara_anterior, cara_interior, cara_sepal, list_signals

## <a id = '2'> Lectura de datos </a>
[índice](#0)

In [5]:
# Location of the data directory: the current working directory immediately
# followed by the `data_folder` constant. This is plain string concatenation,
# so `data_folder` is expected to carry its own leading separator
# (TODO confirm in config.cons).
proyect_path = os.getcwd()
data_path = f"{proyect_path}{data_folder}"

In [6]:
# For each signal group imported from config.cons, list every unordered pair
# of its members.
(
    lateral_combinaciones,
    anterior_combinaciones,
    interior_combinaciones,
    sepal_combinaciones,
) = (
    list(itertools.combinations(signal_group, 2))
    for signal_group in (cara_lateral, cara_anterior, cara_interior, cara_sepal)
)

In [7]:
# Pairs drawn with replacement: includes every signal paired with itself.
all_combinaciones = [*itertools.combinations_with_replacement(list_signals, 2)]
# Pairs drawn without replacement: two different positions of list_signals.
bisignal_combinaciones = [*itertools.combinations(list_signals, 2)]

In [8]:
# Split the pairs into single-signal entries (both members equal) and
# two-signal entries; the latter are labelled "<first>_<second>".
uni_combinacion = [first for first, second in all_combinaciones if first == second]
bi_combinacion = [
    first + "_" + second
    for first, second in all_combinaciones
    if not first == second
]

# Both lists grouped under the keys expected by the CCF summary calls below.
dict_combinaciones = {
    "uni_combinacion": uni_combinacion,
    "bi_combinacion": bi_combinacion,
}

In [9]:
# Keep the pairs of all_combinaciones that do not appear in bisignal_combinaciones.
# Membership is checked against a set built once, instead of scanning the whole
# list for every pair. The pairs are tuples of signal names, which are used as
# strings elsewhere in this notebook and are therefore hashable.
_bisignal_lookup = set(bisignal_combinaciones)
signal_combinaciones = [item for item in all_combinaciones if item not in _bisignal_lookup]


In [10]:
# Both pair lists gathered in one dictionary, keyed by their variable names.
bisignals_dict = dict(
    bisignal_combinaciones=bisignal_combinaciones,
    signal_combinaciones=signal_combinaciones,
)

In [11]:
# Read the data found under data_path through the project helper. The result is
# indexed by class name in the next cell ("mi", "sttc_mi", "sttc", "other").
# NOTE(review): the helper comes from the star import of src.utils; the structure
# of each per-class entry is not visible from this notebook — confirm there.
df_all = lectura_carpetas_dict(data_path)

In [12]:
# Unpack the per-class entries of df_all into one variable per class.
df_mi, df_sttc_mi, df_sttc, df_other = (
    df_all[class_key] for class_key in ("mi", "sttc_mi", "sttc", "other")
)

In [13]:
# Read the windows definition from the JSON file referenced by `window_jason`.
import json

# The encoding is passed explicitly so that decoding the JSON text does not
# depend on the platform's default locale encoding.
with open(window_jason, "r", encoding="utf-8") as file:
    dict_data_jump = json.load(file)

In [14]:
# One frame per class: the keys of the per-class dictionary become the
# 'patient_id' column, with the class label and a numeric class id
# (0..3, in the order listed) repeated on every row.
patients_mi, patients_sttc_mi, patients_sttc, patients_other = (
    pd.DataFrame({'patient_id': class_records.keys(),
                  'class': class_label,
                  "class_id": class_code})
    for class_code, (class_label, class_records) in enumerate(
        (("mi", df_mi), ("sttc_mi", df_sttc_mi), ("sttc", df_sttc), ("other", df_other))
    )
)

# Stack the four frames row-wise.
patients_classes = pd.concat(
    [patients_mi, patients_sttc_mi, patients_sttc, patients_other],
    axis=0,
)

## Split Train Test Val

In [15]:

# Split patient ids (X) and class labels (y) into train / test / validation,
# passing the class labels as the `stratify` argument.
# NOTE(review): how `sizes` maps onto the test and validation fractions is
# defined by split_train_test_val in src.utils — confirm there.
X_train, X_test, X_val, y_train, y_test, y_val = split_train_test_val(
    patients_classes["patient_id"],
    patients_classes["class"],
    sizes=[0.10, 0.20],
    random_state=42,
    stratify=patients_classes["class"],
)

In [16]:
# One frame per split with the patient id, its class and the name of the split.
train_patients, test_patients, val_patients = (
    pd.DataFrame({"patient": split_ids,
                  "class": split_labels,
                  "sample": split_name})
    for split_ids, split_labels, split_name in (
        (X_train, y_train, "train"),
        (X_test, y_test, "test"),
        (X_val, y_val, "validation"),
    )
)

# Single table with the split assignment of every patient.
patients = pd.concat(
    [train_patients, test_patients, val_patients],
    axis=0,
)

In [15]:
# Load the patient/split assignment from disk. This rebinds `patients`, so the
# frame assembled in memory by the preceding cell is replaced by the CSV contents.
# The columns "patient", "class" and "sample" are read from it in the cells below.
patients = pd.read_csv("output/patients.csv")

In [16]:
# Rows of `patients` belonging to each split, selected by the "sample" column.
patients_train, patients_test, patients_val = (
    patients[patients["sample"] == sample_name]
    for sample_name in ("train", "test", "validation")
)

In [17]:
# Array of training patient ids for each class.
(
    patients_train_mi,
    patients_train_sttc_mi,
    patients_train_sttc,
    patients_train_other,
) = (
    patients_train.loc[patients_train["class"] == class_label, "patient"].values
    for class_label in ("mi", "sttc_mi", "sttc", "other")
)

In [18]:
# Restrict each per-class dictionary to the patients assigned to the training split.
df_mi_train, df_sttc_mi_train, df_sttc_train, df_other_train = (
    {patient_id: class_records[patient_id] for patient_id in selected_ids}
    for class_records, selected_ids in (
        (df_mi, patients_train_mi),
        (df_sttc_mi, patients_train_sttc_mi),
        (df_sttc, patients_train_sttc),
        (df_other, patients_train_other),
    )
)

## Feature Engineering

### Train

#### ACF y PACF lags (5)

In [19]:
# ACF/PACF features for every training class: the project helper is run over
# list_signals with apply_diff=True and its output converted to a DataFrame.
(
    df_mi_acf_pacf_train,
    df_sttc_mi_acf_pacf_train,
    df_sttc_acf_pacf_train,
    df_other_acf_pacf_train,
) = (
    dict_to_dataframe(genera_df_acf_pacf(class_records, list_signals, apply_diff=True))
    for class_records in (df_mi_train, df_sttc_mi_train, df_sttc_train, df_other_train)
)

In [20]:
# Write the training ACF/PACF features, one CSV per class, without the index column.
for features_frame, csv_path in (
    (df_mi_acf_pacf_train, "output/features/train/mi_acf_pacf.csv"),
    (df_sttc_mi_acf_pacf_train, "output/features/train/sttc_mi_acf_pacf.csv"),
    (df_sttc_acf_pacf_train, "output/features/train/sttc_acf_pacf.csv"),
    (df_other_acf_pacf_train, "output/features/train/other_acf_pacf.csv"),
):
    features_frame.to_csv(csv_path, index=False)

#### Peak distribution seasonal

##### distribución en picos (seasonal, TFF, etc)

Número total de picos.

In [23]:
# Rebuild the per-class training dictionaries (same construction as in the
# ACF/PACF section above), presumably so this section can be run on its own.
df_mi_train, df_sttc_mi_train, df_sttc_train, df_other_train = (
    {patient_id: class_records[patient_id] for patient_id in selected_ids}
    for class_records, selected_ids in (
        (df_mi, patients_train_mi),
        (df_sttc_mi, patients_train_sttc_mi),
        (df_sttc, patients_train_sttc),
        (df_other, patients_train_other),
    )
)

In [24]:
# Series summary features for every training class.
# NOTE(review): the meaning of the third positional argument (100) is defined
# by get_dict_serie_summary in src.utils — confirm there.
df_mi_peak_train, df_sttc_mi_peak_train, df_sttc_peak_train, df_other_peak_train = (
    get_dict_serie_summary(class_records, list_signals, 100)
    for class_records in (df_mi_train, df_sttc_mi_train, df_sttc_train, df_other_train)
)

In [25]:
# Write the training peak features, one CSV per class, without the index column.
for features_frame, csv_path in (
    (df_mi_peak_train, "output/features/train/mi_peak.csv"),
    (df_sttc_mi_peak_train, "output/features/train/sttc_mi_peak.csv"),
    (df_sttc_peak_train, "output/features/train/sttc_peak.csv"),
    (df_other_peak_train, "output/features/train/other_peak.csv"),
):
    features_frame.to_csv(csv_path, index=False)

#### Cross Correlation features

Correlaciones cruzadas
* Promedio de correlaciones por combinación.
* Desviación estándar de las correlaciones por combinación.
* Máxima y mínima correlación por combinación.
* Lag de la máxima correlación por combinación.
* Kurtosis.
* Promedio de todas las autocorrelaciones de la matriz.
* Norma de la matriz.
* Número de cruces por cero.

In [26]:
# Cross-correlation results for every training class, computed by the project
# helper over all signal pairs (including each signal with itself).
df_mi_ccf_train, df_sttc_mi_ccf_train, df_sttc_ccf_train, df_other_ccf_train = (
    patients_dict_ccf(class_records, all_combinaciones)
    for class_records in (df_mi_train, df_sttc_mi_train, df_sttc_train, df_other_train)
)


In [None]:
# Summarise the training cross-correlations of every class with the project helper.
(
    df_mi_ccf_stats_train,
    df_sttc_mi_ccf_stats_train,
    df_sttc_ccf_stats_train,
    df_other_ccf_stats_train,
) = (
    get_dict_ccf_summary(class_ccf, dict_combinaciones)
    for class_ccf in (df_mi_ccf_train, df_sttc_mi_ccf_train, df_sttc_ccf_train, df_other_ccf_train)
)

# Write the summaries, one CSV per class, without the index column.
for stats_frame, csv_path in (
    (df_mi_ccf_stats_train, "output/features/train/mi_ccf_stats.csv"),
    (df_sttc_mi_ccf_stats_train, "output/features/train/sttc_mi_ccf_stats.csv"),
    (df_sttc_ccf_stats_train, "output/features/train/sttc_ccf_stats.csv"),
    (df_other_ccf_stats_train, "output/features/train/other_ccf_stats.csv"),
):
    stats_frame.to_csv(csv_path, index=False)

### Test

#### ACF y PACF lags (5)

In [21]:
# Array of test patient ids for each class.
(
    patients_test_mi,
    patients_test_sttc_mi,
    patients_test_sttc,
    patients_test_other,
) = (
    patients_test.loc[patients_test["class"] == class_label, "patient"].values
    for class_label in ("mi", "sttc_mi", "sttc", "other")
)

In [22]:
# Restrict each per-class dictionary to the patients assigned to the test split.
df_mi_test, df_sttc_mi_test, df_sttc_test, df_other_test = (
    {patient_id: class_records[patient_id] for patient_id in selected_ids}
    for class_records, selected_ids in (
        (df_mi, patients_test_mi),
        (df_sttc_mi, patients_test_sttc_mi),
        (df_sttc, patients_test_sttc),
        (df_other, patients_test_other),
    )
)

In [23]:
# ACF/PACF features for every test class: the project helper is run over
# list_signals with apply_diff=True and its output converted to a DataFrame.
(
    df_mi_acf_pacf_test,
    df_sttc_mi_acf_pacf_test,
    df_sttc_acf_pacf_test,
    df_other_acf_pacf_test,
) = (
    dict_to_dataframe(genera_df_acf_pacf(class_records, list_signals, apply_diff=True))
    for class_records in (df_mi_test, df_sttc_mi_test, df_sttc_test, df_other_test)
)

In [24]:
# Write the test ACF/PACF features, one CSV per class, without the index column.
for features_frame, csv_path in (
    (df_mi_acf_pacf_test, "output/features/test/mi_acf_pacf.csv"),
    (df_sttc_mi_acf_pacf_test, "output/features/test/sttc_mi_acf_pacf.csv"),
    (df_sttc_acf_pacf_test, "output/features/test/sttc_acf_pacf.csv"),
    (df_other_acf_pacf_test, "output/features/test/other_acf_pacf.csv"),
):
    features_frame.to_csv(csv_path, index=False)

##### distribución en picos (seasonal, TFF, etc)

Número total de picos.

In [33]:
# Rebuild the per-class test dictionaries (same construction as in the
# ACF/PACF section above), presumably so this section can be run on its own.
df_mi_test, df_sttc_mi_test, df_sttc_test, df_other_test = (
    {patient_id: class_records[patient_id] for patient_id in selected_ids}
    for class_records, selected_ids in (
        (df_mi, patients_test_mi),
        (df_sttc_mi, patients_test_sttc_mi),
        (df_sttc, patients_test_sttc),
        (df_other, patients_test_other),
    )
)

In [34]:
# Series summary features for every test class.
# NOTE(review): the meaning of the third positional argument (100) is defined
# by get_dict_serie_summary in src.utils — confirm there.
df_mi_peak_test, df_sttc_mi_peak_test, df_sttc_peak_test, df_other_peak_test = (
    get_dict_serie_summary(class_records, list_signals, 100)
    for class_records in (df_mi_test, df_sttc_mi_test, df_sttc_test, df_other_test)
)

In [35]:
# Write the test peak features, one CSV per class, without the index column.
for features_frame, csv_path in (
    (df_mi_peak_test, "output/features/test/mi_peak.csv"),
    (df_sttc_mi_peak_test, "output/features/test/sttc_mi_peak.csv"),
    (df_sttc_peak_test, "output/features/test/sttc_peak.csv"),
    (df_other_peak_test, "output/features/test/other_peak.csv"),
):
    features_frame.to_csv(csv_path, index=False)

#### Cross Correlation features

Correlaciones cruzadas
* Promedio de correlaciones por combinación.
* Desviación estándar de las correlaciones por combinación.
* Máxima y mínima correlación por combinación.
* Lag de la máxima correlación por combinación.
* Kurtosis.
* Promedio de todas las autocorrelaciones de la matriz.
* Norma de la matriz.
* Número de cruces por cero.

In [36]:
# Cross-correlation results for every test class, computed by the project
# helper over all signal pairs (including each signal with itself).
df_mi_ccf_test, df_sttc_mi_ccf_test, df_sttc_ccf_test, df_other_ccf_test = (
    patients_dict_ccf(class_records, all_combinaciones)
    for class_records in (df_mi_test, df_sttc_mi_test, df_sttc_test, df_other_test)
)

In [None]:
# Summarise the test cross-correlations of every class with the project helper.
(
    df_mi_ccf_stats_test,
    df_sttc_mi_ccf_stats_test,
    df_sttc_ccf_stats_test,
    df_other_ccf_stats_test,
) = (
    get_dict_ccf_summary(class_ccf, dict_combinaciones)
    for class_ccf in (df_mi_ccf_test, df_sttc_mi_ccf_test, df_sttc_ccf_test, df_other_ccf_test)
)

In [None]:
# Write the test cross-correlation summaries, one CSV per class, without the index column.
for stats_frame, csv_path in (
    (df_mi_ccf_stats_test, "output/features/test/mi_ccf_stats.csv"),
    (df_sttc_mi_ccf_stats_test, "output/features/test/sttc_mi_ccf_stats.csv"),
    (df_sttc_ccf_stats_test, "output/features/test/sttc_ccf_stats.csv"),
    (df_other_ccf_stats_test, "output/features/test/other_ccf_stats.csv"),
):
    stats_frame.to_csv(csv_path, index=False)

### Val

#### ACF y PACF lags (5)

In [25]:
# Array of validation patient ids for each class.
(
    patients_val_mi,
    patients_val_sttc_mi,
    patients_val_sttc,
    patients_val_other,
) = (
    patients_val.loc[patients_val["class"] == class_label, "patient"].values
    for class_label in ("mi", "sttc_mi", "sttc", "other")
)

In [26]:
# Restrict each per-class dictionary to the patients assigned to the validation split.
df_mi_val, df_sttc_mi_val, df_sttc_val, df_other_val = (
    {patient_id: class_records[patient_id] for patient_id in selected_ids}
    for class_records, selected_ids in (
        (df_mi, patients_val_mi),
        (df_sttc_mi, patients_val_sttc_mi),
        (df_sttc, patients_val_sttc),
        (df_other, patients_val_other),
    )
)


In [27]:
# ACF/PACF features for every validation class: the project helper is run over
# list_signals with apply_diff=True and its output converted to a DataFrame.
(
    df_mi_acf_pacf_val,
    df_sttc_mi_acf_pacf_val,
    df_sttc_acf_pacf_val,
    df_other_acf_pacf_val,
) = (
    dict_to_dataframe(genera_df_acf_pacf(class_records, list_signals, apply_diff=True))
    for class_records in (df_mi_val, df_sttc_mi_val, df_sttc_val, df_other_val)
)

In [28]:
# Write the validation ACF/PACF features, one CSV per class, without the index column.
for features_frame, csv_path in (
    (df_mi_acf_pacf_val, "output/features/val/mi_acf_pacf.csv"),
    (df_sttc_mi_acf_pacf_val, "output/features/val/sttc_mi_acf_pacf.csv"),
    (df_sttc_acf_pacf_val, "output/features/val/sttc_acf_pacf.csv"),
    (df_other_acf_pacf_val, "output/features/val/other_acf_pacf.csv"),
):
    features_frame.to_csv(csv_path, index=False)

##### distribución en picos (seasonal, TFF, etc)

Número total de picos.

In [43]:
# Rebuild the per-class validation dictionaries (same construction as in the
# ACF/PACF section above), presumably so this section can be run on its own.
df_mi_val, df_sttc_mi_val, df_sttc_val, df_other_val = (
    {patient_id: class_records[patient_id] for patient_id in selected_ids}
    for class_records, selected_ids in (
        (df_mi, patients_val_mi),
        (df_sttc_mi, patients_val_sttc_mi),
        (df_sttc, patients_val_sttc),
        (df_other, patients_val_other),
    )
)

In [44]:
# Series summary features for every validation class.
# NOTE(review): the meaning of the third positional argument (100) is defined
# by get_dict_serie_summary in src.utils — confirm there.
df_mi_peak_val, df_sttc_mi_peak_val, df_sttc_peak_val, df_other_peak_val = (
    get_dict_serie_summary(class_records, list_signals, 100)
    for class_records in (df_mi_val, df_sttc_mi_val, df_sttc_val, df_other_val)
)

In [45]:
# Write the validation peak features, one CSV per class, without the index column.
for features_frame, csv_path in (
    (df_mi_peak_val, "output/features/val/mi_peak.csv"),
    (df_sttc_mi_peak_val, "output/features/val/sttc_mi_peak.csv"),
    (df_sttc_peak_val, "output/features/val/sttc_peak.csv"),
    (df_other_peak_val, "output/features/val/other_peak.csv"),
):
    features_frame.to_csv(csv_path, index=False)

#### Cross Correlation features

Correlaciones cruzadas
* Promedio de correlaciones por combinación.
* Desviación estándar de las correlaciones por combinación.
* Máxima y mínima correlación por combinación.
* Lag de la máxima correlación por combinación.
* Kurtosis.
* Promedio de todas las autocorrelaciones de la matriz.
* Norma de la matriz.
* Número de cruces por cero.

In [46]:
# Cross-correlation results for every validation class, computed by the project
# helper over all signal pairs (including each signal with itself).
df_mi_ccf_val, df_sttc_mi_ccf_val, df_sttc_ccf_val, df_other_ccf_val = (
    patients_dict_ccf(class_records, all_combinaciones)
    for class_records in (df_mi_val, df_sttc_mi_val, df_sttc_val, df_other_val)
)

In [None]:
# Summarise the validation cross-correlations of every class with the project helper.
(
    df_mi_ccf_stats_val,
    df_sttc_mi_ccf_stats_val,
    df_sttc_ccf_stats_val,
    df_other_ccf_stats_val,
) = (
    get_dict_ccf_summary(class_ccf, dict_combinaciones)
    for class_ccf in (df_mi_ccf_val, df_sttc_mi_ccf_val, df_sttc_ccf_val, df_other_ccf_val)
)

In [None]:
# Write the validation cross-correlation summaries, one CSV per class, without the index column.
for stats_frame, csv_path in (
    (df_mi_ccf_stats_val, "output/features/val/mi_ccf_stats.csv"),
    (df_sttc_mi_ccf_stats_val, "output/features/val/sttc_mi_ccf_stats.csv"),
    (df_sttc_ccf_stats_val, "output/features/val/sttc_ccf_stats.csv"),
    (df_other_ccf_stats_val, "output/features/val/other_ccf_stats.csv"),
):
    stats_frame.to_csv(csv_path, index=False)