## <a id = '0'> Índice </a>

* [**Entorno**](#1)  
   * [Librerías](#1d1)  
   * [Funciones](#1d2)  
   * [Constantes](#1d3)

* [**Lectura de datos**](#2)


## <a id = '1'> Entorno </a>
[índice](#0)

### <a id = '1d1'> Librerías </a>

In [1]:
import os
import pandas as pd
import numpy as np

from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


from statsmodels.tsa.stattools import adfuller
from statsmodels.stats.diagnostic import acorr_ljungbox

from statsmodels.tsa.arima.model import ARIMA

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX

import matplotlib.pyplot as plt

import math
import itertools
# from config import data_folder

In [2]:
# Move the working directory one level up; later cells use paths relative to it
# (e.g. "output/patients.csv"). Because the path is relative, re-running this
# cell moves up one more level each time.
os.chdir("../")

### <a id = '1d2'> Funciones </a>

In [3]:
from src.utils import *

### <a id = '1d3'> Constantes </a>

In [4]:
from config.cons import data_folder, window_jason, cara_lateral, cara_anterior, cara_interior, cara_sepal, list_signals

## <a id = '2'> Lectura de datos </a>
[índice](#0)

In [5]:
# Base directory is the current working directory.
proyect_path = os.getcwd()
# Plain string concatenation: no separator is inserted here, so data_folder
# has to carry its own leading path separator.
data_path = proyect_path + data_folder

In [6]:
# Every unordered pair (size-2 combination) of entries inside each cara_* group.
lateral_combinaciones = [*itertools.combinations(cara_lateral, 2)]
anterior_combinaciones = [*itertools.combinations(cara_anterior, 2)]
interior_combinaciones = [*itertools.combinations(cara_interior, 2)]
sepal_combinaciones = [*itertools.combinations(cara_sepal, 2)]

In [7]:
# Load everything found under data_path; the result is indexed by class name
# ("mi", "sttc_mi", "sttc", "other") in the following cell.
df_all = lectura_carpetas_dict(data_path)

In [8]:
# Pull the four per-class entries out of df_all in a single unpacking step.
df_mi, df_sttc_mi, df_sttc, df_other = (
    df_all[class_name] for class_name in ("mi", "sttc_mi", "sttc", "other")
)

In [9]:
# Read the windows file
import json

# Decode explicitly as UTF-8 instead of relying on the platform's default
# encoding, which differs between operating systems.
with open(window_jason, "r", encoding="utf-8") as file:
    dict_data_jump = json.load(file)

In [10]:
# One frame per class: the patient ids are the keys of the per-class dict,
# tagged with the class label and a numeric class id (0..3 in the order below).
patients_mi, patients_sttc_mi, patients_sttc, patients_other = (
    pd.DataFrame({'patient_id': group.keys(), 'class': label, "class_id": class_id})
    for class_id, (label, group) in enumerate(
        (
            ("mi", df_mi),
            ("sttc_mi", df_sttc_mi),
            ("sttc", df_sttc),
            ("other", df_other),
        )
    )
)

# Stack the four class frames row-wise into a single patient/class table.
patients_classes = pd.concat(
    [patients_mi, patients_sttc_mi, patients_sttc, patients_other],
    axis=0,
)

In [11]:

# Split the patient ids and their class labels into train / test / validation
# subsets, passing the class column as the stratification variable.
# NOTE(review): how sizes=[0.10, 0.20] maps to the test and validation
# fractions, and the order of the six returned values, are defined by
# split_train_test_val (not visible here) — confirm they match this unpacking.
X_train, X_test, X_val, y_train, y_test, y_val = split_train_test_val(patients_classes["patient_id"], patients_classes["class"], sizes = [0.10, 0.20], random_state = 42, stratify = patients_classes["class"])

In [12]:
# Label every split with its sample name, then stack the three frames row-wise.
train_patients, test_patients, val_patients = (
    pd.DataFrame({"patient": ids, "class": labels, "sample": sample_name})
    for ids, labels, sample_name in (
        (X_train, y_train, "train"),
        (X_test, y_test, "test"),
        (X_val, y_val, "validation"),
    )
)
patients = pd.concat([train_patients, test_patients, val_patients], axis=0)

In [13]:
# Persist the patient -> class -> sample assignment (row index is not written).
patients.to_csv("output/patients.csv", index = False)

In [14]:
# Row subsets of `patients`, one per value of the "sample" column.
patients_train = patients.loc[patients["sample"] == "train"]
patients_test = patients.loc[patients["sample"] == "test"]
patients_val = patients.loc[patients["sample"] == "validation"]

In [15]:
# Arrays with the training patient ids of each class.
patients_train_mi = patients_train.loc[patients_train["class"] == "mi", "patient"].values
patients_train_sttc_mi = patients_train.loc[patients_train["class"] == "sttc_mi", "patient"].values
patients_train_sttc = patients_train.loc[patients_train["class"] == "sttc", "patient"].values
patients_train_other = patients_train.loc[patients_train["class"] == "other", "patient"].values

In [16]:
# Restrict each per-class dictionary to the patients assigned to the train split.
df_mi_train = {pid: df_mi[pid] for pid in patients_train_mi}
df_sttc_mi_train = {pid: df_sttc_mi[pid] for pid in patients_train_sttc_mi}
df_sttc_train = {pid: df_sttc[pid] for pid in patients_train_sttc}
df_other_train = {pid: df_other[pid] for pid in patients_train_other}

## Feature Engineering

### ACF y PACF lags (5)

In [26]:
# ACF/PACF feature tables for the training patients of each class.
# Each class dictionary goes through genera_df_acf_pacf (over list_signals, with
# apply_diff=True) and the result is converted with dict_to_dataframe.
# NOTE(review): both helpers are presumably provided by `from src.utils import *`;
# the exact lag count and differencing behaviour are defined there — confirm.
df_mi_acf_pacf_train = dict_to_dataframe(genera_df_acf_pacf(df_mi_train, list_signals, apply_diff= True))
df_sttc_mi_acf_pacf_train = dict_to_dataframe(genera_df_acf_pacf(df_sttc_mi_train, list_signals, apply_diff= True)) 
df_sttc_acf_pacf_train = dict_to_dataframe(genera_df_acf_pacf(df_sttc_train, list_signals, apply_diff= True))
df_other_acf_pacf_train = dict_to_dataframe(genera_df_acf_pacf(df_other_train, list_signals, apply_diff= True))

In [27]:
# Write one ACF/PACF feature file per class under output/features/
# (row index is not written).
df_mi_acf_pacf_train.to_csv("output/features/mi_acf_pacf_train.csv", index = False)
df_sttc_mi_acf_pacf_train.to_csv("output/features/sttc_mi_acf_pacf_train.csv", index = False)
df_sttc_acf_pacf_train.to_csv("output/features/sttc_acf_pacf_train.csv", index = False)
df_other_acf_pacf_train.to_csv("output/features/other_acf_pacf_train.csv", index = False)

### Peak distribution (seasonal)

#### distribución en picos (seasonal, TFF, etc)

##### Número total de picos.

In [24]:
# Per-class structures returned by get_dict_labels for the signals in list_signals;
# the next cell indexes them by signal name (e.g. "II").
# Note: these use the full per-class dictionaries (df_mi, ...), not the
# *_train subsets.
df_mi_signals = get_dict_labels(df_mi, list_signals)
df_sttc_mi_signals = get_dict_labels(df_sttc_mi, list_signals)
df_sttc_signals = get_dict_labels(df_sttc, list_signals)
df_other_signals = get_dict_labels(df_other, list_signals)

In [None]:
# Seasonal component (period=100) of the "mean" entry that get_estadisticas
# returns for signal "II", computed separately for each class.
# NOTE(review): presumably "mean" is the average series across patients —
# confirm against get_estadisticas.
mi_seasonal = seasonal_decompose(pd.Series(get_estadisticas(df_mi_signals["II"])["mean"]),period = 100).seasonal
sttc_mi_seasonal = seasonal_decompose(pd.Series(get_estadisticas(df_sttc_mi_signals["II"])["mean"]),period = 100).seasonal
sttc_seasonal = seasonal_decompose(pd.Series(get_estadisticas(df_sttc_signals["II"])["mean"]),period = 100).seasonal
other_seasonal = seasonal_decompose(pd.Series(get_estadisticas(df_other_signals["II"])["mean"]),period = 100).seasonal

In [17]:
# Restrict each per-class dictionary to its training patients.
# NOTE(review): this repeats the statements of cell In [16] verbatim and
# rebuilds the same four dictionaries; one of the two cells is redundant.
df_mi_train = {patient : df_mi[patient] for patient in patients_train_mi}
df_sttc_mi_train = {patient : df_sttc_mi[patient] for patient in patients_train_sttc_mi}
df_sttc_train = {patient : df_sttc[patient] for patient in patients_train_sttc}
df_other_train = {patient : df_other[patient] for patient in patients_train_other}

In [19]:
class SerieAnalisis:
    """Descriptive features computed from a single time series.

    The wrapped object must offer ``max()``, ``min()`` and ``std()`` and be
    accepted by ``seasonal_decompose`` and ``get_peaks_seasonal``.
    """

    def __init__(self, serie):
        self.serie = serie

    def amplitud(self):
        """Return the range of the series (maximum minus minimum)."""
        highest = self.serie.max()
        lowest = self.serie.min()
        return highest - lowest

    def intensidad(self):
        """Return the standard deviation of the series."""
        return self.serie.std()

    def seasonal_serie(self, period = 100):
        """Return the seasonal component of the decomposition for ``period``."""
        decomposition = seasonal_decompose(self.serie, period = period)
        return decomposition.seasonal

    def ratio(self, period=100):
        """Return std(seasonal component) divided by std(series)."""
        seasonal_std = self.seasonal_serie(period).std()
        return seasonal_std / self.serie.std()

    def _peak_gaps(self, n_std):
        # Output of get_list_jumps for the peaks found by get_peaks_seasonal
        # (presumably the distances between consecutive peaks — confirm in src.utils).
        return get_list_jumps(get_peaks_seasonal(self.serie, n_std = n_std))

    def mean_peaks(self, n_std = 2):
        """Return the mean of the peak jumps, or NaN when fewer than two exist."""
        gaps = self._peak_gaps(n_std)
        return np.mean(gaps) if len(gaps) >= 2 else np.nan

    def std_peaks(self, n_std = 2):
        """Return the std of the peak jumps, or NaN when fewer than two exist."""
        gaps = self._peak_gaps(n_std)
        return np.std(gaps) if len(gaps) >= 2 else np.nan

    def n_peaks(self, n_std = 2):
        """Return how many peaks get_peaks_seasonal finds for ``n_std``."""
        return len(get_peaks_seasonal(self.serie, n_std = n_std))
        

In [33]:
def get_serie_summary(serie, cara, period = 100):
    """Build the summary features of one time series.

    Args:
        serie: Time series to summarise; it is wrapped in ``SerieAnalisis``.
        cara: Signal name, appended as a suffix to every feature key.
        period: Seasonal period used for the ``ratio`` feature.

    Returns:
        dict: Keys ``amplitud_<cara>``, ``intensidad_<cara>``, ``ratio_<cara>``,
        ``mean_peaks_<cara>``, ``std_peaks_<cara>`` and ``n_peaks_<cara>``,
        each mapped to the value of the ``SerieAnalisis`` method of that name.
    """
    analisis = SerieAnalisis(serie)

    return {
        f"amplitud_{cara}": analisis.amplitud(),
        f"intensidad_{cara}": analisis.intensidad(),
        # Forward `period`: it used to be dropped here, so the ratio was always
        # computed with the default period regardless of the argument.
        f"ratio_{cara}": analisis.ratio(period),
        f"mean_peaks_{cara}": analisis.mean_peaks(),
        f"std_peaks_{cara}": analisis.std_peaks(),
        f"n_peaks_{cara}": analisis.n_peaks(),
    }

In [53]:
def get_dict_serie_summary(dict, caras, period,) -> pd.DataFrame:
    """Build a wide summary table with one row per patient.

    For every patient, the features returned by ``get_serie_summary`` for each
    signal in ``caras`` that the patient actually has are placed side by side
    in a single row.

    Args:
        dict: Mapping from patient id to a mapping of signal name -> series.
            (The name shadows the builtin; it is kept so existing keyword
            callers keep working.)
        caras: Signal names to summarise; names a patient lacks are skipped.
        period: Seasonal period forwarded to ``get_serie_summary``.

    Returns:
        pd.DataFrame: One row per patient that has at least one of the
        requested signals, with the feature columns followed by ``patient``
        and ``cara``. When no patient qualifies, an empty frame with only the
        ``patient`` and ``cara`` columns is returned.
    """
    caras = list(caras)
    patient_rows = []

    for patient, signals in dict.items():
        features = {}
        for cara in caras:
            if cara in signals.keys():
                features.update(get_serie_summary(signals[cara], cara, period))
        if not features:
            # No requested signal for this patient: it contributes no row.
            continue
        features["patient"] = patient
        # Kept for compatibility with existing outputs: this column always holds
        # the last name in `caras` and carries no per-row information.
        # NOTE(review): consider dropping it once downstream readers are checked.
        features["cara"] = caras[-1]
        patient_rows.append(pd.DataFrame(features, index=[0]))

    if not patient_rows:
        return pd.DataFrame(columns=["patient", "cara"])

    # Concatenate once instead of growing a frame inside the loop, which copies
    # the accumulated data on every iteration and starts from an empty frame.
    return pd.concat(patient_rows, axis=0, ignore_index=True)

In [54]:
# Per-class summary tables for the training patients: every signal in
# list_signals is summarised with period 100 by get_dict_serie_summary.
df_mi_peak_train = get_dict_serie_summary(df_mi_train, list_signals, 100)
df_sttc_mi_peak_train = get_dict_serie_summary(df_sttc_mi_train, list_signals, 100)
df_sttc_peak_train = get_dict_serie_summary(df_sttc_train, list_signals, 100)
df_other_peak_train = get_dict_serie_summary(df_other_train, list_signals, 100)

In [25]:
# Write one summary feature file per class under output/features/
# (row index is not written).
df_mi_peak_train.to_csv("output/features/mi_peak_train.csv", index = False)
df_sttc_mi_peak_train.to_csv("output/features/sttc_mi_peak_train.csv", index = False)
df_sttc_peak_train.to_csv("output/features/sttc_peak_train.csv", index = False)
df_other_peak_train.to_csv("output/features/other_peak_train.csv", index = False)

85.4

### Cross Correlation features