## <a id = '0'> Índice </a>

* [**Entorno**](#1)  
   * [Librerías](#1d1)  
   * [Funciones](#1d2)  
   * [Constantes](#1d3)

* [**Lectura de datos**](#2)


## <a id = '1'> Entorno </a>
[índice](#0)

### <a id = '1d1'> Librerías </a>

In [150]:
import os
import pandas as pd
import numpy as np

from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


from statsmodels.tsa.stattools import adfuller
from statsmodels.stats.diagnostic import acorr_ljungbox

from statsmodels.tsa.arima.model import ARIMA

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX

import matplotlib.pyplot as plt

import math
import itertools
# from config import data_folder

from scipy.stats import kurtosis, trim_mean


In [2]:
# Move the working directory one level up (presumably to the project root, so
# that `src`, `config` and `output/` are reachable from here — TODO confirm).
# NOTE(review): the target is relative to the current directory, so running
# this cell again climbs one more level each time.
os.chdir("../")

### <a id = '1d2'> Funciones </a>

In [3]:
from src.utils import *

### <a id = '1d3'> Constantes </a>

In [4]:
from config.cons import data_folder, window_jason, cara_lateral, cara_anterior, cara_interior, cara_sepal, list_signals

## <a id = '2'> Lectura de datos </a>
[índice](#0)

In [5]:
# Data location: the current working directory immediately followed by
# `data_folder` (plain string concatenation; no path separator is inserted).
proyect_path = os.getcwd()
data_path = f"{proyect_path}{data_folder}"

In [6]:
# Every unordered pair of distinct entries within each `cara_*` group.
(
    lateral_combinaciones,
    anterior_combinaciones,
    interior_combinaciones,
    sepal_combinaciones,
) = (
    list(itertools.combinations(cara, 2))
    for cara in (cara_lateral, cara_anterior, cara_interior, cara_sepal)
)

In [176]:
# Pairs of two different positions of `list_signals` only.
bisignal_combinaciones = list(itertools.combinations(list_signals, 2))
# Same pairs plus each entry paired with itself: (x, x).
all_combinaciones = list(
    itertools.combinations_with_replacement(list_signals, 2)
)

In [179]:
# Keep the pairs of `all_combinaciones` that do not appear in
# `bisignal_combinaciones`, preserving their original order.
signal_combinaciones = list(
    itertools.filterfalse(bisignal_combinaciones.__contains__, all_combinaciones)
)


In [181]:
# Both pair lists grouped under their own names.
bisignals_dict = dict(
    bisignal_combinaciones=bisignal_combinaciones,
    signal_combinaciones=signal_combinaciones,
)

In [7]:
# Load the data under `data_path` with the project helper. The result is
# indexed by the keys "mi", "sttc_mi", "sttc" and "other" in the next cell.
df_all = lectura_carpetas_dict(data_path)

In [8]:
# One entry of `df_all` per diagnostic class.
df_mi, df_sttc_mi, df_sttc, df_other = (
    df_all[class_key] for class_key in ("mi", "sttc_mi", "sttc", "other")
)

In [9]:
# Read the windows definition from the JSON file named by `window_jason`.
import json

# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform locale, so the same file could be read differently per machine.
with open(window_jason, "r", encoding="utf-8") as file:
    dict_data_jump = json.load(file)

In [10]:
# One frame per class: the keys of the class dictionary as `patient_id`, plus
# the class label and its numeric id (0 = mi, 1 = sttc_mi, 2 = sttc, 3 = other).
patients_mi, patients_sttc_mi, patients_sttc, patients_other = (
    pd.DataFrame(
        {"patient_id": class_data.keys(), "class": label, "class_id": class_id}
    )
    for class_id, (label, class_data) in enumerate(
        (
            ("mi", df_mi),
            ("sttc_mi", df_sttc_mi),
            ("sttc", df_sttc),
            ("other", df_other),
        )
    )
)

# Stack the four frames row-wise.
# NOTE(review): each frame keeps its own default index, so index labels
# repeat across classes in the concatenated frame.
patients_classes = pd.concat(
    [patients_mi, patients_sttc_mi, patients_sttc, patients_other],
    axis=0,
)

### Train Test Val

In [11]:

# Split patient ids and their class labels into train / test / validation,
# stratifying on the class label with a fixed seed.
# NOTE(review): how `sizes` maps onto the test and validation fractions is
# decided inside split_train_test_val — confirm there.
X_train, X_test, X_val, y_train, y_test, y_val = split_train_test_val(
    patients_classes["patient_id"],
    patients_classes["class"],
    sizes=[0.10, 0.20],
    random_state=42,
    stratify=patients_classes["class"],
)

In [12]:
# Tag every split with its sample name, then stack the three splits.
train_patients, test_patients, val_patients = (
    pd.DataFrame({"patient": ids, "class": labels, "sample": sample_name})
    for ids, labels, sample_name in (
        (X_train, y_train, "train"),
        (X_test, y_test, "test"),
        (X_val, y_val, "validation"),
    )
)
patients = pd.concat(
    [train_patients, test_patients, val_patients],
    axis=0,
)

In [13]:
# Persist the patient / class / sample assignment without the index column.
patients.to_csv(path_or_buf="output/patients.csv", index=False)

In [14]:
# Rows of `patients` belonging to each sample.
patients_train, patients_test, patients_val = (
    patients[patients["sample"] == sample_name]
    for sample_name in ("train", "test", "validation")
)

In [15]:
# Patient identifiers of the train sample, one array per class.
(
    patients_train_mi,
    patients_train_sttc_mi,
    patients_train_sttc,
    patients_train_other,
) = (
    patients_train.loc[patients_train["class"] == label, "patient"].values
    for label in ("mi", "sttc_mi", "sttc", "other")
)

In [16]:
# Restrict each class dictionary to the patients assigned to train.
df_mi_train, df_sttc_mi_train, df_sttc_train, df_other_train = (
    {patient: class_data[patient] for patient in train_ids}
    for class_data, train_ids in (
        (df_mi, patients_train_mi),
        (df_sttc_mi, patients_train_sttc_mi),
        (df_sttc, patients_train_sttc),
        (df_other, patients_train_other),
    )
)

## Feature Engineering

### ACF y PACF lags (5)

In [26]:
# ACF/PACF features for the train patients of each class, computed with
# `apply_diff=True` and converted to a frame by `dict_to_dataframe`.
(
    df_mi_acf_pacf_train,
    df_sttc_mi_acf_pacf_train,
    df_sttc_acf_pacf_train,
    df_other_acf_pacf_train,
) = (
    dict_to_dataframe(
        genera_df_acf_pacf(train_data, list_signals, apply_diff=True)
    )
    for train_data in (
        df_mi_train,
        df_sttc_mi_train,
        df_sttc_train,
        df_other_train,
    )
)

In [27]:
# Write each ACF/PACF feature table to its CSV file, without the index.
for _csv_path, _features in (
    ("output/features/mi_acf_pacf_train.csv", df_mi_acf_pacf_train),
    ("output/features/sttc_mi_acf_pacf_train.csv", df_sttc_mi_acf_pacf_train),
    ("output/features/sttc_acf_pacf_train.csv", df_sttc_acf_pacf_train),
    ("output/features/other_acf_pacf_train.csv", df_other_acf_pacf_train),
):
    _features.to_csv(_csv_path, index=False)

### Peak distribution seasonal

#### Distribución en picos (seasonal, FFT, etc.)

##### Número total de picos.

In [24]:
# Apply `get_dict_labels` to each class dictionary for the configured signals.
# NOTE(review): the inputs are the full class dictionaries, not the train
# subsets — confirm this is intended.
df_mi_signals, df_sttc_mi_signals, df_sttc_signals, df_other_signals = (
    get_dict_labels(class_data, list_signals)
    for class_data in (df_mi, df_sttc_mi, df_sttc, df_other)
)

In [None]:
# Seasonal component (period 100) of the "mean" statistic of signal "II",
# computed separately for each class.
mi_seasonal, sttc_mi_seasonal, sttc_seasonal, other_seasonal = (
    seasonal_decompose(
        pd.Series(get_estadisticas(class_signals["II"])["mean"]),
        period=100,
    ).seasonal
    for class_signals in (
        df_mi_signals,
        df_sttc_mi_signals,
        df_sttc_signals,
        df_other_signals,
    )
)

In [17]:
# Rebuild the per-class train dictionaries (same construction as the earlier
# cell that defines these names).
df_mi_train = {pid: df_mi[pid] for pid in patients_train_mi}
df_sttc_mi_train = {pid: df_sttc_mi[pid] for pid in patients_train_sttc_mi}
df_sttc_train = {pid: df_sttc[pid] for pid in patients_train_sttc}
df_other_train = {pid: df_other[pid] for pid in patients_train_other}

In [54]:
# Per-class summary from `get_dict_serie_summary` over the train patients.
# NOTE(review): the third argument (100) equals the period used for the
# seasonal decomposition elsewhere in this notebook; its exact meaning is
# defined by the helper — confirm.
(
    df_mi_peak_train,
    df_sttc_mi_peak_train,
    df_sttc_peak_train,
    df_other_peak_train,
) = (
    get_dict_serie_summary(train_data, list_signals, 100)
    for train_data in (
        df_mi_train,
        df_sttc_mi_train,
        df_sttc_train,
        df_other_train,
    )
)

In [25]:
# Write each peak summary table to its CSV file, without the index.
for _csv_path, _peaks in (
    ("output/features/mi_peak_train.csv", df_mi_peak_train),
    ("output/features/sttc_mi_peak_train.csv", df_sttc_mi_peak_train),
    ("output/features/sttc_peak_train.csv", df_sttc_peak_train),
    ("output/features/other_peak_train.csv", df_other_peak_train),
):
    _peaks.to_csv(_csv_path, index=False)

85.4

### Cross Correlation features

Correlaciones cruzadas
* Promedio de correlaciones por combinación.
* Desviación estándar de las correlaciones por combinación.
* Máxima y mínima correlación por combinación.
* Lag de la máxima correlación por combinación.
* Kurtosis.
* Promedio de todas las autocorrelaciones de la matriz.
* Norma de la matriz.
* Número de cruces por cero.

In [59]:
# Run the project helper `genera_dict_comb_ccf` on the "mi" train patients for
# every pair in `all_combinaciones` (presumably cross-correlations per pair,
# going by the helper's name — TODO confirm).
df_mi_train_ccf = genera_dict_comb_ccf(df_mi_train, all_combinaciones)

In [167]:
# Apply `patients_dict_ccf` to the train patients of each class, over every
# pair in `all_combinaciones`.
(
    df_mi_ccf_train,
    df_sttc_mi_ccf_train,
    df_sttc_ccf_train,
    df_other_ccf_train,
) = (
    patients_dict_ccf(train_data, all_combinaciones)
    for train_data in (
        df_mi_train,
        df_sttc_mi_train,
        df_sttc_train,
        df_other_train,
    )
)

In [189]:
# Split the pairs into two name lists:
#   - pairs whose two members are equal keep the single member name;
#   - all other pairs are named "<first>_<second>".
uni_combinacion = [
    first for first, second in all_combinaciones if first == second
]
bi_combinacion = [
    first + "_" + second
    for first, second in all_combinaciones
    if first != second
]
dict_combinaciones = dict(
    uni_combinacion=uni_combinacion,
    bi_combinacion=bi_combinacion,
)

In [202]:
# Summarise each class's CCF results with `get_dict_ccf_summary`, using the
# name lists held in `dict_combinaciones`.
(
    df_mi_cff_stats_train,
    df_sttc_mi_cff_stats_train,
    df_sttc_cff_stats_train,
    df_other_cff_stats_train,
) = (
    get_dict_ccf_summary(class_ccf, dict_combinaciones)
    for class_ccf in (
        df_mi_ccf_train,
        df_sttc_mi_ccf_train,
        df_sttc_ccf_train,
        df_other_ccf_train,
    )
)

# Write each summary table to its CSV file, without the index.
for _csv_path, _stats in (
    ("output/features/mi_cff_stats_train.csv", df_mi_cff_stats_train),
    ("output/features/sttc_mi_cff_stats_train.csv", df_sttc_mi_cff_stats_train),
    ("output/features/sttc_cff_stats_train.csv", df_sttc_cff_stats_train),
    ("output/features/other_cff_stats_train.csv", df_other_cff_stats_train),
):
    _stats.to_csv(_csv_path, index=False)