# CHB dataset 1

Gerando dataset `chb01.csv`

## Importando bibliotecas

In [4]:
import numpy as np
import pandas as pd
import seaborn as sns

from zipfile import ZipFile
from matplotlib import pyplot as plt
from statsmodels.robust.scale import mad as medianAD

## Carregando Matrizes de arquivo zip

In [5]:
# Load the positive and negative sample matrices stored as .npy members of
# chb01.zip and stack each class into one float64 array.
with ZipFile('chb01.zip', 'r') as data:
    file_list = data.namelist()
    file_list.sort()

    pos_list = [pos for pos in file_list if ('chb01/positive/' in pos)]

    # Pick as many non-positive members as there are positives. Positive
    # members and directory entries (names ending in '/') are excluded
    # explicitly instead of relying on the sort order to put the negatives
    # first; otherwise a short negative set would silently pull positive
    # files into neg_list, and a directory entry would break np.load.
    neg_candidates = [
        name for name in file_list
        if 'chb01/positive/' not in name and not name.endswith('/')
    ]
    neg_list = neg_candidates[:len(pos_list)]

    print(f'pos_list: {len(pos_list)}\tneg_list: {len(neg_list)}')

    pos_space, neg_space = [], []

    # zip() pairs the two lists and stops at the shorter one, so both
    # classes always end up with the same number of samples.
    for pos_file, neg_file in zip(pos_list, neg_list):
        with data.open(name=pos_file, mode='r') as pos, data.open(name=neg_file, mode='r') as neg:
            pos_space.append(np.load(pos))
            neg_space.append(np.load(neg))

    # Stack the per-file matrices into (n_samples, rows, cols) arrays.
    pos_space = np.array(pos_space, dtype=np.float64)
    neg_space = np.array(neg_space, dtype=np.float64)

    print(f'pos_space.shape: {pos_space.shape}\tneg_space.shape: {neg_space.shape}')
    print(f'pos_space.dtype: {pos_space.dtype}\tneg_space.dtype: {neg_space.dtype}')

pos_list: 216	neg_list: 216
pos_space.shape: (216, 18, 512)	neg_space.shape: (216, 18, 512)
pos_space.dtype: float64	neg_space.dtype: float64


## Extraindo atributos

In [None]:
# One column per (statistic, row index) pair for the 18 rows of each sample
# matrix (presumably EEG channels -- confirm), plus the class label.
colunas = [f'{feature}-{i}' for i in range(18) for feature in ['std', 'mean', 'skew', 'kurt', 'meanAD', 'medianAD']] + ['target']

posDF, negDF, dataset = pd.DataFrame(), pd.DataFrame(), pd.DataFrame(columns=colunas)


def _mean_abs_dev(frame):
    """Return the column-wise mean absolute deviation around the mean.

    Replaces ``DataFrame.mad()``, which was deprecated in pandas 1.5 and
    removed in pandas 2.0; this expression is the documented equivalent.
    """
    return (frame - frame.mean()).abs().mean()


for pos_mat, neg_mat in zip(pos_space, neg_space):

    # Transpose so each column of the frame is one row of the sample matrix;
    # the column-wise statistics below then yield one value per original row.
    pos_matDF = pd.DataFrame(data=pos_mat).transpose()
    posDF['std'] = pos_matDF.std()
    posDF['mean'] = pos_matDF.mean()
    posDF['skew'] = pos_matDF.skew()
    posDF['kurt'] = pos_matDF.kurt()
    posDF['meanAD'] = _mean_abs_dev(pos_matDF)
    # medianAD is statsmodels' robust-scale `mad` (see the imports).
    posDF['medianAD'] = medianAD(pos_matDF)

    # Same statistics for the paired negative sample.
    neg_matDF = pd.DataFrame(data=neg_mat).transpose()
    negDF['std'] = neg_matDF.std()
    negDF['mean'] = neg_matDF.mean()
    negDF['skew'] = neg_matDF.skew()
    negDF['kurt'] = neg_matDF.kurt()
    negDF['meanAD'] = _mean_abs_dev(neg_matDF)
    negDF['medianAD'] = medianAD(neg_matDF)