# Qualidade do ar e problemas respiratórios na região metropolitana de Salvador-Bahia: uma análise estatística

In [7]:
# Download Brazil territorial meshes (2018 municipal meshes published on the IBGE
# FTP server) plus the accompanying read-me PDF into ./meshes_br2018.
# wget flags: -r recurse, -np never ascend to the parent directory, -nH no
# host-named directory, -N only fetch files newer than the local copy,
# -R "*.html" skip HTML listings, --no-directories flatten everything into the
# --directory-prefix folder.
# NOTE(review): needs network access and a `wget` binary on PATH.
!wget -r -np -nH -N -R "*.html" --no-directories --directory-prefix=meshes_br2018 \
    ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2018/Brasil/BR/ \
    ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2018/Leia_me_Malha_Digital_2018.pdf

--2020-04-01 23:36:05--  ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2018/Brasil/BR/
           => ‘meshes_br2018/.listing’
Resolving geoftp.ibge.gov.br (geoftp.ibge.gov.br)... 170.84.40.11
Connecting to geoftp.ibge.gov.br (geoftp.ibge.gov.br)|170.84.40.11|:21... connected.
Logging in as anonymous ... Logged in!
==> SYST ... done.    ==> PWD ... done.
==> TYPE I ... done.  ==> CWD (1) /organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2018/Brasil/BR ... done.
==> PASV ... done.    ==> LIST ... done.

.listing                [ <=>                ]     501  --.-KB/s    in 0s      

2020-04-01 23:36:10 (29.0 MB/s) - ‘meshes_br2018/.listing’ saved [501]

Removed ‘meshes_br2018/.listing’.
--2020-04-01 23:36:10--  ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2018/Brasil/BR/BR.zip
           => ‘meshes_br2018/BR.zip’
==> CWD not required.
==> PASV ... done.    =

## Importação das bibliotecas

In [1]:
from ipywidgets import interact, widgets

import pandas as pd
import glob

## Variáveis globais

In [60]:
# Directories holding the preprocessed, gzip-compressed CSV inputs, one entry
# per data source. Each value already ends with a path separator.
ARGS = {
    'path_airq': '../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/',  # interval_15min
    'path_wthr': '../datas/preprocessing/mr-ssa/weather/cetrel/',
    'path_dsus': '../datas/preprocessing/mr-ssa/datasus/',
}

# One multi-select widget per data source, keyed by the name that
# show_columns receives as `icols`. show_columns fills each widget's
# `.options`; the "Describe" cells hand the same widget to interact.
I_COLS = {
    key: widgets.SelectMultiple()
    for key in ('aq_cols', 'wt_cols', 'ds_cols')
}

## Funções

In [3]:
def show_enum(arr: [str]) -> {int: str}:
    """Return a dict mapping each zero-based position in ``arr`` to its item."""
    return dict(enumerate(arr))

In [20]:
def describe_data(
    df: pd.DataFrame, headers: [str] = None
) -> pd.DataFrame:
    """Build a table of descriptive statistics, one row per selected column.

    Args:
        df: Source frame; the selected columns must be numeric.
        headers: Column names to describe. ``None`` or any empty sequence
            (e.g. the empty tuple of a widget with nothing selected) means
            "every column of ``df``".

    Returns:
        A DataFrame indexed by ``header`` with the columns max, min, mean,
        median, lower-limit, quartile-1, quartile-3, upper-limit, var, std,
        mad, amp, rms, kurtosis, skew, count and nans.
    """

    def _apply(header: str, column: pd.Series) -> dict:
        _max = column.max()
        _min = column.min()
        _mean = column.mean()

        _q1 = column.quantile(0.25)
        _q3 = column.quantile(0.75)
        _iqr = _q3 - _q1
        # Box-plot whiskers (1.5 * IQR), clipped to the observed range.
        _lower = max(_min, _q1 - (1.5 * _iqr))
        _upper = min(_max, _q3 + (1.5 * _iqr))

        return {
            'header': header,
            'max': _max,
            'min': _min,
            'mean': _mean,
            'median': column.median(),
            'lower-limit': _lower,
            'quartile-1': _q1,
            'quartile-3': _q3,
            'upper-limit': _upper,
            'var': column.var(),
            'std': column.std(),
            # Mean absolute deviation around the mean. Series.mad() was
            # removed in pandas 2.0, so it is computed explicitly here.
            'mad': (column - _mean).abs().mean(),
            'amp': _max - _min,
            'rms': (column.pow(2)).mean() ** 0.5,
            'kurtosis': column.kurtosis(),
            'skew': column.skew(),
            'count': column.count(),
            'nans': column.isna().sum(),
        }

    # `not headers` raises ValueError for a pd.Index / ndarray, so test
    # for None and emptiness explicitly.
    if headers is None or len(headers) == 0:
        headers = df.columns

    return pd.DataFrame(
        [_apply(h, df[h]) for h in headers]
    ).set_index('header')

In [63]:
def show_columns(file: str, icols: str):
    """Display the columns of a gzip CSV and offer them in a selection widget.

    Args:
        file: Path to a gzip-compressed CSV file.
        icols: Key into the module-level ``I_COLS`` dict naming the
            multi-select widget whose options should be refreshed.

    Side effects:
        Replaces ``I_COLS[icols].options`` with the file's column names and
        displays a ``{position: column name}`` mapping.
    """
    # nrows=0 parses only the header row, so the whole file is not
    # decompressed and loaded just to list its column names.
    df = pd.read_csv(file, low_memory=False, compression='gzip', nrows=0)
    columns = list(df.columns)
    I_COLS[icols].options = columns # Global
    display(show_enum(columns))

In [66]:
def show_describe(file: str, columns: [str]):
    """Read a gzip CSV and display ``describe_data`` statistics for ``columns``.

    Args:
        file: Path to a gzip-compressed CSV file.
        columns: Column names forwarded unchanged to ``describe_data``.
    """
    frame = pd.read_csv(file, compression='gzip', low_memory=False)
    summary = describe_data(frame, columns)
    display(summary)

## Análise descritiva

### Air quality

In [50]:
# Sorted because glob returns matches in arbitrary, filesystem-dependent order
# and this list feeds a dropdown. rstrip avoids a doubled slash, since the
# configured path already ends with '/'.
f_airq = sorted(glob.glob(f"{ARGS['path_airq'].rstrip('/')}/*.gz"))
f_airq

['../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/AV ACM - DETRAN.csv.gz',
 '../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/AV BARROS REIS.csv.gz',
 '../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/CAMPO GRANDE.csv.gz',
 '../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/DIQUE DO TORORÓ.csv.gz',
 '../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/ITAIGARA.csv.gz',
 '../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/PARALELA-CAB.csv.gz',
 '../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/PIRAJÁ.csv.gz',
 '../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/RIO VERMELHO.csv.gz']

#### Columns

In [54]:
# Dropdown over the air-quality files; reused by the "Describe" cell.
aq_files = widgets.Dropdown(options=f_airq)
# fixed() keeps icols constant: a bare string would be abbreviated by interact
# into an editable Text box, and editing it would raise KeyError in I_COLS.
# The trailing ';' hides the returned function's repr.
interact(show_columns, file=aq_files, icols=widgets.fixed('aq_cols'));

interactive(children=(Dropdown(description='file', options=('../datas/preprocessing/mr-ssa/airquality/cetrel/i…

<function __main__.show_columns(file: str, icols: str)>

#### Describe

In [56]:
# The column widget's options are filled by show_columns, so the "Columns"
# cell must have run first. The trailing ';' hides the function repr.
interact(show_describe, file=aq_files, columns=I_COLS['aq_cols']);

interactive(children=(Dropdown(description='file', options=('../datas/preprocessing/mr-ssa/airquality/cetrel/i…

<function __main__.show_describe(file: str, columns: [<class 'str'>])>

### Weather

In [57]:
# Sorted because glob returns matches in arbitrary, filesystem-dependent order
# and this list feeds a dropdown. rstrip avoids a doubled slash, since the
# configured path already ends with '/'.
f_wthr = sorted(glob.glob(f"{ARGS['path_wthr'].rstrip('/')}/*.gz"))
f_wthr

['../datas/preprocessing/mr-ssa/weather/cetrel/AV. ACM - DETRAN.csv.gz',
 '../datas/preprocessing/mr-ssa/weather/cetrel/AV. BARROS REIS.csv.gz',
 '../datas/preprocessing/mr-ssa/weather/cetrel/CAMPO GRANDE.csv.gz',
 '../datas/preprocessing/mr-ssa/weather/cetrel/DIQUE DO TORORO.csv.gz',
 '../datas/preprocessing/mr-ssa/weather/cetrel/ITAIGARA.csv.gz',
 '../datas/preprocessing/mr-ssa/weather/cetrel/PARALELA-CAB.csv.gz',
 '../datas/preprocessing/mr-ssa/weather/cetrel/PIRAJA.csv.gz',
 '../datas/preprocessing/mr-ssa/weather/cetrel/RIO VERMELHO.csv.gz']

#### Columns

In [58]:
# Dropdown over the weather files; reused by the "Describe" cell.
wt_files = widgets.Dropdown(options=f_wthr)
# fixed() keeps icols constant: a bare string would be abbreviated by interact
# into an editable Text box, and editing it would raise KeyError in I_COLS.
# The trailing ';' hides the returned function's repr.
interact(show_columns, file=wt_files, icols=widgets.fixed('wt_cols'));

interactive(children=(Dropdown(description='file', options=('../datas/preprocessing/mr-ssa/weather/cetrel/AV. …

<function __main__.show_columns(file: str, icols: str)>

#### Describe

In [59]:
# The column widget's options are filled by show_columns, so the "Columns"
# cell must have run first. The trailing ';' hides the function repr.
interact(show_describe, file=wt_files, columns=I_COLS['wt_cols']);

interactive(children=(Dropdown(description='file', options=('../datas/preprocessing/mr-ssa/weather/cetrel/AV. …

<function __main__.show_describe(file: str, columns: [<class 'str'>])>

### Datasus

In [61]:
# Sorted because glob returns matches in arbitrary, filesystem-dependent order
# and this list feeds a dropdown. rstrip avoids a doubled slash, since the
# configured path already ends with '/'.
f_dsus = sorted(glob.glob(f"{ARGS['path_dsus'].rstrip('/')}/*.gz"))
f_dsus

['../datas/preprocessing/mr-ssa/datasus/RD.csv.gz']

#### Columns

In [64]:
# Dropdown over the Datasus files; reused by the "Describe" cell.
ds_files = widgets.Dropdown(options=f_dsus)
# fixed() keeps icols constant: a bare string would be abbreviated by interact
# into an editable Text box, and editing it would raise KeyError in I_COLS.
# The trailing ';' hides the returned function's repr.
interact(show_columns, file=ds_files, icols=widgets.fixed('ds_cols'));

interactive(children=(Dropdown(description='file', options=('../datas/preprocessing/mr-ssa/datasus/RD.csv.gz',…

<function __main__.show_columns(file: str, icols: str)>

#### Describe

In [67]:
# The column widget's options are filled by show_columns, so the "Columns"
# cell must have run first. The trailing ';' hides the function repr.
interact(show_describe, file=ds_files, columns=I_COLS['ds_cols']);

interactive(children=(Dropdown(description='file', options=('../datas/preprocessing/mr-ssa/datasus/RD.csv.gz',…

<function __main__.show_describe(file: str, columns: [<class 'str'>])>