# Qualidade do ar e problemas respiratórios na região metropolitana de Salvador-Bahia: uma análise estatística

In [7]:
# Download Brazil territorial meshes
# Fetches the IBGE 2018 municipal mesh directory for Brazil (BR/) plus the
# accompanying "Leia_me" PDF over FTP into the local folder meshes_br2018.
# wget options used: -r recursive, -np do not ascend to the parent directory,
# -nH / --no-directories do not recreate the host/remote directory tree,
# -N only re-download files newer than the local copy, -R "*.html" skip HTML.
!wget -r -np -nH -N -R "*.html" --no-directories --directory-prefix=meshes_br2018 \
    ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2018/Brasil/BR/ \
    ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2018/Leia_me_Malha_Digital_2018.pdf

--2020-04-01 23:36:05--  ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2018/Brasil/BR/
           => ‘meshes_br2018/.listing’
Resolving geoftp.ibge.gov.br (geoftp.ibge.gov.br)... 170.84.40.11
Connecting to geoftp.ibge.gov.br (geoftp.ibge.gov.br)|170.84.40.11|:21... connected.
Logging in as anonymous ... Logged in!
==> SYST ... done.    ==> PWD ... done.
==> TYPE I ... done.  ==> CWD (1) /organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2018/Brasil/BR ... done.
==> PASV ... done.    ==> LIST ... done.

.listing                [ <=>                ]     501  --.-KB/s    in 0s      

2020-04-01 23:36:10 (29.0 MB/s) - ‘meshes_br2018/.listing’ saved [501]

Removed ‘meshes_br2018/.listing’.
--2020-04-01 23:36:10--  ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2018/Brasil/BR/BR.zip
           => ‘meshes_br2018/BR.zip’
==> CWD not required.
==> PASV ... done.    =

## Importação das bibliotecas

In [3]:
from pandas_profiling import ProfileReport
from ipywidgets import interact, widgets
from IPython.display import Image

import matplotlib.pyplot as plt
import missingno as msno
import seaborn as sns
import pandas as pd
import glob
import os

In [4]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## Variáveis globais

In [5]:
# Gzip-compressed CSV inputs read by the show_* helpers.
# Air quality uses the 1-hour interval folder (alternative: interval_15min).
f_airq = '../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/airquality.csv.gz'
f_wthr = '../datas/preprocessing/mr-ssa/weather/cetrel/weather.csv.gz'
f_dsus = '../datas/preprocessing/mr-ssa/datasus/RD.csv.gz'

# One multi-select widget per dataset; show_dataframe() fills each widget's
# options with the column names of the file it loads.
I_COLS = {
    key: widgets.SelectMultiple()
    for key in ('aq_cols', 'wt_cols', 'ds_cols')
}

## Funções

In [6]:
def show_enum(arr: [str]) -> {int: str}:
    """Return a dict mapping each zero-based position in ``arr`` to its item."""
    return dict(enumerate(arr))

In [7]:
def describe_data(
    df: pd.DataFrame, headers: [str] = None
) -> pd.DataFrame:
    """Build a table of descriptive statistics with one row per column.

    Args:
        df: Source data. The summarised columns are expected to be numeric,
            since quantiles, variance, skew, etc. are computed on them.
        headers: Labels of the columns to summarise. ``None`` or an empty
            sequence selects every column of ``df``. Any iterable of labels
            is accepted (list, tuple, ``pd.Index``, ...).

    Returns:
        A DataFrame indexed by ``'header'`` with the columns ``max``, ``min``,
        ``mean``, ``median``, ``lower-limit``, ``quartile-1``, ``quartile-3``,
        ``upper-limit``, ``var``, ``std``, ``mad``, ``amp``, ``rms``,
        ``kurtosis``, ``skew``, ``count`` and ``nans``.
    """

    def _apply(header: str, column: pd.Series) -> dict:
        _max = column.max()
        _min = column.min()
        _mean = column.mean()

        _q1 = column.quantile(0.25)
        _q3 = column.quantile(0.75)
        _iqr = _q3 - _q1
        # Box-plot whiskers (1.5 * IQR), clamped to the observed range.
        _lower = max(_min, _q1 - (1.5 * _iqr))
        _upper = min(_max, _q3 + (1.5 * _iqr))

        return {
            'header': header,
            'max': _max,
            'min': _min,
            'mean': _mean,
            'median': column.median(),
            'lower-limit': _lower,
            'quartile-1': _q1,
            'quartile-3': _q3,
            'upper-limit': _upper,
            'var': column.var(),
            'std': column.std(),
            # Mean absolute deviation around the mean. Series.mad() was
            # removed in pandas 2.0, so it is computed explicitly here.
            'mad': (column - _mean).abs().mean(),
            'amp': _max - _min,
            'rms': (column.pow(2)).mean() ** 0.5,
            'kurtosis': column.kurtosis(),
            'skew': column.skew(),
            'count': column.count(),
            'nans': column.isna().sum(),
        }

    # Materialise the selection first: truth-testing a pd.Index or ndarray
    # directly raises "truth value ... is ambiguous".
    selected = [] if headers is None else list(headers)
    if not selected:
        selected = list(df.columns)

    return pd.DataFrame(
        [_apply(h, df[h]) for h in selected]
    ).set_index('header')

In [8]:
def show_dataframe(file: str, icols: str):
    """Load a gzip-compressed CSV, publish its columns and display it.

    Args:
        file: Path of the ``.csv.gz`` file to read.
        icols: Key into the module-level ``I_COLS`` dict; the widget stored
            under that key gets the loaded frame's columns as its options.
    """
    frame = pd.read_csv(file, compression='gzip', low_memory=False)
    # Side effect on the global widget registry.
    selector = I_COLS[icols]
    selector.options = frame.columns
    display(frame)

In [9]:
def show_describe(file: str, columns: [str]):
    """Load a gzip-compressed CSV and show its descriptive statistics.

    Displays the table produced by ``describe_data`` for the chosen columns,
    followed by the frame's ``info()`` report.

    Args:
        file: Path of the ``.csv.gz`` file to read.
        columns: Column labels to summarise; an empty selection means all
            columns (see ``describe_data``).
    """
    df = pd.read_csv(file, low_memory=False, compression='gzip')
    display(describe_data(df, columns))
    # DataFrame.info() prints its report itself and returns None, so wrapping
    # it in display() only appended a stray "None" to the cell output.
    df.info()

In [10]:
def show_missingno(file: str, method: str):
    """Load a gzip-compressed CSV and draw one missingno chart for it.

    Args:
        file: Path of the ``.csv.gz`` file to read.
        method: Which chart to draw: ``'dendrogram'``, ``'heatmap'``,
            ``'matrix'`` or ``'bar'``. Any other value raises ``KeyError``.
    """
    plotters = {
        'dendrogram': msno.dendrogram,
        'heatmap': msno.heatmap,
        'matrix': msno.matrix,
        'bar': msno.bar,
    }
    frame = pd.read_csv(file, compression='gzip', low_memory=False)
    plot = plotters[method]
    display(plot(frame))

In [11]:
def show_profiling(file: str, save_prof: bool):
    """Load a gzip-compressed CSV and render a profiling report as widgets.

    Args:
        file: Path of the ``.csv.gz`` file to read. The text after its last
            ``/`` becomes the report title.
        save_prof: When true, also write the report as HTML next to ``file``.
            Only the final extension is replaced, so ``x.csv.gz`` is saved
            as ``x.csv.html``.
    """
    frame = pd.read_csv(file, compression='gzip', low_memory=False)
    report_title = file.rpartition('/')[2]
    report = ProfileReport(frame, title=report_title)
    report.to_widgets()
    if not save_prof:
        return
    stem, _ext = os.path.splitext(file)
    report.to_file(output_file=f'{stem}.html')

## Análise descritiva

### Air quality

#### Dataframe

In [16]:
# Browse the air-quality table; loading it also fills the 'aq_cols' selector.
# The trailing ';' keeps the function returned by interact() out of the output.
interact(show_dataframe, file=f_airq, icols='aq_cols');

interactive(children=(Text(value='../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/airquality.cs…

<function __main__.show_dataframe(file: str, icols: str)>

#### Missingno

In [17]:
# Missing-data charts for the air-quality file, chart type chosen by dropdown.
# The trailing ';' keeps the function returned by interact() out of the output.
interact(
    show_missingno,
    file=f_airq,
    method=widgets.Dropdown(options=['bar', 'matrix', 'heatmap', 'dendrogram'])
);

interactive(children=(Text(value='../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/airquality.cs…

<function __main__.show_missingno(file: str, method: str)>

#### Profiling

In [26]:
# Profiling report for the air-quality file; save_prof also writes the HTML.
# The trailing ';' keeps the function returned by interact() out of the output.
interact(show_profiling, file=f_airq, save_prof=True);

interactive(children=(Text(value='../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/airquality.cs…

<function __main__.show_profiling(file: str, save_prof: bool)>

#### Describe

In [21]:
# Descriptive statistics for the air-quality columns picked in the selector.
# The selector's options are filled by show_dataframe(..., icols='aq_cols'),
# so run that cell first. The trailing ';' hides interact()'s return value.
interact(show_describe, file=f_airq, columns=I_COLS['aq_cols']);

interactive(children=(Text(value='../datas/preprocessing/mr-ssa/airquality/cetrel/interval_1hour/airquality.cs…

<function __main__.show_describe(file: str, columns: [<class 'str'>])>

#### Plot

In [19]:
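# TODO: plot the air-quality series. Draft below (f_airq is a single path string, so it is passed whole, not indexed):
# df = pd.read_csv(f_airq, index_col='Date & Time', low_memory=False, compression='gzip')
# df.plot()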

---

### Weather

#### Dataframe

In [12]:
# Browse the weather table; loading it also fills the 'wt_cols' selector.
# The trailing ';' keeps the function returned by interact() out of the output.
interact(show_dataframe, file=f_wthr, icols='wt_cols');

interactive(children=(Text(value='../datas/preprocessing/mr-ssa/weather/cetrel/weather.csv.gz', description='f…

<function __main__.show_dataframe(file: str, icols: str)>

#### Missingno

In [13]:
# Missing-data charts for the weather file, chart type chosen by dropdown.
# The trailing ';' keeps the function returned by interact() out of the output.
interact(
    show_missingno,
    file=f_wthr,
    method=widgets.Dropdown(options=['bar', 'matrix', 'heatmap', 'dendrogram'])
);

interactive(children=(Text(value='../datas/preprocessing/mr-ssa/weather/cetrel/weather.csv.gz', description='f…

<function __main__.show_missingno(file: str, method: str)>

#### Profiling

In [14]:
# Profiling report for the weather file; save_prof also writes the HTML.
# The trailing ';' keeps the function returned by interact() out of the output.
interact(show_profiling, file=f_wthr, save_prof=True);

interactive(children=(Text(value='../datas/preprocessing/mr-ssa/weather/cetrel/weather.csv.gz', description='f…

<function __main__.show_profiling(file: str, save_prof: bool)>

#### Describe

In [16]:
# Descriptive statistics for the weather columns picked in the selector.
# The selector's options are filled by show_dataframe(..., icols='wt_cols'),
# so run that cell first. The trailing ';' hides interact()'s return value.
interact(show_describe, file=f_wthr, columns=I_COLS['wt_cols']);

interactive(children=(Text(value='../datas/preprocessing/mr-ssa/weather/cetrel/weather.csv.gz', description='f…

<function __main__.show_describe(file: str, columns: [<class 'str'>])>

#### Plot

---

### Datasus

#### Dataframe

In [23]:
# Browse the Datasus table; loading it also fills the 'ds_cols' selector.
# The trailing ';' keeps the function returned by interact() out of the output.
interact(show_dataframe, file=f_dsus, icols='ds_cols');

interactive(children=(Text(value='../datas/preprocessing/mr-ssa/datasus/RD.csv.gz', description='file'), Text(…

<function __main__.show_dataframe(file: str, icols: str)>

#### Missingno

In [24]:
# Missing-data charts for the Datasus file, chart type chosen by dropdown.
# The trailing ';' keeps the function returned by interact() out of the output.
interact(
    show_missingno,
    file=f_dsus,
    method=widgets.Dropdown(options=['bar', 'matrix', 'heatmap', 'dendrogram'])
);

interactive(children=(Text(value='../datas/preprocessing/mr-ssa/datasus/RD.csv.gz', description='file'), Dropd…

<function __main__.show_missingno(file: str, method: str)>

#### Profiling

In [17]:
# Profiling report for the Datasus file; save_prof also writes the HTML.
# The trailing ';' keeps the function returned by interact() out of the output.
interact(show_profiling, file=f_dsus, save_prof=True);

interactive(children=(Text(value='../datas/preprocessing/mr-ssa/datasus/RD.csv.gz', description='file'), Check…

<function __main__.show_profiling(file: str, save_prof: bool)>

#### Describe

In [None]:
# Descriptive statistics for the Datasus columns picked in the selector.
# The selector's options are filled by show_dataframe(..., icols='ds_cols'),
# so run that cell first. The trailing ';' hides interact()'s return value.
interact(show_describe, file=f_dsus, columns=I_COLS['ds_cols']);

#### Plot