# Variáveis Individuais

In [None]:
import sys
import warnings
sys.path.append('..')
import seaborn as sns
from myst_nb import glue
from global_configurations import *

# Ignore every warning raised from here on.
warnings.filterwarnings('ignore')

# Setting these pandas display options to None removes the corresponding
# display limit (column width, number of columns, number of rows).
for display_option in (
    'display.max_colwidth',
    'display.max_columns',
    'display.max_rows',
):
    pd.set_option(display_option, None)

# Load the dataset description first; it is needed both to read the dataset
# and to classify its columns.
conf_dict = read_config_file(DATASET_INFO_PATH)
df = read_dataset(conf_dict, DATASET_PATH)

# NOTE(review): `datetime` here is a project variable returned by
# infer_column_types (presumably the datetime column names), not the stdlib
# module of the same name -- confirm against global_configurations.
(
    df,
    numerical,
    categorical,
    categorical_dt,
    datetime,
    target,
) = infer_column_types(df, conf_dict)

In [None]:
# Descriptive statistics of the numerical columns, transposed so that each
# variable is a row, with rows ordered by variable name.
numerical_stats = (
    df[numerical]
    .describe()
    .transpose()
    .sort_index()
)

# Register the table under the key used by the markdown `glue` directive.
glue("numerical_stats", numerical_stats)

In [None]:
# Histogram (with a KDE overlay) of every numerical variable, one panel each.
#
# The grid used to be fixed at 2 x 4, so a ninth variable made
# `fig.add_subplot` fail with a ValueError (subplot index outside the grid).
# The grid now keeps 4 columns and at least 2 rows -- identical output for up
# to 8 variables -- and grows extra rows, with a proportionally taller figure,
# when more variables are present.
numerical_vars = list(numerical)
n_cols = 4
# Ceiling division without importing math; never fewer than the original 2 rows.
n_rows = max(2, -(-len(numerical_vars) // n_cols))
fig = plt.figure(figsize=(16, 3.5 * n_rows))

for idx, var in enumerate(numerical_vars, 1):
    ax = fig.add_subplot(n_rows, n_cols, idx)
    sns.histplot(df[var], ax=ax, kde=True)
    # Drop the y label on every panel.
    ax.set(ylabel='')

fig.suptitle('Histograma das variáveis numéricas')
fig.tight_layout()

# display=False: only store the figure under this key; do not display it in
# the cell output.
glue("numerical_hist_plot", fig, display=False)

In [None]:
# TODO: QQPlot para variáveis numéricas

In [None]:
# Boxplot of every numerical variable, one panel each.
#
# The grid used to be fixed at 2 x 4, so a ninth variable made
# `fig.add_subplot` fail with a ValueError (subplot index outside the grid).
# The grid now keeps 4 columns and at least 2 rows -- identical output for up
# to 8 variables -- and grows extra rows, with a proportionally taller figure,
# when more variables are present.
numerical_vars = list(numerical)
n_cols = 4
# Ceiling division without importing math; never fewer than the original 2 rows.
n_rows = max(2, -(-len(numerical_vars) // n_cols))
fig = plt.figure(figsize=(16, 3.5 * n_rows))

for idx, var in enumerate(numerical_vars, 1):
    ax = fig.add_subplot(n_rows, n_cols, idx)
    sns.boxplot(y=var, data=df[[var]], ax=ax, palette='flare')
    # Show the variable name below the box instead of beside the y axis.
    ax.set(ylabel='', xlabel=var)

fig.suptitle('Boxplot das variáveis numéricas')
fig.tight_layout()

# display=False: only store the figure under this key; do not display it in
# the cell output.
glue("numerical_box_plot", fig, display=False)

## Numéricas
```{glue:figure} numerical_stats
```
```{glue:figure} numerical_hist_plot
```
```{glue:figure} numerical_box_plot
```

In [None]:
# Number of distinct values per categorical column, largest first.
# reset_index() moves the column names into a column called 'index', which is
# what the bar chart uses for the x axis.
unique_counts = (
    df[categorical]
    .nunique()
    .to_frame(name='Qtd')
    .reset_index()
    .sort_values(by='Qtd', ascending=False)
)

# The figure must exist before the seaborn call: no `ax` is passed, so the
# bars are drawn on the current figure.
fig = plt.figure(figsize=(10, 5))
ax = sns.barplot(data=unique_counts, x='index', y='Qtd')
ax.set(xlabel='', title='Quantidade de valores únicos (categorias)')
ax.tick_params(axis='x', rotation=90)

# Store the figure for the markdown `glue` directive without showing it here.
glue("categorical_unique_plot", fig, display=False)

In [None]:
# Bar chart of the most frequent categories of every categorical variable,
# one panel per variable.
#
# Changes relative to the previous version of this cell:
# * The grid was fixed at 10 x 4, so a 41st variable made `fig.add_subplot`
#   fail with a ValueError. It now keeps 4 columns and at least 10 rows
#   (same layout as before for up to 40 variables) and grows when needed.
# * "..." was appended to every string label, even short ones; it is now
#   added only when the label was actually cut.
# * The top-N selection uses .head(), which is explicitly positional.
# * Tick labels are rotated with tick_params, like the unique-values plot,
#   instead of re-assigning the tick label texts.
categorical_vars = list(categorical)
n_cols = 4
top_n = 10
max_label_len = 15
# Ceiling division without importing math; never fewer than the original 10 rows.
n_rows = max(10, -(-len(categorical_vars) // n_cols))
fig = plt.figure(figsize=(15, 3.5 * n_rows))

for idx, var in enumerate(categorical_vars, 1):
    ax = fig.add_subplot(n_rows, n_cols, idx)

    # value_counts() is sorted by frequency, so head() keeps the top categories.
    data = df[var].value_counts().head(top_n)
    data = data.to_frame().reset_index()
    data.columns = ['categoria', 'qtd']

    # Shorten long string labels so they fit under the bars; non-string
    # categories are left untouched.
    # NOTE(review): two categories sharing the same first characters end up
    # with the same label after truncation -- confirm this is acceptable.
    data['categoria'] = data['categoria'].apply(
        lambda x: f"{x[:max_label_len]}..."
        if isinstance(x, str) and len(x) > max_label_len
        else x
    )

    sns.barplot(x='categoria', y='qtd', data=data, ax=ax)
    ax.tick_params(axis='x', rotation=90)
    ax.set(xlabel="", title=var)

# y=1 places the title at the very top of the figure.
fig.suptitle(f'Top {top_n} categorias por variável', y=1)
fig.tight_layout()

# Store the figure for the markdown `glue` directive without showing it here.
glue("categorical_top_categories_plot", fig, display=False)

## Categóricas
```{glue:figure} categorical_unique_plot
```
```{glue:figure} categorical_top_categories_plot
```

In [None]:
# One list of variable names per datetime component, built by calling
# get_datetime_var_names(datetime, <component>) for each component in turn.
# The order of the component tuple must match the order of the target names.
(
    categorical_year,
    categorical_month,
    categorical_week_number,
    categorical_day,
    categorical_day_week,
    categorical_hour,
    categorical_minute,
    categorical_second,
) = (
    list(get_datetime_var_names(datetime, component))
    for component in (
        'year',
        'month',
        'week_number',
        'day',
        'day_week',
        'hour',
        'minute',
        'second',
    )
)

## Data

In [None]:
# Value frequencies of every column listed in categorical_year, one bar chart
# per column on a 3 x 4 grid.
fig = plt.figure(figsize=(15, 8))

for position, column in enumerate(categorical_year, start=1):
    # Two-column frame: the distinct values ('categoria') and how often each
    # one occurs ('qtd').
    year_counts = (
        df[column]
        .value_counts()
        .rename_axis('categoria')
        .reset_index(name='qtd')
    )

    panel = fig.add_subplot(3, 4, position)
    sns.barplot(data=year_counts, x='categoria', y='qtd', palette='flare', ax=panel)
    # Re-apply the current tick labels rotated to vertical.
    panel.set_xticklabels(panel.get_xticklabels(), rotation=90)
    panel.set(xlabel="", title=column)

fig.tight_layout()

# Store the figure for the markdown `glue` directive without showing it here.
glue("categoricaldt_year_plot", fig, display=False)

### Ano
```{glue:figure} categoricaldt_year_plot
```

In [None]:
# Value frequencies of every column listed in categorical_month, one bar chart
# per column on a 3 x 4 grid.
fig = plt.figure(figsize=(15, 7))

for slot, month_var in enumerate(categorical_month, start=1):
    # Frequency table with fixed column names, whatever the source column is called.
    month_counts = (
        df[month_var]
        .value_counts()
        .to_frame()
        .reset_index()
        .set_axis(['categoria', 'qtd'], axis=1)
    )

    month_ax = fig.add_subplot(3, 4, slot)
    sns.barplot(data=month_counts, x='categoria', y='qtd', ax=month_ax, palette='flare')
    month_ax.set_xlabel("")
    month_ax.set_title(month_var)

fig.tight_layout()

# Store the figure for the markdown `glue` directive without showing it here.
glue("categoricaldt_month_plot", fig, display=False)

### Mês
```{glue:figure} categoricaldt_month_plot
```

In [None]:
# Value frequencies of every column listed in categorical_week_number, one bar
# chart per column on a 3 x 4 grid.
fig = plt.figure(figsize=(15, 7))

for offset, week_var in enumerate(categorical_week_number):
    # Subplot positions are 1-based, enumerate() is 0-based.
    week_ax = fig.add_subplot(3, 4, offset + 1)

    # Distinct values ('categoria') and their number of occurrences ('qtd').
    week_counts = df[week_var].value_counts()
    week_counts = week_counts.rename_axis('categoria').reset_index(name='qtd')

    sns.barplot(
        data=week_counts,
        x='categoria',
        y='qtd',
        palette='flare',
        ax=week_ax,
    )
    week_ax.set(title=week_var, xlabel="")

fig.tight_layout()

# Store the figure for the markdown `glue` directive without showing it here.
glue("categoricaldt_week_plot", fig, display=False)

### Semana
```{glue:figure} categoricaldt_week_plot
```

In [None]:
# Value frequencies of every column listed in categorical_day, one bar chart
# per column on a 3 x 4 grid.
fig = plt.figure(figsize=(15, 7))

# Arguments shared by every bar chart in this figure.
bar_style = dict(x='categoria', y='qtd', palette='flare')

for cell_no, day_var in enumerate(categorical_day, 1):
    day_ax = fig.add_subplot(3, 4, cell_no)

    # Frequency table with fixed column names for the plot call below.
    day_counts = df[day_var].value_counts().to_frame().reset_index()
    day_counts = day_counts.set_axis(['categoria', 'qtd'], axis=1)

    sns.barplot(data=day_counts, ax=day_ax, **bar_style)
    day_ax.set(xlabel="", title=day_var)

fig.tight_layout()

# Store the figure for the markdown `glue` directive without showing it here.
glue("categoricaldt_day_plot", fig, display=False)

### Dia
```{glue:figure} categoricaldt_day_plot
```

In [None]:
# Value frequencies of every column listed in categorical_day_week, one bar
# chart per column on a 3 x 4 grid.
fig = plt.figure(figsize=(15, 7))

for panel_no, weekday_var in enumerate(categorical_day_week, start=1):
    # Distinct values ('categoria') and their number of occurrences ('qtd').
    weekday_counts = (
        df[weekday_var]
        .value_counts()
        .rename_axis('categoria')
        .reset_index(name='qtd')
    )

    weekday_ax = fig.add_subplot(3, 4, panel_no)
    sns.barplot(
        x='categoria', y='qtd', data=weekday_counts,
        palette='flare', ax=weekday_ax,
    )
    weekday_ax.set_xlabel("")
    weekday_ax.set_title(weekday_var)

fig.tight_layout()

# Store the figure for the markdown `glue` directive without showing it here.
glue("categoricaldt_week_day_plot", fig, display=False)

### Dia da Semana
```{glue:figure} categoricaldt_week_day_plot
```

In [None]:
# Value frequencies of every column listed in categorical_hour, one bar chart
# per column on a 3 x 4 grid.
fig = plt.figure(figsize=(15, 7))

for zero_based, hour_var in enumerate(categorical_hour):
    # Frequency table with fixed column names, whatever the source column is called.
    hour_counts = (
        df[hour_var]
        .value_counts()
        .to_frame()
        .reset_index()
        .set_axis(['categoria', 'qtd'], axis=1)
    )

    # Subplot positions are 1-based, enumerate() is 0-based.
    hour_ax = fig.add_subplot(3, 4, zero_based + 1)
    sns.barplot(data=hour_counts, x='categoria', y='qtd', palette='flare', ax=hour_ax)
    hour_ax.set(title=hour_var, xlabel="")

fig.tight_layout()

# Store the figure for the markdown `glue` directive without showing it here.
glue("categoricaldt_hour_plot", fig, display=False)

### Hora
```{glue:figure} categoricaldt_hour_plot
```

In [None]:
# Value frequencies of every column listed in categorical_minute, one bar chart
# per column on a 3 x 4 grid.
fig = plt.figure(figsize=(15, 7))

# Arguments shared by every bar chart in this figure.
minute_bar_kwargs = {'x': 'categoria', 'y': 'qtd', 'palette': 'flare'}

for grid_pos, minute_var in enumerate(categorical_minute, 1):
    minute_ax = fig.add_subplot(3, 4, grid_pos)

    # Distinct values ('categoria') and their number of occurrences ('qtd').
    minute_counts = df[minute_var].value_counts()
    minute_counts = minute_counts.rename_axis('categoria').reset_index(name='qtd')

    sns.barplot(data=minute_counts, ax=minute_ax, **minute_bar_kwargs)
    minute_ax.set_xlabel("")
    minute_ax.set_title(minute_var)

fig.tight_layout()

# Store the figure for the markdown `glue` directive without showing it here.
glue("categoricaldt_minute_plot", fig, display=False)

### Minuto
```{glue:figure} categoricaldt_minute_plot
```

In [None]:
# Value frequencies of every column listed in categorical_second, one bar chart
# per column on a 3 x 4 grid.
fig = plt.figure(figsize=(15, 7))

for spot, second_var in enumerate(categorical_second, start=1):
    second_ax = fig.add_subplot(3, 4, spot)

    # Frequency table with fixed column names for the plot call below.
    second_counts = df[second_var].value_counts().to_frame().reset_index()
    second_counts = second_counts.set_axis(['categoria', 'qtd'], axis=1)

    sns.barplot(
        data=second_counts,
        x='categoria',
        y='qtd',
        ax=second_ax,
        palette='flare',
    )
    second_ax.set(xlabel="", title=second_var)

fig.tight_layout()

# The key is kept exactly as is: the markdown `glue` directive looks the
# figure up under this name.
glue("categoricaldt_week_second_plot", fig, display=False)

### Segundo
```{glue:figure} categoricaldt_week_second_plot
```

