# Projeto da Disciplina 

Este notebook contém um script base para o projeto da disciplina IF702 Redes Neurais.

### Importando dependências

In [9]:
import numpy as np
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.metrics import roc_auc_score, average_precision_score
import matplotlib
matplotlib.use('nbagg')
import matplotlib.pyplot as plt

## Preparação de Dados

Inicialmente, precisamos carregar os dados e eliminar exemplos repetidos; fazemos isso usando o Pandas.
Em seguida, exibimos uma pequena amostra das primeiras linhas (head) do dataset e suas estatísticas descritivas.

In [10]:
# Load the tab-separated training file and keep only the first occurrence of
# each fully identical row.
# NOTE(review): the tables displayed below show an INDEX column that looks like
# a unique row id (0..389195 over 389196 rows). If so, whole-row comparison can
# never match two rows and nothing is removed here -- confirm whether
# duplicates should be detected on the feature columns only.
data_set = pd.read_csv('data/TRN', sep='\t').drop_duplicates()

In [11]:
# Preview the first 5 rows of the loaded data.
data_set.head(5)

Unnamed: 0,INDEX,UF_1,UF_2,UF_3,UF_4,UF_5,UF_6,UF_7,IDADE,SEXO_1,...,CEP4_7,CEP4_8,CEP4_9,CEP4_10,CEP4_11,CEP4_12,CEP4_13,CEP4_14,IND_BOM_1_1,IND_BOM_1_2
0,0,1,1,1,0,0,0,0,0.135098,1,...,0,0,1,1,0,1,1,1,0,1
1,1,1,0,1,0,0,1,0,0.273504,1,...,0,1,0,1,1,0,0,0,1,0
2,2,1,0,1,0,0,1,0,0.28191,0,...,1,1,0,0,0,0,1,0,1,0
3,3,1,1,1,0,0,0,0,0.225741,0,...,1,1,0,1,1,0,1,0,1,0
4,4,1,1,0,0,0,1,0,0.480403,0,...,1,1,1,0,0,1,0,1,1,0


In [12]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
data_set.describe()

Unnamed: 0,INDEX,UF_1,UF_2,UF_3,UF_4,UF_5,UF_6,UF_7,IDADE,SEXO_1,...,CEP4_7,CEP4_8,CEP4_9,CEP4_10,CEP4_11,CEP4_12,CEP4_13,CEP4_14,IND_BOM_1_1,IND_BOM_1_2
count,389196.0,389196.0,389196.0,389196.0,389196.0,389196.0,389196.0,389196.0,389196.0,389196.0,...,389196.0,389196.0,389196.0,389196.0,389196.0,389196.0,389196.0,389196.0,389196.0,389196.0
mean,194597.5,0.889274,0.691952,0.476552,0.296195,0.241179,0.218011,0.186836,0.4552049,0.521514,...,0.423378,0.41754,0.425708,0.45982,0.440842,0.436896,0.433709,0.440339,0.655449,0.344551
std,112351.35202,0.313793,0.461687,0.499451,0.456579,0.427799,0.412895,0.389781,0.2537459,0.499538,...,0.494095,0.493154,0.494451,0.498384,0.496489,0.496002,0.495587,0.496428,0.475222,0.475222
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.506237e-16,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,97298.75,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2507866,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,194597.5,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.4375241,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
75%,291896.25,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.6578835,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,389195.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [13]:
# Larger preview: request the first 50 rows.
data_set.head(50)

Unnamed: 0,INDEX,UF_1,UF_2,UF_3,UF_4,UF_5,UF_6,UF_7,IDADE,SEXO_1,...,CEP4_7,CEP4_8,CEP4_9,CEP4_10,CEP4_11,CEP4_12,CEP4_13,CEP4_14,IND_BOM_1_1,IND_BOM_1_2
0,0,1,1,1,0,0,0,0,0.1350979,1,...,0,0,1,1,0,1,1,1,0,1
1,1,1,0,1,0,0,1,0,0.2735041,1,...,0,1,0,1,1,0,0,0,1,0
2,2,1,0,1,0,0,1,0,0.2819095,0,...,1,1,0,0,0,0,1,0,1,0
3,3,1,1,1,0,0,0,0,0.2257406,0,...,1,1,0,1,1,0,1,0,1,0
4,4,1,1,0,0,0,1,0,0.4804034,0,...,1,1,1,0,0,1,0,1,1,0
5,5,0,1,1,0,0,0,1,0.2193229,0,...,0,1,1,0,1,0,0,1,1,0
6,6,1,1,1,0,0,0,0,0.5734882,0,...,0,0,0,1,1,0,0,0,1,0
7,7,0,1,0,0,1,0,1,0.8343982,1,...,0,1,0,0,0,1,1,1,1,0
8,8,1,1,0,1,0,0,0,0.3474494,1,...,1,0,0,0,1,0,1,1,1,0
9,9,1,1,1,0,0,0,0,5.506237e-16,1,...,1,1,0,0,0,1,0,0,1,0
