## Lendo e sepando colunas

In [1]:
import pandas as pd
census = pd.read_csv('../dataset/census.csv')

previsores = census.iloc[:, 0:14].values
classe = census.iloc[:, 14].values

## Transformação de variáveis categóricas

In [2]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_data = LabelEncoder()

previsores[:,1] = labelencoder_data.fit_transform(previsores[:,1])
previsores[:,3] = labelencoder_data.fit_transform(previsores[:,3])
previsores[:,5] = labelencoder_data.fit_transform(previsores[:,5])
previsores[:,6] = labelencoder_data.fit_transform(previsores[:,6])
previsores[:,7] = labelencoder_data.fit_transform(previsores[:,7])
previsores[:,8] = labelencoder_data.fit_transform(previsores[:,8])
previsores[:,9] = labelencoder_data.fit_transform(previsores[:,9])
previsores[:,13] = labelencoder_data.fit_transform(previsores[:,13])

In [3]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

ct = ColumnTransformer(
    [('one_hot_encoder', 
      OneHotEncoder(categories='auto'), 
      [1,3,5,6,7,8,9,13])],   
    remainder='passthrough')

previsores = ct.fit_transform(previsores).toarray()

labelencoder_classe = LabelEncoder()
classe = labelencoder_classe.fit_transform(classe)

## Escalonamento dos valores

In [4]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

previsores = scaler.fit_transform(previsores)

## Divisão do dataset

In [5]:
from sklearn.model_selection import train_test_split
previsores_train, previsores_test, classe_train, classe_test = train_test_split(previsores, classe, 
                                                                    test_size=0.15, 
                                                                    random_state=0)

from sklearn.metrics import confusion_matrix, accuracy_score

## Naive Bayes -> Tabela de Probabilidade

In [6]:
from sklearn.naive_bayes import GaussianNB
Naive_Bayes = GaussianNB()
Naive_Bayes.fit(previsores_train, classe_train)

previsoes = Naive_Bayes.predict(previsores_test)
Naive_Bayes_accuracy = accuracy_score(classe_test, previsoes)
matriz = confusion_matrix(classe_test, previsoes)

## Árvore de Decisão

In [7]:
from sklearn.tree import DecisionTreeClassifier
Decision_Tree = DecisionTreeClassifier(criterion='entropy', random_state=0)
Decision_Tree.fit(previsores_train, classe_train)

previsoes = Decision_Tree.predict(previsores_test)
Decision_Tree_accuracy = accuracy_score(classe_test, previsoes)
matriz = confusion_matrix(classe_test, previsoes)

## Random Forest

In [8]:
from sklearn.ensemble import RandomForestClassifier
Random_Forest = RandomForestClassifier(n_estimators = 40,
                                      criterion = 'entropy',
                                      random_state = 0)
Random_Forest.fit(previsores_train, classe_train)

previsoes = Random_Forest.predict(previsores_test)
Random_Forest_accuracy = accuracy_score(classe_test, previsoes)
matriz = confusion_matrix(classe_test, previsoes)

## Base Line Classifier

In [9]:
## O minimo que o modelo necessita aprender

import collections
counter = collections.Counter(classe_test)
base_line = counter[0] / (counter[0] + counter[1])

## KNN

In [10]:
from sklearn.neighbors import KNeighborsClassifier
KNN = KNeighborsClassifier(n_neighbors = 5,
                                    metric = 'minkowski',
                                    p = 2)
KNN.fit(previsores_train, classe_train)
previsoes = KNN.predict(previsores_test)

KNN_accuracy = accuracy_score(classe_test, previsoes)
matriz = confusion_matrix(classe_test, previsoes)

## Regressão Logística

In [11]:
from sklearn.linear_model import LogisticRegression

Logistic_Regression = LogisticRegression(random_state = 0)
Logistic_Regression.fit(previsores_train, classe_train)

previsoes = Logistic_Regression.predict(previsores_test)

Logistic_Regression_accuracy = accuracy_score(classe_test, previsoes)
matriz = confusion_matrix(classe_test, previsoes)

## SVM

In [12]:
from sklearn.svm import SVC
SVM = SVC(kernel = 'linear', random_state = 0)
SVM.fit(previsores_train, classe_train)

previsoes = SVM.predict(previsores_test)

SVM_accuracy = accuracy_score(classe_test, previsoes)
matriz = confusion_matrix(classe_test, previsoes)

## Redes Neurais com SkLearn

In [13]:
from sklearn.neural_network import MLPClassifier
MLP = MLPClassifier(verbose = True,
                    max_iter = 1000,
                    tol = 0.000010,
                    solver = 'adam',
                    hidden_layer_sizes = (100),
                    activation = 'relu')
MLP.fit(previsores_train, classe_train)

previsoes = MLP.predict(previsores_test)

MLP_accuracy = accuracy_score(classe_test, previsoes)
matriz = confusion_matrix(classe_test, previsoes)

Iteration 1, loss = 0.39318731
Iteration 2, loss = 0.32488956
Iteration 3, loss = 0.31468027
Iteration 4, loss = 0.30840031
Iteration 5, loss = 0.30412810
Iteration 6, loss = 0.30060632
Iteration 7, loss = 0.29769542
Iteration 8, loss = 0.29578873
Iteration 9, loss = 0.29395631
Iteration 10, loss = 0.29124730
Iteration 11, loss = 0.28926160
Iteration 12, loss = 0.28772316
Iteration 13, loss = 0.28599262
Iteration 14, loss = 0.28482784
Iteration 15, loss = 0.28323003
Iteration 16, loss = 0.28138121
Iteration 17, loss = 0.27998739
Iteration 18, loss = 0.27796830
Iteration 19, loss = 0.27733086
Iteration 20, loss = 0.27618096
Iteration 21, loss = 0.27498588
Iteration 22, loss = 0.27353980
Iteration 23, loss = 0.27313299
Iteration 24, loss = 0.27182834
Iteration 25, loss = 0.27053620
Iteration 26, loss = 0.26968592
Iteration 27, loss = 0.26871480
Iteration 28, loss = 0.26778063
Iteration 29, loss = 0.26774988
Iteration 30, loss = 0.26621121
Iteration 31, loss = 0.26523359
Iteration 32, los

Iteration 253, loss = 0.19842409
Iteration 254, loss = 0.19753229
Iteration 255, loss = 0.19846978
Iteration 256, loss = 0.19909455
Iteration 257, loss = 0.19835120
Iteration 258, loss = 0.19881358
Iteration 259, loss = 0.19801677
Iteration 260, loss = 0.19707134
Iteration 261, loss = 0.19854853
Iteration 262, loss = 0.19860782
Iteration 263, loss = 0.19788023
Iteration 264, loss = 0.19865773
Iteration 265, loss = 0.19714293
Iteration 266, loss = 0.19701137
Iteration 267, loss = 0.19692665
Iteration 268, loss = 0.19744973
Iteration 269, loss = 0.19703660
Iteration 270, loss = 0.19467217
Iteration 271, loss = 0.19657876
Iteration 272, loss = 0.19567877
Iteration 273, loss = 0.19563419
Iteration 274, loss = 0.19613500
Iteration 275, loss = 0.19589444
Iteration 276, loss = 0.19608934
Iteration 277, loss = 0.19430079
Iteration 278, loss = 0.19551273
Iteration 279, loss = 0.19549576
Iteration 280, loss = 0.19626400
Iteration 281, loss = 0.19554901
Iteration 282, loss = 0.19572990
Iteration 

## Redes Neurais com Keras

In [18]:
import keras
from keras.models import Sequential
from keras.layers import Dense

MLP_keras = Sequential()
MLP_keras.add(Dense(units = 55, activation = 'relu', input_dim = 108))
MLP_keras.add(Dense(units = 55, activation = 'relu'))
MLP_keras.add(Dense(units = 1, activation = 'sigmoid'))

MLP_keras.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
MLP_keras.fit(previsores_train, classe_train, batch_size = 1, nb_epoch = 100)

previsoes = MLP.predict(previsores_test)
previsoes = (previsoes > 0.5)

MLP_keras_accuracy = accuracy_score(classe_test, previsoes)
matriz = confusion_matrix(classe_test, previsoes)

  # This is added back by InteractiveShellApp.init_path()


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

## Resultado

In [19]:
GREEN = "\033[1;32m" 
RESET = '\033[0;0m'
NEGRITO = '\033[1m'
models = {'Base Line': base_line,'Naive Bayes': Naive_Bayes_accuracy, 'Decision Tree': Decision_Tree_accuracy,
          'Random Forest': Random_Forest_accuracy, 'KNN': KNN_accuracy, 'Logistic Regression': Logistic_Regression_accuracy,
          'SVM': SVM_accuracy, 'MLP': MLP_accuracy, 'MLP_Keras': MLP_keras_accuracy}

for model in models:
    print('{}{}{}: {}{:.4f}%{}'.format(NEGRITO,model,RESET,GREEN,models[model], RESET))

[1mBase Line[0;0m: [1;32m0.7560%[0;0m
[1mNaive Bayes[0;0m: [1;32m0.4768%[0;0m
[1mDecision Tree[0;0m: [1;32m0.8104%[0;0m
[1mRandom Forest[0;0m: [1;32m0.8477%[0;0m
[1mKNN[0;0m: [1;32m0.8223%[0;0m
[1mLogistic Regression[0;0m: [1;32m0.8495%[0;0m
[1mSVM[0;0m: [1;32m0.8508%[0;0m
[1mMLP[0;0m: [1;32m0.8197%[0;0m
[1mMLP_Keras[0;0m: [1;32m0.8197%[0;0m
