In [24]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

In [2]:
# carregamento dos dados em CSV
base = pd.read_csv('credit-data.csv')

In [3]:
# primeiros registros
base.head()

Unnamed: 0,clientid,income,age,loan,default
0,1,66155.925095,59.017015,8106.532131,0
1,2,34415.153966,48.117153,6564.745018,0
2,3,57317.170063,63.108049,8020.953296,0
3,4,42709.534201,45.751972,6103.64226,0
4,5,66952.688845,18.584336,8770.099235,1


In [4]:
# últimos registros
base.tail()

Unnamed: 0,clientid,income,age,loan,default
1995,1996,59221.044874,48.518179,1926.729397,0
1996,1997,69516.127573,23.162104,3503.176156,0
1997,1998,44311.449262,28.017167,5522.786693,1
1998,1999,43756.056605,63.971796,1622.722598,0
1999,2000,69436.579552,56.152617,7378.833599,0


In [5]:
# resumo dos dados
base.describe()

Unnamed: 0,clientid,income,age,loan,default
count,2000.0,2000.0,1997.0,2000.0,2000.0
mean,1000.5,45331.600018,40.807559,4444.369695,0.1415
std,577.494589,14326.327119,13.624469,3045.410024,0.348624
min,1.0,20014.48947,-52.42328,1.37763,0.0
25%,500.75,32796.459717,28.990415,1939.708847,0.0
50%,1000.5,45789.117313,41.317159,3974.719419,0.0
75%,1500.25,57791.281668,52.58704,6432.410625,0.0
max,2000.0,69995.685578,63.971796,13766.051239,1.0


In [6]:
# procurando valores faltantes de idade
base.loc[base.age.isnull().values == True]

Unnamed: 0,clientid,income,age,loan,default
28,29,59417.805406,,2082.625938,0
30,31,48528.852796,,6155.78467,0
31,32,23526.302555,,2862.010139,0


In [7]:
# substituindo valores faltantes de idade por zero
base.loc[base.age.isnull().values, 'age'] = 0

In [8]:
# procurando valores inconsistentes de idade
base.loc[base.age < 0]

Unnamed: 0,clientid,income,age,loan,default
15,16,50501.726689,-28.218361,3977.287432,0
21,22,32197.620701,-52.42328,4244.057136,0
26,27,63287.038908,-36.496976,9595.286289,0


In [9]:
# calculo da média de idade utilizando apenas os valores maiores que zero
age_mean = np.mean(base.loc[base.age > 0].age)
age_mean

40.92770044906149

In [10]:
# substituindo os valores inconsistentes de idade
base.loc[base.age <= 0, 'age'] = age_mean

In [12]:
# valores novos para idade
base.iloc[[28, 30, 31, 15, 21, 26]]

Unnamed: 0,clientid,income,age,loan,default
28,29,59417.805406,40.9277,2082.625938,0
30,31,48528.852796,40.9277,6155.78467,0
31,32,23526.302555,40.9277,2862.010139,0
15,16,50501.726689,40.9277,3977.287432,0
21,22,32197.620701,40.9277,4244.057136,0
26,27,63287.038908,40.9277,9595.286289,0


In [13]:
# selecionando os arrays de input e target que serão usados na rede neural
inputs = base.iloc[:, 1:4].values
targets = base.iloc[:, 4].values

In [14]:
inputs

array([[6.61559251e+04, 5.90170151e+01, 8.10653213e+03],
       [3.44151540e+04, 4.81171531e+01, 6.56474502e+03],
       [5.73171701e+04, 6.31080495e+01, 8.02095330e+03],
       ...,
       [4.43114493e+04, 2.80171669e+01, 5.52278669e+03],
       [4.37560566e+04, 6.39717958e+01, 1.62272260e+03],
       [6.94365796e+04, 5.61526170e+01, 7.37883360e+03]])

In [15]:
targets

array([0, 0, 0, ..., 1, 0, 0])

In [16]:
# instância do escalonador
scaler = StandardScaler()

In [17]:
# transformação dos inputs com o escalonador
inputs = scaler.fit_transform(inputs)

In [18]:
inputs

array([[ 1.45393393,  1.36538005,  1.20281942],
       [-0.76217555,  0.54265932,  0.69642695],
       [ 0.83682073,  1.67417101,  1.17471147],
       ...,
       [-0.07122592, -0.97448606,  0.35420081],
       [-0.11000289,  1.73936652, -0.92675625],
       [ 1.682986  ,  1.14917551,  0.96381038]])

In [19]:
# separação da base em dados de treinamento e de teste
training_inputs, test_inputs, training_targets, test_targets = train_test_split(inputs, targets)

In [20]:
len(training_inputs)

1500

In [21]:
len(test_inputs)

500

In [22]:
len(training_targets)

1500

In [23]:
len(test_targets)

500

In [29]:
# instância da rede neural de classificação do tipo multilayer perceptron

classifier = MLPClassifier(verbose=True)
#classifier = MLPClassifier(verbose=True, max_iter=500)
#classifier = MLPClassifier(verbose=True, max_iter=1000, tol=0.000010)

# treinamento da rede neural
classifier.fit(training_inputs, training_targets)

Iteration 1, loss = 0.59152645
Iteration 2, loss = 0.53548440
Iteration 3, loss = 0.48799110
Iteration 4, loss = 0.44696291
Iteration 5, loss = 0.41129968
Iteration 6, loss = 0.38077587
Iteration 7, loss = 0.35348290
Iteration 8, loss = 0.32943918
Iteration 9, loss = 0.30763540
Iteration 10, loss = 0.28831237
Iteration 11, loss = 0.27060500
Iteration 12, loss = 0.25474364
Iteration 13, loss = 0.24039577
Iteration 14, loss = 0.22769352
Iteration 15, loss = 0.21619573
Iteration 16, loss = 0.20565660
Iteration 17, loss = 0.19631170
Iteration 18, loss = 0.18776243
Iteration 19, loss = 0.17998042
Iteration 20, loss = 0.17294159
Iteration 21, loss = 0.16653100
Iteration 22, loss = 0.16055967
Iteration 23, loss = 0.15517043
Iteration 24, loss = 0.15006268
Iteration 25, loss = 0.14540694
Iteration 26, loss = 0.14115793
Iteration 27, loss = 0.13712647
Iteration 28, loss = 0.13337543
Iteration 29, loss = 0.12998869
Iteration 30, loss = 0.12667723
Iteration 31, loss = 0.12368646
Iteration 32, los

Iteration 258, loss = 0.02316783
Iteration 259, loss = 0.02312668
Iteration 260, loss = 0.02302169
Iteration 261, loss = 0.02293357
Iteration 262, loss = 0.02285919
Iteration 263, loss = 0.02284713
Iteration 264, loss = 0.02266667
Iteration 265, loss = 0.02263541
Iteration 266, loss = 0.02259018
Iteration 267, loss = 0.02253443
Iteration 268, loss = 0.02239002
Iteration 269, loss = 0.02230262
Iteration 270, loss = 0.02228080
Iteration 271, loss = 0.02211329
Iteration 272, loss = 0.02211424
Iteration 273, loss = 0.02204339
Iteration 274, loss = 0.02194781
Iteration 275, loss = 0.02184279
Iteration 276, loss = 0.02184305
Iteration 277, loss = 0.02173753
Iteration 278, loss = 0.02163602
Iteration 279, loss = 0.02167062
Iteration 280, loss = 0.02152762
Iteration 281, loss = 0.02140606
Iteration 282, loss = 0.02138753
Iteration 283, loss = 0.02130430
Iteration 284, loss = 0.02123470
Iteration 285, loss = 0.02127336
Iteration 286, loss = 0.02118305
Iteration 287, loss = 0.02102844
Iteration 

Iteration 519, loss = 0.01161040
Iteration 520, loss = 0.01169085
Iteration 521, loss = 0.01156028
Iteration 522, loss = 0.01155690
Iteration 523, loss = 0.01150496
Iteration 524, loss = 0.01155683
Iteration 525, loss = 0.01148694
Iteration 526, loss = 0.01144147
Iteration 527, loss = 0.01145344
Iteration 528, loss = 0.01143691
Iteration 529, loss = 0.01136790
Iteration 530, loss = 0.01135716
Iteration 531, loss = 0.01135317
Iteration 532, loss = 0.01139214
Iteration 533, loss = 0.01126933
Iteration 534, loss = 0.01125510
Iteration 535, loss = 0.01130911
Iteration 536, loss = 0.01121259
Iteration 537, loss = 0.01122137
Iteration 538, loss = 0.01120908
Iteration 539, loss = 0.01120360
Iteration 540, loss = 0.01112727
Iteration 541, loss = 0.01116401
Iteration 542, loss = 0.01108837
Iteration 543, loss = 0.01109427
Iteration 544, loss = 0.01105922
Iteration 545, loss = 0.01106860
Iteration 546, loss = 0.01103865
Iteration 547, loss = 0.01100372
Iteration 548, loss = 0.01097775
Iteration 

Iteration 774, loss = 0.00734442
Iteration 775, loss = 0.00740446
Iteration 776, loss = 0.00728601
Iteration 777, loss = 0.00732789
Iteration 778, loss = 0.00726421
Iteration 779, loss = 0.00727363
Iteration 780, loss = 0.00723456
Iteration 781, loss = 0.00721374
Iteration 782, loss = 0.00723558
Iteration 783, loss = 0.00725894
Iteration 784, loss = 0.00723384
Iteration 785, loss = 0.00731720
Iteration 786, loss = 0.00725790
Iteration 787, loss = 0.00717775
Iteration 788, loss = 0.00714790
Iteration 789, loss = 0.00717289
Iteration 790, loss = 0.00715910
Iteration 791, loss = 0.00717952
Iteration 792, loss = 0.00716564
Iteration 793, loss = 0.00715419
Iteration 794, loss = 0.00721418
Iteration 795, loss = 0.00713691
Iteration 796, loss = 0.00713898
Iteration 797, loss = 0.00707676
Iteration 798, loss = 0.00709353
Iteration 799, loss = 0.00703534
Iteration 800, loss = 0.00702271
Iteration 801, loss = 0.00699026
Iteration 802, loss = 0.00700558
Iteration 803, loss = 0.00704241
Iteration 

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=1e-05,
       validation_fraction=0.1, verbose=True, warm_start=False)

In [30]:
# teste da rede neural
previsions = classifier.predict(test_inputs)

In [31]:
previsions

array([0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,

In [32]:
test_targets

array([0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,

In [34]:
precision = accuracy_score(test_targets, previsions)

In [35]:
precision

0.998

In [36]:
confusion_matrix(test_targets, previsions)

array([[436,   0],
       [  1,  63]])