In [1]:
import pandas as pd
base = pd.read_csv('credit_data.csv')
base = base.drop('i#clientid', axis = 1)
base = base.dropna()
from sklearn.preprocessing import StandardScaler
scaler_x = StandardScaler()
base[['income', 'age', 'loan']] = scaler_x.fit_transform(base[['income', 'age', 'loan']])
X = base.drop('c#default', axis = 1)
y = base['c#default']
X.head()

Unnamed: 0,income,age,loan
0,1.453898,1.336861,1.201907
1,-0.762398,0.536639,0.695744
2,0.836733,1.637207,1.173812
3,-0.183244,0.362998,0.544366
4,1.509532,-1.631534,1.419754


In [2]:
neuronios_entrada = 3
neuronios_oculta = 2
neuronios_saida = neuronios_entrada
# 3 -> 2 -> 3

In [3]:
import tensorflow as tf
tf.random.set_seed(42)

tf.__version__

'2.3.0'

In [4]:
camada_oculta = tf.keras.layers.Dense(units = neuronios_oculta, activation = None)
camada_saida = tf.keras.layers.Dense(units = neuronios_saida, activation = None)

In [5]:
X = tf.convert_to_tensor(X)

In [6]:
otimizador = tf.keras.optimizers.Adam(learning_rate = 0.01)

In [7]:
for epoca in range(1000):
    with tf.GradientTape() as tape:
        X2d_encode = camada_oculta(X)
        X3d_decode = camada_saida(X2d_encode)
        erro = tf.keras.losses.MSE(X, X3d_decode)

    # A lista de variáveis treináveis só é criada depois que o modelo já processou
    # um batch, pois a partir dos dados de entrada ele deduz os formatos das matrizes
    # de peso e bias, por isso só estou criando a variável `var_list` aqui dentro
    # do loop de treinamento
    var_list = [*camada_oculta.trainable_variables, *camada_saida.trainable_variables]
    
    gradientes = tape.gradient(erro, var_list)
    
    otimizador.apply_gradients(zip(gradientes, var_list))

    if epoca % 100 == 0:
        print('erro: ' + str(tf.reduce_mean(erro).numpy()))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

erro: 1.6832432
erro: 0.2845752
erro: 0.21225584
erro: 0.1959397
erro: 0.1898937
erro: 0.18734665
erro: 0.186291
erro: 0.18587913
erro: 0.18573166
erro: 0.18568398


In [8]:
var_list

[<tf.Variable 'dense/kernel:0' shape=(3, 2) dtype=float32, numpy=
 array([[ 0.07045888,  0.45522794],
        [-1.6739728 ,  0.8288997 ],
        [-0.08337805,  0.53322965]], dtype=float32)>,
 <tf.Variable 'dense/bias:0' shape=(2,) dtype=float32, numpy=array([-0.00658469,  0.0063526 ], dtype=float32)>,
 <tf.Variable 'dense_1/kernel:0' shape=(2, 3) dtype=float32, numpy=
 array([[ 0.5296163 , -0.5989116 ,  0.47895816],
        [ 1.017     , -0.00791579,  1.0196252 ]], dtype=float32)>,
 <tf.Variable 'dense_1/bias:0' shape=(3,) dtype=float32, numpy=array([-0.0028742 , -0.0039484 , -0.00323724], dtype=float32)>]

In [9]:
X2d_encode.shape

TensorShape([1997, 2])

In [10]:
X3d_decode.shape

TensorShape([1997, 3])

In [11]:
X2 = scaler_x.inverse_transform(X.numpy())
X2

array([[6.61559251e+04, 5.90170151e+01, 8.10653213e+03],
       [3.44151540e+04, 4.81171531e+01, 6.56474502e+03],
       [5.73171701e+04, 6.31080495e+01, 8.02095330e+03],
       ...,
       [4.43114493e+04, 2.80171669e+01, 5.52278669e+03],
       [4.37560566e+04, 6.39717958e+01, 1.62272260e+03],
       [6.94365796e+04, 5.61526170e+01, 7.37883360e+03]])

In [12]:
X3d_decode2 = scaler_x.inverse_transform(X3d_decode.numpy())
X3d_decode2

array([[6.3491191e+04, 5.8785046e+01, 8.6717627e+03],
       [4.4500414e+04, 4.8997223e+01, 4.4279058e+03],
       [5.8681355e+04, 6.3227535e+01, 7.7323657e+03],
       ...,
       [4.7932316e+04, 2.8333096e+01, 4.7558184e+03],
       [3.6872176e+04, 6.3370003e+01, 3.0814448e+03],
       [6.3559410e+04, 5.5640202e+01, 8.6247744e+03]], dtype=float32)

In [13]:
from sklearn.metrics import mean_absolute_error
mae_income = mean_absolute_error(X2[:, 0], X3d_decode2[:, 0])
mae_income

6258.668414541674

In [14]:
mae_age = mean_absolute_error(X2[:, 1], X3d_decode2[:, 1])
mae_age

0.5463382434053274

In [15]:
mae_loan = mean_absolute_error(X2[:, 2], X3d_decode2[:, 2])
mae_loan

1326.1653437809393

In [16]:
X_encode = pd.DataFrame({'atributo1': X2d_encode[:, 0], 'atributo2': X2d_encode[:, 1], 'classe': y})

In [17]:
X_encode.head()

Unnamed: 0,atributo1,atributo2,classe
0,-2.242158,2.417155,0
1,-1.016742,0.475124,0
2,-2.786119,2.370206,0
3,-0.67259,0.514092,0
4,2.712567,0.098152,1


In [18]:
colunas = [tf.feature_column.numeric_column(key = column) for column in X_encode.columns]
from sklearn.model_selection import train_test_split
X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(X_encode, y, test_size = 0.3, random_state = 42)

In [19]:
def make_input_fn(X, y = None, num_epochs = None, shuffle = True, batch_size = 32):
    def input_function():
        # As variáveis previsoras precisam ser passadas na função seguinte como um dicionário com os mesmos nomes definidos na chamada de `tf.feature_column.numeric_column`
        ds = tf.data.Dataset.from_tensor_slices((dict(X), y))
        if shuffle:
            ds = ds.shuffle(1000)
        ds = ds.batch(batch_size).repeat(num_epochs)
        return ds
    return input_function

In [20]:
funcao_treinamento = make_input_fn(X_treinamento, y_treinamento, batch_size = 8)
config = tf.estimator.RunConfig(tf_random_seed = 42)
classificador = tf.estimator.DNNClassifier(feature_columns = colunas, hidden_units = [4, 4], config = config)
classificador.train(input_fn = funcao_treinamento, steps = 1000)

INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpe157obpc', '_tf_random_seed': 42, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:t

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x7f750631c400>

In [21]:
funcao_teste = make_input_fn(X_teste, y_teste, num_epochs = 1000, shuffle = False, batch_size = 8)
metricas_teste = classificador.evaluate(input_fn = funcao_teste, steps = 1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-09-08T18:28:04Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpe157obpc/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [100/1000]
INFO:tensorflow:Evaluation [200/1000]
INFO:tensorflow:Evaluation [300/1000]
INFO:tensorflow:Evaluation [400/1000]
INFO:tensorflow:Evaluation [500/1000]
INFO:tensorflow:Evaluation [600/1000]
INFO:tensorflow:Evaluation [700/1000]
INFO:tensorflow:Evaluation [800/1000]
INFO:tensorflow:Evaluation [900/1000]
INFO:tensorflow:Evaluation [1000/1000]
INFO:tensorflow:Inference Time : 1.20785s
INFO:tensorflow:Finished evaluation at 2020-09-08-18:28:05
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.9205, accuracy_baseline = 0.848875, auc = 0.9882175, auc_precision_recall = 0.9334115, average_loss = 0.4774202, global_step = 

In [22]:
metricas_teste

{'accuracy': 0.9205,
 'accuracy_baseline': 0.848875,
 'auc': 0.9882175,
 'auc_precision_recall': 0.9334115,
 'average_loss': 0.4774202,
 'global_step': 1000,
 'label/mean': 0.151125,
 'loss': 0.4774202,
 'precision': 0.9584,
 'prediction/mean': 0.37319165,
 'recall': 0.4954508}