In [11]:
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical

Neste primeiro modelo, vamos prever o salário por hora dos funcionários.

In [12]:
# Load the hourly-wage training data from its CSV file.
train_df = pd.read_csv(filepath_or_buffer='data/hourly_wages_data.csv')

# Preview the first five rows to check what was loaded.
train_df.head(n=5)

Unnamed: 0,wage_per_hour,union,education_yrs,experience_yrs,age,female,marr,south,manufacturing,construction
0,5.1,0,8,21,35,1,1,0,1,0
1,4.95,0,9,42,57,1,1,0,1,0
2,6.67,0,12,1,19,0,0,0,1,0
3,4.0,0,12,4,22,0,0,0,0,0
4,7.5,0,12,17,35,0,1,0,0,0


In [6]:
# Keep the target column as a Series, and build the feature frame from every
# column of the training data except the target.
target = train_df.loc[:, 'wage_per_hour']
train_X = train_df.drop('wage_per_hour', axis=1)

# Confirm that the target column is no longer among the features.
train_X.head(n=5)

Unnamed: 0,union,education_yrs,experience_yrs,age,female,marr,south,manufacturing,construction
0,0,8,21,35,1,1,0,1,0
1,0,9,42,57,1,1,0,1,0
2,0,12,1,19,0,0,0,1,0
3,0,12,4,22,0,0,0,0,0
4,0,12,17,35,0,1,0,0,0


In [7]:
# Build a single-column DataFrame holding only the target column.
train_y = train_df.loc[:, ['wage_per_hour']]

# Preview the target frame.
train_y.head(n=5)

Unnamed: 0,wage_per_hour
0,5.1
1,4.95
2,6.67
3,4.0
4,7.5


In [9]:
# Create the baseline model for the hourly-wage prediction task.
model = Sequential()

# The number of feature columns in the training data sets the input width.
n_cols = train_X.shape[1]

# Two hidden ReLU layers of 10 units each, then a single output unit with no
# activation specified.
model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
model.add(Dense(10, activation='relu'))
model.add(Dense(1))

# Compile the model with mean squared error as the loss.
model.compile(optimizer='adam', loss='mean_squared_error')

# Early-stopping monitor: stop training once the validation loss has failed to
# improve for 3 consecutive epochs. Without restore_best_weights the model
# would keep the weights of the last epoch run, i.e. up to `patience` epochs
# past its best validation loss; with it, an early stop rolls the model back
# to the best epoch seen. The same callback instance is passed to every
# fit() call in this notebook.
early_stopping_monitor = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

In [10]:
# Train the baseline model for up to 30 epochs, holding out 20% of the rows
# for validation and using the early-stopping monitor as a callback.
model.fit(
    x=train_X,
    y=train_y,
    epochs=30,
    validation_split=0.2,
    callbacks=[early_stopping_monitor],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30


<tensorflow.python.keras.callbacks.History at 0x26a68acce88>

In [7]:
# Second model on the same data, built to show the effect of increasing model
# capacity: three hidden ReLU layers of 200 units each, then a single output
# unit with no activation specified.
model_mc = Sequential([
    Dense(200, activation='relu', input_shape=(n_cols,)),
    Dense(200, activation='relu'),
    Dense(200, activation='relu'),
    Dense(1),
])

# Compile with mean squared error as the loss, as for the first model.
model_mc.compile(loss='mean_squared_error', optimizer='adam')

In [8]:
# Train the higher-capacity model with the same settings as the first model:
# up to 30 epochs, a 20% validation split, and the early-stopping monitor.
model_mc.fit(
    x=train_X,
    y=train_y,
    epochs=30,
    validation_split=0.2,
    callbacks=[early_stopping_monitor],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30


<tensorflow.python.keras.callbacks.History at 0x22de43c2488>

Para este próximo modelo, vamos predizer se os pacientes têm diabetes ou não.

In [13]:
# Load the diabetes training data from its CSV file.
train_df_2 = pd.read_csv(filepath_or_buffer='data/diabetes_data.csv')

# Preview the first five rows to see the structure of the data.
train_df_2.head(n=5)

Unnamed: 0,pregnancies,glucose,diastolic,triceps,insulin,bmi,dpf,age,diabetes
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [14]:
# Keep the target column as a Series (this rebinds the name `target`), and
# build the feature frame from every column except the target.
target = train_df_2.loc[:, 'diabetes']
train_X_2 = train_df_2.drop('diabetes', axis=1)

# Confirm that the target column is no longer among the features.
train_X_2.head(n=5)

Unnamed: 0,pregnancies,glucose,diastolic,triceps,insulin,bmi,dpf,age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [11]:
# One-hot encode the target column.
train_y_2 = to_categorical(train_df_2['diabetes'])

# Check that the target column has been converted by showing the first five rows.
train_y_2[:5]

array([[0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.]], dtype=float32)

In [12]:
# The number of feature columns in the training data sets the input width.
n_cols_2 = train_X_2.shape[1]

# Classification model: three hidden ReLU layers of 250 units each, then a
# two-unit softmax output to match the two-column one-hot target.
model_2 = Sequential([
    Dense(250, activation='relu', input_shape=(n_cols_2,)),
    Dense(250, activation='relu'),
    Dense(250, activation='relu'),
    Dense(2, activation='softmax'),
])

# Compile with categorical cross-entropy as the loss and report accuracy as
# the performance metric.
model_2.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [13]:
# Display the dimensions of the feature frame as (rows, columns).
train_X_2.shape

(768, 8)

In [15]:
# Train the classifier for up to 30 epochs, holding out 20% of the rows for
# validation and using the early-stopping monitor as a callback.
model_2.fit(
    x=train_X_2,
    y=train_y_2,
    validation_split=0.2,
    epochs=30,
    callbacks=[early_stopping_monitor],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


<tensorflow.python.keras.callbacks.History at 0x22de78c6b08>