## Preparación de datos

In [1]:
import pandas as pd

PREDICTORS = ["age", "workclass", "fnlwgt", "education", "education-num", "marital-status", 
                                        "occupation", "relationship", "race", "sex", "capital-gain",
                                        "capital-loss", "hours-per-week", "native-country"]
PREDICTORS_STRING = ["workclass", "education", "marital-status", "occupation", "relationship", "race", "sex", "native-country"]
PREDICTORS_INT = ["age", "fnlwgt", "capital-gain", "capital-loss", "hours-per-week"]
TARGET_VARIABLE = "class"

data = pd.read_csv('adult.data', names=["age", "workclass", "fnlwgt", "education", "education-num", "marital-status", 
                                        "occupation", "relationship", "race", "sex", "capital-gain",
                                        "capital-loss", "hours-per-week", "native-country", "class"])

def from_string_to_int(key, data_set):
    data_set[key],class_names = pd.factorize(data_set[key])
    return class_names

class_names = from_string_to_int(TARGET_VARIABLE, data)

for predictor in PREDICTORS_STRING:
    from_string_to_int(predictor, data)
    
X = data.iloc[:,:-1]
y = data.iloc[:,-1]


def print_5_cases(X, y, predictions):
    for i in range(5):
        print(X.iloc)
        print('%s => %d (expected %d)' % (X.iloc[i].tolist(), predictions[i], y.iloc[i]))

## Arquitectura 1

In [2]:
from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense


# define the keras model
model = Sequential()
model.add(Dense(12, input_dim=14, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# compile the keras model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit the keras model on the dataset
model.fit(X, y, epochs=150, batch_size=10, verbose=0)
# make class predictions with the model
predictions_1 = (model.predict(X) > 0.5).astype(int)
# summarize the first 5 cases
print_5_cases(X, y, predictions_1)

<pandas.core.indexing._iLocIndexer object at 0x000001EDFF038DB0>
[39, 0, 77516, 0, 13, 0, 0, 0, 0, 0, 2174, 0, 40, 0] => 0 (expected 0)
<pandas.core.indexing._iLocIndexer object at 0x000001EDFF0386D0>
[50, 1, 83311, 0, 13, 1, 1, 1, 0, 0, 0, 0, 13, 0] => 0 (expected 0)
<pandas.core.indexing._iLocIndexer object at 0x000001EDFF038DB0>
[38, 2, 215646, 1, 9, 2, 2, 0, 0, 0, 0, 0, 40, 0] => 0 (expected 0)
<pandas.core.indexing._iLocIndexer object at 0x000001EDFF038EA0>
[53, 2, 234721, 2, 7, 1, 2, 1, 1, 0, 0, 0, 40, 0] => 0 (expected 0)
<pandas.core.indexing._iLocIndexer object at 0x000001EDFF038BD0>
[28, 2, 338409, 0, 13, 1, 3, 2, 1, 1, 0, 0, 40, 1] => 0 (expected 0)


## Arquitectura 2

In [3]:
model = Sequential()
model.add(Dense(12, input_dim=14, activation='relu'))
model.add(Dense(16, activation="softmax"))
model.add(Dense(20, activation="softsign"))
model.add(Dense(1, activation='sigmoid'))
# compile the keras model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit the keras model on the dataset
model.fit(X, y, epochs=150, batch_size=10, verbose=0)
# make class predictions with the model
predictions_2 = (model.predict(X) > 0.5).astype(int)
# summarize the first 5 cases
print_5_cases(X, y, predictions_2)

<pandas.core.indexing._iLocIndexer object at 0x000001EDFF58C310>
[39, 0, 77516, 0, 13, 0, 0, 0, 0, 0, 2174, 0, 40, 0] => 0 (expected 0)
<pandas.core.indexing._iLocIndexer object at 0x000001EDFF58C310>
[50, 1, 83311, 0, 13, 1, 1, 1, 0, 0, 0, 0, 13, 0] => 0 (expected 0)
<pandas.core.indexing._iLocIndexer object at 0x000001EDFF58C310>
[38, 2, 215646, 1, 9, 2, 2, 0, 0, 0, 0, 0, 40, 0] => 0 (expected 0)
<pandas.core.indexing._iLocIndexer object at 0x000001EDFF58C310>
[53, 2, 234721, 2, 7, 1, 2, 1, 1, 0, 0, 0, 40, 0] => 0 (expected 0)
<pandas.core.indexing._iLocIndexer object at 0x000001EDFF58C310>
[28, 2, 338409, 0, 13, 1, 3, 2, 1, 1, 0, 0, 40, 1] => 0 (expected 0)


## Arquitectura 3

In [4]:
model = Sequential()
model.add(Dense(12, input_dim=14, activation='relu'))
model.add(Dense(16, activation="softmax"))
model.add(Dense(20, activation="softsign"))
model.add(Dense(22, activation="tanh"))
model.add(Dense(24, activation="selu"))
model.add(Dense(1, activation='sigmoid'))
# compile the keras model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit the keras model on the dataset
model.fit(X, y, epochs=150, batch_size=10, verbose=0)
# make class predictions with the model
predictions_3 = (model.predict(X) > 0.5).astype(int)
# summarize the first 5 cases

print_5_cases(X, y, predictions_3)

<pandas.core.indexing._iLocIndexer object at 0x000001EDFF44BB30>
[39, 0, 77516, 0, 13, 0, 0, 0, 0, 0, 2174, 0, 40, 0] => 0 (expected 0)
<pandas.core.indexing._iLocIndexer object at 0x000001EDFF44BE50>
[50, 1, 83311, 0, 13, 1, 1, 1, 0, 0, 0, 0, 13, 0] => 0 (expected 0)
<pandas.core.indexing._iLocIndexer object at 0x000001EDFF44BB30>
[38, 2, 215646, 1, 9, 2, 2, 0, 0, 0, 0, 0, 40, 0] => 0 (expected 0)
<pandas.core.indexing._iLocIndexer object at 0x000001EDFF44BE50>
[53, 2, 234721, 2, 7, 1, 2, 1, 1, 0, 0, 0, 40, 0] => 0 (expected 0)
<pandas.core.indexing._iLocIndexer object at 0x000001EDFF44BB30>
[28, 2, 338409, 0, 13, 1, 3, 2, 1, 1, 0, 0, 40, 1] => 0 (expected 0)
