In [1]:
from sklearn.datasets import make_classification

In [2]:
# Synthetic binary-classification data: 10,000 rows, 25 features (10 informative).
# A fixed random_state makes the generated data identical after every
# Restart & Run All; without it each run trains on a different dataset.
numeric_dataset = make_classification(
    n_samples=10_000,
    n_features=25,
    n_informative=10,
    n_classes=2,
    random_state=42)

In [3]:
# Unpack the generated dataset into the feature matrix `x` and label vector `y`.
x, y = numeric_dataset

In [4]:
import pandas as pd
import numpy as np

# Print NumPy arrays with one decimal place so displayed rows stay compact.
np.set_printoptions(precision=1)

In [5]:
# Number of bins each pseudo-categorical column is cut into.
num_categories = 10

# Overwrite the first five feature columns, in place, with their integer bin
# codes (labels=False) so they can stand in for categorical features.
for i in range(5):
    x[:, i] = pd.cut(x[:, i], bins=num_categories, labels=False)

In [6]:
# Inspect the first row: the first five entries are the bin codes written by
# the binning loop, the remaining twenty are the untouched continuous features.
x[0]

array([ 3. ,  3. ,  2. ,  5. ,  6. ,  0.4, -0.9,  1. ,  0.1,  0.5, -0.9,
        0.8, -0.1, -1. ,  0.2,  0.4,  1.5, -0.4,  1.2,  0.1, -0.2,  1. ,
       -0.1,  0.4, -0.9])

In [7]:
# Split the feature matrix column-wise: the five binned columns come first,
# everything after them is continuous.
x_cat, x_numeric = x[:, :5], x[:, 5:]

In [8]:
from sklearn.preprocessing import StandardScaler

In [9]:
# Scaler that will be fitted on the continuous feature columns.
ss = StandardScaler()

In [10]:
# Fit the scaler on the continuous columns and keep the standardised copy;
# this array is what the batch generator feeds to the model's numeric input.
standardized_x = ss.fit_transform(x_numeric)

In [11]:
def emb_sz_rule(n_cat):
    """Return a heuristic embedding width for a feature with `n_cat` categories.

    The width is ``round(1.6 * n_cat ** 0.56)``, capped at 600.
    """
    suggested = round(1.6 * n_cat ** 0.56)
    return suggested if suggested < 600 else 600

In [12]:
# Dropout rate shared by every Dropout layer of the model.
p = 0.1

In [13]:
import tensorflow as tf

In [14]:
# Symbolic model inputs: 20 standardised continuous features and 5 bin codes.
# The names double as the dictionary keys yielded by the batch generator.
numeric_inputs = tf.keras.Input(shape=(20,), name='numeric_inputs')
cat_inputs = tf.keras.Input(shape=(5,), name='cat_inputs')

In [18]:
# Look up a learned vector for every categorical bin code, then flatten the
# five per-column vectors into one feature vector.
# NOTE(review): a single embedding table is shared by all five categorical
# columns, so bin k of every column maps to the same vector — confirm this
# sharing is intended.
embedding_layer = tf.keras.layers.Embedding(
    num_categories,
    emb_sz_rule(num_categories),
    input_length=5)
cat_x = embedding_layer(cat_inputs)
cat_x = tf.keras.layers.Flatten()(cat_x)

# The running tensor is named `hidden` rather than `x` so that this cell no
# longer overwrites the feature matrix `x` built by the data-preparation
# cells; re-running those cells after this one keeps working.
hidden = tf.keras.layers.Concatenate()([cat_x, numeric_inputs])

# First stage: dropout on the raw concatenated features, then a wide layer.
hidden = tf.keras.layers.Dropout(p)(hidden)
hidden = tf.keras.layers.Dense(100, activation='relu')(hidden)

# Two further BatchNorm -> Dropout -> Dense(relu) stages of decreasing width.
for units in (20, 10):
    hidden = tf.keras.layers.BatchNormalization()(hidden)
    hidden = tf.keras.layers.Dropout(p)(hidden)
    hidden = tf.keras.layers.Dense(units, activation='relu')(hidden)

# Output head: a single sigmoid unit for the binary label. The layer name
# 'output' is the key used for the targets yielded by the batch generator.
hidden = tf.keras.layers.BatchNormalization()(hidden)
hidden = tf.keras.layers.Dropout(p)(hidden)
out = tf.keras.layers.Dense(1, activation='sigmoid', name='output')(hidden)

In [20]:
# Assemble the two-input, one-output model and configure it for binary
# classification with RMSprop and accuracy tracking.
model = tf.keras.Model(inputs=[numeric_inputs, cat_inputs], outputs=out)
model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [21]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
cat_inputs (InputLayer)         [(None, 5)]          0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 5, 6)         60          cat_inputs[0][0]                 
__________________________________________________________________________________________________
flatten (Flatten)               (None, 30)           0           embedding_3[0][0]                
__________________________________________________________________________________________________
numeric_inputs (InputLayer)     [(None, 20)]         0                                            
______________________________________________________________________________________________

In [24]:
import numpy as np

def bootstrap_sample_generator(batch_size):
    """Endlessly yield randomly drawn mini-batches for Keras training.

    Each batch picks `batch_size` row indices at random from the notebook-level
    arrays `standardized_x`, `x_cat` and `y`, and yields a pair
    ``(inputs, targets)`` of dictionaries keyed by the model's input and
    output layer names.
    """
    while True:
        rows = np.random.choice(len(standardized_x), size=batch_size)
        inputs = dict(
            numeric_inputs=standardized_x[rows],
            cat_inputs=x_cat[rows],
        )
        targets = dict(output=y[rows])
        yield inputs, targets

In [27]:
batch_size = 32

# Keras documents steps_per_epoch as an integer; the previous expression
# 10_000 / batch_size evaluated to the float 312.5. Ceiling division on the
# actual row count gives a whole number of steps and keeps one nominal epoch
# at least as large as the dataset even if the sample count changes.
steps_per_epoch = -(-standardized_x.shape[0] // batch_size)

model.fit_generator(
    bootstrap_sample_generator(batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=5,
    max_queue_size=10,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x133d85898>