In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import tensorflow_datasets as tfds
import tensorflow.keras as keras


tfds.disable_progress_bar()

In [2]:
def define_feature_columns_layers(data_df, categorical_cols, numeric_cols):
    feature_columns = []
    feature_layer_inputs = {}
    
    for feature_name in numeric_cols:
        feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))
        feature_layer_inputs[feature_name] = tf.keras.Input(shape=(1,), name=feature_name)
        
    for feature_name in categorical_cols:
        vocabulary = data_df[feature_name].unique()
        cat = tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary)
        cat_one_hot = tf.feature_column.indicator_column(cat)
        feature_columns.append(cat_one_hot)
        feature_layer_inputs[feature_name] = tf.keras.Input(shape=(1,), name=feature_name, dtype=tf.int32)
        
    return feature_columns, feature_layer_inputs

In [3]:
def create_interactions(interactions_list, buckets=5):
    feature_columns = []
    
    for (a, b) in interactions_list:
        crossed_feature = tf.feature_column.crossed_column([a, b], hash_bucket_size=buckets)
        crossed_feature_one_hot = tf.feature_column.indicator_column(crossed_feature)
        feature_columns.append(crossed_feature_one_hot)
        
    return feature_columns

In [4]:
def create_linreg(feature_columns, feature_layer_inputs, optimizer):
    feature_layer = keras.layers.DenseFeatures(feature_columns)
    feature_layer_outputs = feature_layer(feature_layer_inputs)
    norm = keras.layers.BatchNormalization()(feature_layer_outputs)
    outputs = keras.layers.Dense(1, kernel_initializer='normal', activation='linear')(norm)
    
    model = keras.Model(inputs=[v for v in feature_layer_inputs.values()], outputs=outputs)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

In [5]:
housing_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data'
path = tf.keras.utils.get_file(housing_url.split("/")[-1], housing_url)

columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
           'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
data = pd.read_table(path, delim_whitespace=True, header=None, names=columns)

In [6]:
categorical_cols = ['CHAS', 'RAD']
numeric_cols = ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
feature_columns, feature_layer_inputs = define_feature_columns_layers(data, categorical_cols, numeric_cols)
interactions_columns = create_interactions([['RM', 'LSTAT']])

feature_columns += interactions_columns

optimizer = keras.optimizers.Ftrl(learning_rate=0.02)
model = create_linreg(feature_columns, feature_layer_inputs, optimizer)

In [7]:
import tempfile

def canned_keras(model):
    model_dir = tempfile.mkdtemp()
    keras_estimator = tf.keras.estimator.model_to_estimator(keras_model=model, model_dir=model_dir)
    return keras_estimator

estimator = canned_keras(model)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using the Keras model provided.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp4xua2uha', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global

In [8]:
learning_rate = 0.5
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=256):
    def input_function():
        ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
        if shuffle:
            ds = ds.shuffle(1000)
        ds = ds.batch(batch_size).repeat(num_epochs)
        return ds
    return input_function

In [10]:
np.random.seed(1)
train = data.sample(frac=0.8).copy()
y_train = train['MEDV']
train.drop('MEDV', axis=1, inplace=True)

test= data.loc[~data.index.isin(train.index)].copy()
y_test = test['MEDV']
test.drop('MEDV', axis=1, inplace=True)

In [12]:
train_input_fn = make_input_fn(train, y_train, num_epochs=1400)
test_input_fn = make_input_fn(test, y_test, num_epochs=1, shuffle=False)

estimator.train(train_input_fn)
result = estimator.evaluate(test_input_fn)

print(result)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmp4xua2uha/model.ckpt-2800
Instructions for updating:
Use standard file utilities to get mtimes.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 2800...
INFO:tensorflow:Saving checkpoints for 2800 into /tmp/tmp4xua2uha/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 2800...
INFO:tensorflow:loss = 25.710728, step = 2800
INFO:tensorflow:global_step/sec: 145.93
INFO:tensorflow:loss = 22.670687, step = 2900 (0.688 sec)
INFO:tensorflow:global_step/sec: 147.478
INFO:tensorflow:loss = 20.927778, step = 3000 (0.678 sec)
INFO:tensorflow:global_step/sec: 147.222
INFO:tensorflow:loss = 23.12601, step = 3100 (0.679 sec)
INFO:tensorflow:global_step/sec: 160.308
INF

In [13]:
weights = estimator.get_variable_value('layer_with_weights-1/kernel/.ATTRIBUTES/VARIABLE_VALUE')
print(weights)

[[-0.82753277]
 [-0.9197132 ]
 [ 0.9121166 ]
 [ 0.8751093 ]
 [ 0.93666166]
 [-1.3062418 ]
 [-0.6605524 ]
 [-1.4155335 ]
 [ 0.858231  ]
 [-0.8840785 ]
 [ 0.87800807]
 [-0.9012212 ]
 [-0.90697014]
 [-0.8478116 ]
 [-0.9612655 ]
 [-0.9268641 ]
 [-0.8637465 ]
 [ 0.8671663 ]
 [-0.8810288 ]
 [-0.888186  ]
 [ 0.85306275]
 [ 0.9141941 ]
 [-0.9207836 ]
 [ 0.83163726]
 [ 1.3425697 ]
 [-0.7727633 ]
 [-0.86899364]]


In [16]:
def extract_labels(feature_columns):
    labels = list()
    
    for col in feature_columns:
        col_config = col.get_config()
        if 'key' in col_config:
            labels.append(col_config['key'])
        elif 'categorical_column' in col_config:
            if col_config['categorical_column']['class_name'] == 'VocabolaryListCategoricalColumn':
                key = col_config['categorical_column']['config']['key']
                for item in col_config['categorical_column']['config']['vocabulary_list']:
                    labels.append(key+'_val='+str(item))
            elif col_config['categorical_column']['class_name'] == 'CrossedColumn':
                keys = col_config['categorical_column']['config']['keys']
                for bucket in range(col_config['categorical_column']['config']['hash_bucket_size']):
                    labels.append('x'.join(keys)+'_bkt_'+str(bucket))
                    
    return labels

In [17]:
labels = extract_labels(feature_columns)

for label, weight in zip(labels, weights):
    print(f"{label:15s} : {weight[0]:+.2f}")

CRIM            : -0.83
ZN              : -0.92
INDUS           : +0.91
NOX             : +0.88
RM              : +0.94
AGE             : -1.31
DIS             : -0.66
TAX             : -1.42
PTRATIO         : +0.86
B               : -0.88
LSTAT           : +0.88
RMxLSTAT_bkt_0  : -0.90
RMxLSTAT_bkt_1  : -0.91
RMxLSTAT_bkt_2  : -0.85
RMxLSTAT_bkt_3  : -0.96
RMxLSTAT_bkt_4  : -0.93
