In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [37]:
%load_ext tensorboard

In [5]:
columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']

data = pd.read_table('housing.data', delim_whitespace=True, header=None, names=columns)
data

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296.0,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273.0,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273.0,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273.0,21.0,393.45,6.48,22.0


In [6]:
np.random.seed(1)
train = data.sample(frac=0.8).copy()
y_train = train['MEDV']
train.drop('MEDV', axis=1, inplace=True)

In [9]:
test = data.loc[~data.index.isin(train.index)].copy()
y_test = test['MEDV']
test.drop('MEDV', axis=1, inplace=True)

In [27]:
learning_rate = 0.05

def make_input_fn(data, label, num_epochs=10, shuffle=True, batch_size=256):
    def input_function():
        ds = tf.data.Dataset.from_tensor_slices((dict(data), label))
        if shuffle:
            ds = ds.shuffle(1000)
        
        ds = ds.batch(batch_size).repeat(num_epochs)
        return ds
    return input_function


def define_feature_columns(data, categorical_cols, numeric_cols):
    feature_columns = list()
    
    for feature_name in numeric_cols:
        feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))
        
    for feature_name in categorical_cols:
        vocabulary = data[feature_name].unique()
        feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))
    
    return feature_columns

In [28]:
categorical_cols = ['CHAS', 'RAD']
numeric_cols = ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
feature_columns = define_feature_columns(data, categorical_cols, numeric_cols)
train_input_fn = make_input_fn(train, y_train, num_epochs=1400)
test_input_fn = make_input_fn(test, y_test, num_epochs=1, shuffle=False)

In [47]:
output_dir = 'E:\GITHUB\Machine-Learning-Using-TensorFlow-Cookbook\logs\LinearRegressor_1'
linear_est = tf.estimator.LinearRegressor(feature_columns=feature_columns, model_dir = output_dir,
                                          config=tf.estimator.RunConfig().replace(save_summary_steps=100))


INFO:tensorflow:Using config: {'_model_dir': 'E:\\GITHUB\\Machine-Learning-Using-TensorFlow-Cookbook\\logs\\LinearRegressor_1', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [48]:
linear_est.train(train_input_fn)
result = linear_est.evaluate(test_input_fn)
print(result)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into E:\GITHUB\Machine-Learning-Using-TensorFlow-Cookbook\logs\LinearRegressor_1\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 578.54846, step = 0
INFO:tensorflow:global_step/sec: 265.89
INFO:tensorflow:loss = 58.053375, step = 100 (0.378 sec)
INFO:tensorflow:global_step/sec: 400.844
INFO:tensorflow:loss = 59.848156, step = 200 (0.247 sec)
INFO:tensorflow:global_step/sec: 375.094
INFO:tensorflow:loss = 49.909866, step = 300 (0.267 sec)
INFO:tensorflow:global_step/sec: 390.293
INFO:tensorflow:loss = 50.040386, step = 400 (0.256 sec)
INFO:tensorflow:global_step/sec: 391.252
INFO:

In [49]:
%tensorboard --logdir ./logs/LinearRegressor_1

Reusing TensorBoard on port 6006 (pid 19512), started 0:02:35 ago. (Use '!kill 19512' to kill it.)

In [50]:
def create_interactions(interactions_list, buckets=5):
    interactions = list()
    for (a, b) in interactions_list:
        interactions.append(tf.feature_column.crossed_column([a, b], hash_bucket_size=buckets))
        
    return interactions

In [51]:
derived_feature_columns = create_interactions([['RM', 'LSTAT']])
linear_est = tf.estimator.LinearRegressor(feature_columns=feature_columns+derived_feature_columns)
linear_est.train(train_input_fn)
result = linear_est.evaluate(test_input_fn)
print(result)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\Siddh\\AppData\\Local\\Temp\\tmp_lmkar3e', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorf

In [55]:
def dicts_to_preds(pred_dicts):
    return np.array([pred['predictions'] for pred in pred_dicts])

preds = dicts_to_preds(linear_est.predict(test_input_fn))
print(preds)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\Siddh\AppData\Local\Temp\tmp_lmkar3e\model.ckpt-2800
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
[[24.684187 ]
 [30.252073 ]
 [19.658419 ]
 [21.394066 ]
 [17.697195 ]
 [16.679813 ]
 [17.953295 ]
 [12.3342495]
 [23.303185 ]
 [25.137661 ]
 [20.675009 ]
 [16.966297 ]
 [28.49061  ]
 [16.722569 ]
 [19.48644  ]
 [20.751162 ]
 [23.864828 ]
 [22.660883 ]
 [24.461622 ]
 [22.4516   ]
 [26.352623 ]
 [24.904194 ]
 [22.672369 ]
 [19.140184 ]
 [20.993477 ]
 [11.68483  ]
 [17.16812  ]
 [21.150965 ]
 [20.048054 ]
 [ 5.4768624]
 [12.776049 ]
 [ 9.833223 ]
 [19.635878 ]
 [19.09492  ]
 [15.149757 ]
 [33.344868 ]
 [27.358667 ]
 [28.697231 ]
 [33.976902 ]
 [33.992447 ]
 [34.76483  ]
 [15.477875 ]
 [19.988604 ]
 [23.757557 ]
 [36.139313 ]
 [26.03589  ]
 [32.27317  ]
 [24.399637 ]
 [19.953306 ]
 [22.41643  ]
 [2

In [9]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow import keras

In [10]:
columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']

data = pd.read_table('housing.data', delim_whitespace=True, header=None, names=columns)
data

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296.0,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273.0,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273.0,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273.0,21.0,393.45,6.48,22.0


In [21]:
def define_feature_column_layers(data, categorical_cols, numeric_cols):
    feature_columns = list()
    feature_layer_inputs = dict()
    
    for feature_name in numeric_cols:
        feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))
        feature_layer_inputs[feature_name] = tf.keras.Input(shape=(1,), name=feature_name)
        
    for feature_name in categorical_cols:
        vocabulary = data[feature_name].unique()
        cat = tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary)
        cat_one_hot = tf.feature_column.indicator_column(cat)
        feature_columns.append(cat_one_hot)
        feature_layer_inputs[feature_name] = tf.keras.Input(shape=(1,), name=feature_name, dtype=tf.int32)
        
    return feature_columns, feature_layer_inputs

In [22]:
def create_interactions(interactions_list, buckets=5):
    feature_columns = list()
    for (a, b) in interactions_list:
        crossed_feature = tf.feature_column.crossed_column([a, b], hash_size=buckets)
        crossed_feature_one_hot = tf.fearure_column.indicator_column(crossed_feature)
        feature_columns.append(crossed_feature_one_hot)

In [25]:
def create_linreg(feature_columns, feature_layer_inputs, optimizer):
    feature_layer = keras.layers.DenseFeatures(feature_columns)
    feature_layer_outputs = feature_layer(feature_layer_inputs)
    norm = keras.layers.BatchNormalization()(feature_layer_outputs)
    outputs = keras.layers.Dense(1, kernel_initializer='normal', activation='linear')(norm)
    model = keras.Model(inputs=[v for v in feature_layer_inputs.values()], outputs=outputs)
    model.compile(optimizer=optimizer, loss="mean_squared_error")
    return model

In [26]:
categorical_cols = ['CHAS', 'RAD']
numeric_cols = ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
feature_columns, feature_layer_inputs = define_feature_column_layers(data, categorical_cols, numeric_cols)
optimizer = keras.optimizers.Ftrl(learning_rate=0.02)
model = create_linreg(feature_columns, feature_layer_inputs, optimizer)

In [28]:
import tempfile

def canned_keras(model):
    model_dir = tempfile.mkdtemp()
    keras_estimator = keras.estimator.model_to_estimator(keras_model=model, model_dir=model_dir)
    return keras_estimator

In [29]:
estimator = canned_keras(model)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using the Keras model provided.
Instructions for updating:
Colocations handled automatically by placer.




Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\Siddh\\AppData\\Local\\Temp\\tmplif1mp09', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replica

In [30]:
def make_input_fn(data, label, num_epochs=10, shuffle=True, batch_size=256):
    def input_function():
        ds = tf.data.Dataset.from_tensor_slices((dict(data), label))
        if shuffle:
            ds = ds.shuffle(1000)
        
        ds = ds.batch(batch_size).repeat(num_epochs)
        return ds
    return input_function

In [33]:
np.random.seed(1)
train = data.sample(frac=0.8).copy()
y_train = train['MEDV']
train.drop('MEDV', axis=1, inplace=True)

In [34]:
test = data.loc[~data.index.isin(train.index)].copy()
y_test = test['MEDV']
test.drop('MEDV', axis=1, inplace=True)

In [36]:
train_input_fn = make_input_fn(train, y_train, num_epochs=1400)
test_input_fn = make_input_fn(test, y_test, num_epochs=1, shuffle=False)
estimator.train(train_input_fn)
result = estimator.evaluate(test_input_fn)
print(result)

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Calling model_fn.




INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Warm-starting with WarmStartSettings: WarmStartSettings(ckpt_to_initialize_from='C:\\Users\\Siddh\\AppData\\Local\\Temp\\tmplif1mp09\\keras\\keras_model.ckpt', vars_to_warm_start='.*', var_name_to_vocab_info={}, var_name_to_prev_var_name={})
INFO:tensorflow:Warm-starting from: C:\Users\Siddh\AppData\Local\Temp\tmplif1mp09\keras\keras_model.ckpt
INFO:tensorflow:Warm-starting variables only in TRAINABLE_VARIABLES.
INFO:tensorflow:Warm-started 4 variables.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\Siddh\AppData\Local\Temp\tmplif1mp09\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 610.5638, step = 0
INFO:tensorflow:global_step/

  updates = self.state_updates


INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\Siddh\AppData\Local\Temp\tmplif1mp09\model.ckpt-2800
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.16640s
INFO:tensorflow:Finished evaluation at 2022-10-31-10:43:30
INFO:tensorflow:Saving dict for global step 2800: global_step = 2800, loss = 24.923407
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 2800: C:\Users\Siddh\AppData\Local\Temp\tmplif1mp09\model.ckpt-2800
{'loss': 24.923407, 'global_step': 2800}
