In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import shutil

print(tf.__version__)

1.12.0


In [3]:
# In CSV, label is the first column, after the features, followed by the key
CSV_COLUMNS = ['fare_amount', 'pickuplon','pickuplat','dropofflon','dropofflat','passengers', 'key']
FEATURES = CSV_COLUMNS[1:len(CSV_COLUMNS) - 1]
LABEL = CSV_COLUMNS[0]

In [9]:
#Read the data by panads and assigned them the below variables
df_train = pd.read_csv('./taxi-train.csv', header=None, names=CSV_COLUMNS)
df_valid = pd.read_csv('./taxi-valid.csv', header=None, names=CSV_COLUMNS)
df_test = pd.read_csv('./taxi-test.csv', header=None, names=CSV_COLUMNS)

In [12]:
#Train and evaluate input functions to read from Pandas Dataframe
def make_train_input_fn(df, num_epochs):
    return tf.estimator.inputs.pandas_input_fn(x = df,
                                               y = df[LABEL],
                                               batch_size=128,
                                               num_epochs=num_epochs,
                                               shuffle=True,
                                               queue_capacity=1000
    )

def make_eval_input_fn(df):
    return tf.estimator.inputs.pandas_input_fn(x = df,
                                               y = df[LABEL],
                                               batch_size=128,
                                               shuffle=False,
                                               queue_capacity=1000
    )

In [13]:
#Predict function
def make_predict_input_fn(df):
    return tf.estimator.inputs.pandas_input_fn(x = df,
                                               y = None,
                                               batch_size=128,
                                               shuffle=False,
                                               queue_capacity=1000
    )

In [14]:
#create feature columns for estimator
def make_feature_cols():
    input_cols = [tf.feature_column.numeric_column(k) for k in FEATURES]
    return input_cols

In [20]:
#use a pre-built model in estimator, LinarRegression
tf.logging.set_verbosity(tf.logging.INFO)

OUTDIR = 'taxi_trained'
shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time

model = tf.estimator.LinearRegressor(feature_columns=make_feature_cols(), model_dir=OUTDIR)
model.train(input_fn=make_train_input_fn(df_train, num_epochs=10))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'taxi_trained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x00000174A6903EB8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph wa

<tensorflow.python.estimator.canned.linear.LinearRegressor at 0x1749f68b320>

In [25]:
#Evaluate by using the validation dataset
def print_rmse(model, df):
    metrics = model.evaluate(input_fn=make_eval_input_fn(df))
    metrics
    print('RMSE on dataset = {}'.format(np.sqrt(metrics['average_loss'])))
print_rmse(model, df_valid)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-01-20-12:42:39
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from taxi_trained\model.ckpt-608
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-01-20-12:42:40
INFO:tensorflow:Saving dict for global step 608: average_loss = 108.82914, global_step = 608, label/mean = 11.666427, loss = 12942.894, prediction/mean = 11.625585
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 608: taxi_trained\model.ckpt-608
RMSE on dataset = 10.432120323181152


In [28]:
predictions = model.predict(input_fn=make_predict_input_fn(df_test))
for items in predictions:
    print(items)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from taxi_trained\model.ckpt-608
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
{'predictions': array([11.578855], dtype=float32)}
{'predictions': array([11.576151], dtype=float32)}
{'predictions': array([11.577067], dtype=float32)}
{'predictions': array([11.574467], dtype=float32)}
{'predictions': array([11.578578], dtype=float32)}
{'predictions': array([11.578343], dtype=float32)}
{'predictions': array([11.576837], dtype=float32)}
{'predictions': array([11.576871], dtype=float32)}
{'predictions': array([11.578743], dtype=float32)}
{'predictions': array([11.576458], dtype=float32)}
{'predictions': array([11.578855], dtype=float32)}
{'predictions': array([11.579031], dtype=float32)}
{'predictions': array([11.571897], dtype=float32)}
{'predictions': array([11.576056], dtype=float32)}
{'predictions': array([

{'predictions': array([11.717365], dtype=float32)}
{'predictions': array([11.571801], dtype=float32)}
{'predictions': array([11.7842045], dtype=float32)}
{'predictions': array([11.576015], dtype=float32)}
{'predictions': array([11.578497], dtype=float32)}
{'predictions': array([11.574624], dtype=float32)}
{'predictions': array([11.645275], dtype=float32)}
{'predictions': array([11.576908], dtype=float32)}
{'predictions': array([11.918915], dtype=float32)}
{'predictions': array([11.576358], dtype=float32)}
{'predictions': array([11.577132], dtype=float32)}
{'predictions': array([11.575934], dtype=float32)}
{'predictions': array([11.646162], dtype=float32)}
{'predictions': array([11.580564], dtype=float32)}
{'predictions': array([11.648554], dtype=float32)}
{'predictions': array([11.645509], dtype=float32)}
{'predictions': array([11.850078], dtype=float32)}
{'predictions': array([11.698676], dtype=float32)}
{'predictions': array([11.577872], dtype=float32)}
{'predictions': array([11.7833

{'predictions': array([11.6411295], dtype=float32)}
{'predictions': array([11.574171], dtype=float32)}
{'predictions': array([11.579143], dtype=float32)}
{'predictions': array([11.648189], dtype=float32)}
{'predictions': array([11.574215], dtype=float32)}
{'predictions': array([11.919293], dtype=float32)}
{'predictions': array([11.579424], dtype=float32)}
{'predictions': array([11.576306], dtype=float32)}
{'predictions': array([11.574037], dtype=float32)}
{'predictions': array([11.57393], dtype=float32)}
{'predictions': array([11.646676], dtype=float32)}
{'predictions': array([11.576696], dtype=float32)}
{'predictions': array([11.577319], dtype=float32)}
{'predictions': array([11.575812], dtype=float32)}
{'predictions': array([11.715355], dtype=float32)}
{'predictions': array([11.57599], dtype=float32)}
{'predictions': array([11.580064], dtype=float32)}
{'predictions': array([11.576311], dtype=float32)}
{'predictions': array([11.579095], dtype=float32)}
{'predictions': array([11.577742

{'predictions': array([11.784424], dtype=float32)}
{'predictions': array([11.577949], dtype=float32)}
{'predictions': array([11.578187], dtype=float32)}
{'predictions': array([11.577348], dtype=float32)}
{'predictions': array([11.646178], dtype=float32)}
{'predictions': array([11.713912], dtype=float32)}
{'predictions': array([11.576983], dtype=float32)}
{'predictions': array([11.576552], dtype=float32)}
{'predictions': array([11.577378], dtype=float32)}
{'predictions': array([11.578853], dtype=float32)}
{'predictions': array([11.563811], dtype=float32)}
{'predictions': array([11.562679], dtype=float32)}
{'predictions': array([11.578074], dtype=float32)}
{'predictions': array([11.644291], dtype=float32)}
{'predictions': array([11.5777025], dtype=float32)}
{'predictions': array([11.646267], dtype=float32)}
{'predictions': array([11.713878], dtype=float32)}
{'predictions': array([11.645861], dtype=float32)}
{'predictions': array([11.578456], dtype=float32)}
{'predictions': array([11.5762

{'predictions': array([11.57675], dtype=float32)}
{'predictions': array([11.572099], dtype=float32)}
{'predictions': array([11.575491], dtype=float32)}
{'predictions': array([11.646267], dtype=float32)}
{'predictions': array([11.644356], dtype=float32)}
{'predictions': array([11.85], dtype=float32)}
{'predictions': array([11.57633], dtype=float32)}
{'predictions': array([11.57668], dtype=float32)}
{'predictions': array([11.577407], dtype=float32)}
{'predictions': array([11.781802], dtype=float32)}
{'predictions': array([11.645104], dtype=float32)}
{'predictions': array([11.579475], dtype=float32)}
{'predictions': array([11.641034], dtype=float32)}
{'predictions': array([11.57684], dtype=float32)}
{'predictions': array([11.577284], dtype=float32)}
{'predictions': array([11.847682], dtype=float32)}
{'predictions': array([11.850812], dtype=float32)}
{'predictions': array([11.5769205], dtype=float32)}
{'predictions': array([11.576379], dtype=float32)}
{'predictions': array([11.920716], dty

{'predictions': array([11.578695], dtype=float32)}
{'predictions': array([11.709472], dtype=float32)}
{'predictions': array([11.576598], dtype=float32)}
{'predictions': array([11.575988], dtype=float32)}
{'predictions': array([11.577664], dtype=float32)}
{'predictions': array([11.918955], dtype=float32)}
{'predictions': array([11.7149935], dtype=float32)}
{'predictions': array([11.644534], dtype=float32)}
{'predictions': array([11.579507], dtype=float32)}
{'predictions': array([11.576665], dtype=float32)}
{'predictions': array([11.578244], dtype=float32)}
{'predictions': array([11.57637], dtype=float32)}
{'predictions': array([11.849246], dtype=float32)}
{'predictions': array([11.5664425], dtype=float32)}
{'predictions': array([11.576271], dtype=float32)}
{'predictions': array([11.576934], dtype=float32)}
{'predictions': array([11.576994], dtype=float32)}
{'predictions': array([11.851136], dtype=float32)}
{'predictions': array([11.712451], dtype=float32)}
{'predictions': array([11.5771

In [30]:
#Let's try deep neural network regression
tf.logging.set_verbosity(tf.logging.INFO)
shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time
model = tf.estimator.DNNRegressor(hidden_units = [64, 32, 2],
      feature_columns = make_feature_cols(), model_dir = OUTDIR)
model.train(input_fn = make_train_input_fn(df_train, num_epochs = 200));
print_rmse(model, df_valid)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'taxi_trained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x00000174A930C390>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph wa

INFO:tensorflow:loss = 11262.41, step = 7201 (0.396 sec)
INFO:tensorflow:global_step/sec: 236.899
INFO:tensorflow:loss = 8640.696, step = 7301 (0.421 sec)
INFO:tensorflow:global_step/sec: 205.214
INFO:tensorflow:loss = 13340.951, step = 7401 (0.487 sec)
INFO:tensorflow:global_step/sec: 251.22
INFO:tensorflow:loss = 12722.576, step = 7501 (0.399 sec)
INFO:tensorflow:global_step/sec: 224.627
INFO:tensorflow:loss = 7757.235, step = 7601 (0.443 sec)
INFO:tensorflow:global_step/sec: 211.301
INFO:tensorflow:loss = 10208.061, step = 7701 (0.474 sec)
INFO:tensorflow:global_step/sec: 214.008
INFO:tensorflow:loss = 9137.534, step = 7801 (0.468 sec)
INFO:tensorflow:global_step/sec: 221.127
INFO:tensorflow:loss = 8554.171, step = 7901 (0.450 sec)
INFO:tensorflow:global_step/sec: 219.226
INFO:tensorflow:loss = 9986.307, step = 8001 (0.457 sec)
INFO:tensorflow:global_step/sec: 216.329
INFO:tensorflow:loss = 11893.18, step = 8101 (0.462 sec)
INFO:tensorflow:global_step/sec: 222.636
INFO:tensorflow:lo