In [1]:
import feather
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn import model_selection
from sklearn import preprocessing

import tensorflow as tf

### Load NYC Taxi fare prepped data

In [2]:
train_df = feather.read_dataframe("../../datasets/kaggle/new-york-city-taxi-fare-prediction/train.feather")

In [3]:
train_df["hour_period"]=train_df["hour"] // 4

In [4]:
train_df = pd.get_dummies(train_df, prefix=["year","hour_period"], columns=["year","hour_period"])

In [5]:
print(train_df.shape)
print(train_df.columns)

(54246832, 31)
Index(['fare_amount', 'pickup_datetime', 'pickup_longitude', 'pickup_latitude',
       'dropoff_longitude', 'dropoff_latitude', 'passenger_count',
       'distance_miles', 'is_pickup_JFK_new', 'is_dropoff_JFK_new',
       'is_pickup_EWR_new', 'is_dropoff_EWR_new', 'is_pickup_LGA_new',
       'is_dropoff_LGA_new', 'is_to_from_JFK_new', 'distance_to_center',
       'hour', 'weekday', 'year_2009', 'year_2010', 'year_2011', 'year_2012',
       'year_2013', 'year_2014', 'year_2015', 'hour_period_0', 'hour_period_1',
       'hour_period_2', 'hour_period_3', 'hour_period_4', 'hour_period_5'],
      dtype='object')


In [6]:
cols = [
    "passenger_count",
    "distance_miles",
    "distance_to_center",
    "is_to_from_JFK_new",
    "year_2009",
    "year_2010",
    "year_2011",
    "year_2012",
    "year_2013",
    "year_2014",
    "year_2015",
    "hour_period_0",
    "hour_period_1",
    "hour_period_2",
    "hour_period_3",
    "hour_period_4",
    "hour_period_5",
]
x = train_df[cols].values
y = train_df[['fare_amount']].values

In [7]:
x_train, x_val, y_train, y_val = model_selection.train_test_split(
      x, y, test_size=0.1, random_state=42)

In [8]:
train_df = None
x = None
y = None

In [9]:
scaler = preprocessing.StandardScaler()
x_train_norm = scaler.fit_transform(x_train)



In [10]:
x_val_norm = scaler.transform(x_val)



In [11]:
x_train = None
x_val = None
x_train_norm.shape

(48822148, 17)

In [46]:
feature_columns = [
    tf.feature_column.numeric_column('x', shape=np.array(x_train_norm).shape[1:])]

In [47]:
from datetime import datetime
print(datetime.now())
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': x_train_norm}, y=y_train, batch_size=10000, num_epochs=25, shuffle=True) 
regressor = tf.estimator.DNNRegressor(
    feature_columns=feature_columns, hidden_units=[50, 25, 25])
regressor.train(input_fn=train_input_fn)
print(datetime.now())

2018-09-08 10:01:08.872058
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpi5s6r3bc', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f395a4ccc50>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpi5

INFO:tensorflow:global_step/sec: 47.6082
INFO:tensorflow:loss = 173499.03, step = 7300 (2.100 sec)
INFO:tensorflow:global_step/sec: 52.2069
INFO:tensorflow:loss = 190425.5, step = 7400 (1.915 sec)
INFO:tensorflow:global_step/sec: 44.4412
INFO:tensorflow:loss = 157887.6, step = 7500 (2.252 sec)
INFO:tensorflow:global_step/sec: 48.9611
INFO:tensorflow:loss = 235423.9, step = 7600 (2.041 sec)
INFO:tensorflow:global_step/sec: 52.157
INFO:tensorflow:loss = 224782.44, step = 7700 (1.917 sec)
INFO:tensorflow:global_step/sec: 53.771
INFO:tensorflow:loss = 203985.48, step = 7800 (1.860 sec)
INFO:tensorflow:global_step/sec: 42.6969
INFO:tensorflow:loss = 177968.3, step = 7900 (2.342 sec)
INFO:tensorflow:global_step/sec: 51.4146
INFO:tensorflow:loss = 167168.61, step = 8000 (1.945 sec)
INFO:tensorflow:global_step/sec: 47.1843
INFO:tensorflow:loss = 184510.55, step = 8100 (2.119 sec)
INFO:tensorflow:global_step/sec: 52.3227
INFO:tensorflow:loss = 185845.84, step = 8200 (1.911 sec)
INFO:tensorflow:

INFO:tensorflow:global_step/sec: 46.5058
INFO:tensorflow:loss = 223814.27, step = 15600 (2.150 sec)
INFO:tensorflow:global_step/sec: 53.1249
INFO:tensorflow:loss = 463006.56, step = 15700 (1.882 sec)
INFO:tensorflow:global_step/sec: 52.2863
INFO:tensorflow:loss = 237792.5, step = 15800 (1.913 sec)
INFO:tensorflow:global_step/sec: 52.1664
INFO:tensorflow:loss = 189806.89, step = 15900 (1.917 sec)
INFO:tensorflow:global_step/sec: 54.1019
INFO:tensorflow:loss = 259172.34, step = 16000 (1.849 sec)
INFO:tensorflow:global_step/sec: 47.7479
INFO:tensorflow:loss = 198640.36, step = 16100 (2.094 sec)
INFO:tensorflow:global_step/sec: 48.6676
INFO:tensorflow:loss = 184035.19, step = 16200 (2.055 sec)
INFO:tensorflow:global_step/sec: 42.7219
INFO:tensorflow:loss = 157760.19, step = 16300 (2.342 sec)
INFO:tensorflow:global_step/sec: 55.2615
INFO:tensorflow:loss = 200653.8, step = 16400 (1.808 sec)
INFO:tensorflow:global_step/sec: 46.1104
INFO:tensorflow:loss = 235155.03, step = 16500 (2.169 sec)
IN

INFO:tensorflow:loss = 212629.12, step = 23800 (1.840 sec)
INFO:tensorflow:global_step/sec: 54.126
INFO:tensorflow:loss = 453211.88, step = 23900 (1.848 sec)
INFO:tensorflow:global_step/sec: 52.3267
INFO:tensorflow:loss = 173795.67, step = 24000 (1.912 sec)
INFO:tensorflow:global_step/sec: 49.3999
INFO:tensorflow:loss = 505303.3, step = 24100 (2.023 sec)
INFO:tensorflow:global_step/sec: 52.5646
INFO:tensorflow:loss = 183457.53, step = 24200 (1.902 sec)
INFO:tensorflow:global_step/sec: 53.2005
INFO:tensorflow:loss = 192268.72, step = 24300 (1.880 sec)
INFO:tensorflow:global_step/sec: 51.1043
INFO:tensorflow:loss = 161883.56, step = 24400 (1.956 sec)
INFO:tensorflow:global_step/sec: 53.2089
INFO:tensorflow:loss = 262356.8, step = 24500 (1.879 sec)
INFO:tensorflow:global_step/sec: 53.5203
INFO:tensorflow:loss = 177155.73, step = 24600 (1.869 sec)
INFO:tensorflow:global_step/sec: 48.0708
INFO:tensorflow:loss = 180439.36, step = 24700 (2.080 sec)
INFO:tensorflow:global_step/sec: 49.2959
INF

INFO:tensorflow:global_step/sec: 52.7987
INFO:tensorflow:loss = 164797.56, step = 32000 (1.894 sec)
INFO:tensorflow:global_step/sec: 52.9601
INFO:tensorflow:loss = 184973.7, step = 32100 (1.888 sec)
INFO:tensorflow:global_step/sec: 53.2249
INFO:tensorflow:loss = 191233.44, step = 32200 (1.879 sec)
INFO:tensorflow:global_step/sec: 46.8718
INFO:tensorflow:loss = 180745.16, step = 32300 (2.133 sec)
INFO:tensorflow:global_step/sec: 51.4876
INFO:tensorflow:loss = 168648.47, step = 32400 (1.943 sec)
INFO:tensorflow:global_step/sec: 49.3624
INFO:tensorflow:loss = 165251.9, step = 32500 (2.026 sec)
INFO:tensorflow:global_step/sec: 54.4997
INFO:tensorflow:loss = 176396.75, step = 32600 (1.835 sec)
INFO:tensorflow:global_step/sec: 51.1308
INFO:tensorflow:loss = 234995.88, step = 32700 (1.956 sec)
INFO:tensorflow:global_step/sec: 54.6896
INFO:tensorflow:loss = 210079.95, step = 32800 (1.828 sec)
INFO:tensorflow:global_step/sec: 50.3904
INFO:tensorflow:loss = 214352.55, step = 32900 (1.985 sec)
IN

INFO:tensorflow:loss = 170082.38, step = 40200 (1.935 sec)
INFO:tensorflow:global_step/sec: 48.2725
INFO:tensorflow:loss = 152005.94, step = 40300 (2.072 sec)
INFO:tensorflow:global_step/sec: 51.5353
INFO:tensorflow:loss = 217800.95, step = 40400 (1.939 sec)
INFO:tensorflow:global_step/sec: 55.6338
INFO:tensorflow:loss = 263315.94, step = 40500 (1.797 sec)
INFO:tensorflow:global_step/sec: 51.6372
INFO:tensorflow:loss = 220013.03, step = 40600 (1.937 sec)
INFO:tensorflow:global_step/sec: 51.9215
INFO:tensorflow:loss = 189676.47, step = 40700 (1.926 sec)
INFO:tensorflow:global_step/sec: 49.8762
INFO:tensorflow:loss = 193056.38, step = 40800 (2.005 sec)
INFO:tensorflow:global_step/sec: 49.1037
INFO:tensorflow:loss = 171813.84, step = 40900 (2.037 sec)
INFO:tensorflow:global_step/sec: 43.8084
INFO:tensorflow:loss = 233729.75, step = 41000 (2.282 sec)
INFO:tensorflow:global_step/sec: 47.2285
INFO:tensorflow:loss = 179782.6, step = 41100 (2.118 sec)
INFO:tensorflow:global_step/sec: 56.8353
I

INFO:tensorflow:global_step/sec: 50.9144
INFO:tensorflow:loss = 169307.94, step = 48500 (1.964 sec)
INFO:tensorflow:global_step/sec: 55.6379
INFO:tensorflow:loss = 194280.08, step = 48600 (1.797 sec)
INFO:tensorflow:global_step/sec: 52.131
INFO:tensorflow:loss = 202837.4, step = 48700 (1.918 sec)
INFO:tensorflow:global_step/sec: 55.0768
INFO:tensorflow:loss = 198208.84, step = 48800 (1.816 sec)
INFO:tensorflow:global_step/sec: 49.2225
INFO:tensorflow:loss = 176658.97, step = 48900 (2.031 sec)
INFO:tensorflow:global_step/sec: 49.1609
INFO:tensorflow:loss = 137912.97, step = 49000 (2.034 sec)
INFO:tensorflow:global_step/sec: 48.2589
INFO:tensorflow:loss = 177553.17, step = 49100 (2.073 sec)
INFO:tensorflow:global_step/sec: 55.4138
INFO:tensorflow:loss = 167913.48, step = 49200 (1.804 sec)
INFO:tensorflow:global_step/sec: 52.3501
INFO:tensorflow:loss = 242922.72, step = 49300 (1.910 sec)
INFO:tensorflow:global_step/sec: 50.795
INFO:tensorflow:loss = 176813.42, step = 49400 (1.970 sec)
INF

INFO:tensorflow:loss = 208816.19, step = 56700 (1.868 sec)
INFO:tensorflow:global_step/sec: 49.4613
INFO:tensorflow:loss = 158243.5, step = 56800 (2.022 sec)
INFO:tensorflow:global_step/sec: 53.8894
INFO:tensorflow:loss = 197294.45, step = 56900 (1.856 sec)
INFO:tensorflow:global_step/sec: 54.1292
INFO:tensorflow:loss = 172944.03, step = 57000 (1.847 sec)
INFO:tensorflow:global_step/sec: 47.9945
INFO:tensorflow:loss = 219011.81, step = 57100 (2.084 sec)
INFO:tensorflow:global_step/sec: 51.794
INFO:tensorflow:loss = 185049.56, step = 57200 (1.932 sec)
INFO:tensorflow:global_step/sec: 54.5312
INFO:tensorflow:loss = 167713.03, step = 57300 (1.833 sec)
INFO:tensorflow:global_step/sec: 54.7286
INFO:tensorflow:loss = 181780.9, step = 57400 (1.827 sec)
INFO:tensorflow:global_step/sec: 48.2017
INFO:tensorflow:loss = 168611.8, step = 57500 (2.075 sec)
INFO:tensorflow:global_step/sec: 53.9015
INFO:tensorflow:loss = 191527.4, step = 57600 (1.855 sec)
INFO:tensorflow:global_step/sec: 55.5615
INFO:

INFO:tensorflow:global_step/sec: 48.752
INFO:tensorflow:loss = 176385.47, step = 64900 (2.050 sec)
INFO:tensorflow:global_step/sec: 53.8188
INFO:tensorflow:loss = 225886.88, step = 65000 (1.859 sec)
INFO:tensorflow:global_step/sec: 51.5968
INFO:tensorflow:loss = 186323.53, step = 65100 (1.937 sec)
INFO:tensorflow:global_step/sec: 54.762
INFO:tensorflow:loss = 199367.86, step = 65200 (1.826 sec)
INFO:tensorflow:global_step/sec: 50.2039
INFO:tensorflow:loss = 192791.83, step = 65300 (1.992 sec)
INFO:tensorflow:global_step/sec: 53.2437
INFO:tensorflow:loss = 192003.31, step = 65400 (1.879 sec)
INFO:tensorflow:global_step/sec: 47.7103
INFO:tensorflow:loss = 165992.23, step = 65500 (2.096 sec)
INFO:tensorflow:global_step/sec: 52.5746
INFO:tensorflow:loss = 178052.08, step = 65600 (1.903 sec)
INFO:tensorflow:global_step/sec: 56.8999
INFO:tensorflow:loss = 193280.06, step = 65700 (1.757 sec)
INFO:tensorflow:global_step/sec: 49.5564
INFO:tensorflow:loss = 161459.84, step = 65800 (2.018 sec)
IN

INFO:tensorflow:loss = 175535.97, step = 73100 (2.126 sec)
INFO:tensorflow:global_step/sec: 53.1351
INFO:tensorflow:loss = 197607.0, step = 73200 (1.881 sec)
INFO:tensorflow:global_step/sec: 52.5567
INFO:tensorflow:loss = 147599.53, step = 73300 (1.903 sec)
INFO:tensorflow:global_step/sec: 46.5405
INFO:tensorflow:loss = 208945.97, step = 73400 (2.149 sec)
INFO:tensorflow:global_step/sec: 51.4168
INFO:tensorflow:loss = 173464.62, step = 73500 (1.945 sec)
INFO:tensorflow:global_step/sec: 50.4439
INFO:tensorflow:loss = 216671.4, step = 73600 (1.982 sec)
INFO:tensorflow:global_step/sec: 53.4849
INFO:tensorflow:loss = 175943.69, step = 73700 (1.870 sec)
INFO:tensorflow:global_step/sec: 51.0701
INFO:tensorflow:loss = 169209.06, step = 73800 (1.958 sec)
INFO:tensorflow:global_step/sec: 49.7772
INFO:tensorflow:loss = 193000.47, step = 73900 (2.009 sec)
INFO:tensorflow:global_step/sec: 49.16
INFO:tensorflow:loss = 202353.75, step = 74000 (2.035 sec)
INFO:tensorflow:global_step/sec: 49.9539
INFO

INFO:tensorflow:global_step/sec: 50.7462
INFO:tensorflow:loss = 165092.38, step = 81400 (1.971 sec)
INFO:tensorflow:global_step/sec: 47.8241
INFO:tensorflow:loss = 236141.02, step = 81500 (2.092 sec)
INFO:tensorflow:global_step/sec: 49.1641
INFO:tensorflow:loss = 199037.84, step = 81600 (2.033 sec)
INFO:tensorflow:global_step/sec: 50.2095
INFO:tensorflow:loss = 167836.58, step = 81700 (1.993 sec)
INFO:tensorflow:global_step/sec: 52.2596
INFO:tensorflow:loss = 170336.89, step = 81800 (1.913 sec)
INFO:tensorflow:global_step/sec: 48.7912
INFO:tensorflow:loss = 176130.53, step = 81900 (2.050 sec)
INFO:tensorflow:global_step/sec: 49.1653
INFO:tensorflow:loss = 218959.97, step = 82000 (2.034 sec)
INFO:tensorflow:global_step/sec: 54.5389
INFO:tensorflow:loss = 209353.72, step = 82100 (1.833 sec)
INFO:tensorflow:global_step/sec: 49.9221
INFO:tensorflow:loss = 202637.4, step = 82200 (2.005 sec)
INFO:tensorflow:global_step/sec: 51.4023
INFO:tensorflow:loss = 164716.06, step = 82300 (1.945 sec)
I

INFO:tensorflow:loss = 274299.94, step = 89600 (2.155 sec)
INFO:tensorflow:global_step/sec: 50.0295
INFO:tensorflow:loss = 173768.4, step = 89700 (1.999 sec)
INFO:tensorflow:global_step/sec: 51.0052
INFO:tensorflow:loss = 157158.38, step = 89800 (1.960 sec)
INFO:tensorflow:global_step/sec: 51.0693
INFO:tensorflow:loss = 178916.31, step = 89900 (1.959 sec)
INFO:tensorflow:global_step/sec: 54.5912
INFO:tensorflow:loss = 210372.28, step = 90000 (1.831 sec)
INFO:tensorflow:global_step/sec: 54.1309
INFO:tensorflow:loss = 159466.39, step = 90100 (1.848 sec)
INFO:tensorflow:global_step/sec: 48.7654
INFO:tensorflow:loss = 183384.47, step = 90200 (2.051 sec)
INFO:tensorflow:global_step/sec: 54.4112
INFO:tensorflow:loss = 203258.66, step = 90300 (1.838 sec)
INFO:tensorflow:global_step/sec: 48.7733
INFO:tensorflow:loss = 187721.05, step = 90400 (2.050 sec)
INFO:tensorflow:global_step/sec: 50.8919
INFO:tensorflow:loss = 192007.94, step = 90500 (1.966 sec)
INFO:tensorflow:global_step/sec: 52.6482
I

INFO:tensorflow:global_step/sec: 54.2403
INFO:tensorflow:loss = 260613.03, step = 97800 (1.843 sec)
INFO:tensorflow:global_step/sec: 50.619
INFO:tensorflow:loss = 202564.39, step = 97900 (1.977 sec)
INFO:tensorflow:global_step/sec: 49.4902
INFO:tensorflow:loss = 188595.39, step = 98000 (2.020 sec)
INFO:tensorflow:global_step/sec: 49.1251
INFO:tensorflow:loss = 187533.69, step = 98100 (2.035 sec)
INFO:tensorflow:global_step/sec: 50.8216
INFO:tensorflow:loss = 159422.27, step = 98200 (1.968 sec)
INFO:tensorflow:global_step/sec: 53.068
INFO:tensorflow:loss = 196322.03, step = 98300 (1.884 sec)
INFO:tensorflow:global_step/sec: 56.6664
INFO:tensorflow:loss = 221758.19, step = 98400 (1.764 sec)
INFO:tensorflow:global_step/sec: 51.3219
INFO:tensorflow:loss = 180457.47, step = 98500 (1.950 sec)
INFO:tensorflow:global_step/sec: 48.7495
INFO:tensorflow:loss = 186482.02, step = 98600 (2.050 sec)
INFO:tensorflow:global_step/sec: 49.1566
INFO:tensorflow:loss = 171770.88, step = 98700 (2.034 sec)
IN

INFO:tensorflow:global_step/sec: 54.6569
INFO:tensorflow:loss = 171900.66, step = 106000 (1.830 sec)
INFO:tensorflow:global_step/sec: 49.6016
INFO:tensorflow:loss = 214602.75, step = 106100 (2.016 sec)
INFO:tensorflow:global_step/sec: 51.6347
INFO:tensorflow:loss = 176010.78, step = 106200 (1.937 sec)
INFO:tensorflow:global_step/sec: 46.6061
INFO:tensorflow:loss = 183676.53, step = 106300 (2.146 sec)
INFO:tensorflow:global_step/sec: 53.0292
INFO:tensorflow:loss = 174593.3, step = 106400 (1.886 sec)
INFO:tensorflow:global_step/sec: 48.1837
INFO:tensorflow:loss = 168425.81, step = 106500 (2.076 sec)
INFO:tensorflow:global_step/sec: 49.3352
INFO:tensorflow:loss = 199842.2, step = 106600 (2.027 sec)
INFO:tensorflow:global_step/sec: 51.3197
INFO:tensorflow:loss = 145041.47, step = 106700 (1.948 sec)
INFO:tensorflow:global_step/sec: 51.2357
INFO:tensorflow:loss = 157345.31, step = 106800 (1.952 sec)
INFO:tensorflow:global_step/sec: 54.5588
INFO:tensorflow:loss = 164861.34, step = 106900 (1.8

INFO:tensorflow:loss = 170670.02, step = 114100 (2.059 sec)
INFO:tensorflow:global_step/sec: 48.5652
INFO:tensorflow:loss = 165132.0, step = 114200 (2.059 sec)
INFO:tensorflow:global_step/sec: 53.5735
INFO:tensorflow:loss = 180956.36, step = 114300 (1.867 sec)
INFO:tensorflow:global_step/sec: 48.936
INFO:tensorflow:loss = 211855.4, step = 114400 (2.044 sec)
INFO:tensorflow:global_step/sec: 47.0494
INFO:tensorflow:loss = 171628.19, step = 114500 (2.125 sec)
INFO:tensorflow:global_step/sec: 51.9146
INFO:tensorflow:loss = 180736.25, step = 114600 (1.926 sec)
INFO:tensorflow:global_step/sec: 51.7197
INFO:tensorflow:loss = 186338.88, step = 114700 (1.933 sec)
INFO:tensorflow:global_step/sec: 51.5812
INFO:tensorflow:loss = 197118.61, step = 114800 (1.939 sec)
INFO:tensorflow:global_step/sec: 50.7109
INFO:tensorflow:loss = 183703.56, step = 114900 (1.972 sec)
INFO:tensorflow:global_step/sec: 53.8168
INFO:tensorflow:loss = 161343.78, step = 115000 (1.858 sec)
INFO:tensorflow:global_step/sec: 5

In [48]:
val_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': x_val_norm}, y=y_val, num_epochs=1, shuffle=False)
scores = regressor.evaluate(input_fn=val_input_fn)
print('MSE (tensorflow): {0:f}'.format(scores['average_loss']))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-09-08-17:57:34
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpi5s6r3bc/model.ckpt-122056
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-09-08-17:58:01
INFO:tensorflow:Saving dict for global step 122056: average_loss = 20.385914, global_step = 122056, label/mean = 11.321863, loss = 2609.3567, prediction/mean = 11.31645
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 122056: /tmp/tmpi5s6r3bc/model.ckpt-122056
MSE (tensorflow): 20.385914


In [49]:
predictions = regressor.predict(input_fn=val_input_fn)
y_predicted = np.array(list(p['predictions'] for p in predictions))
y_predicted = y_predicted.reshape(np.array(y_val).shape)
score_sklearn = metrics.mean_squared_error(y_predicted, y_val)
print('MSE (sklearn): {0:f}'.format(score_sklearn))
y_predicted.shape

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpi5s6r3bc/model.ckpt-122056
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
MSE (sklearn): 20.385874


(5424684, 1)

In [50]:
y_predicted[0:2]

array([[9.330373],
       [6.821217]], dtype=float32)

In [51]:
test_df = feather.read_dataframe("../../datasets/kaggle/new-york-city-taxi-fare-prediction/test.feather")

In [52]:
test_df["hour_period"]=test_df["hour"] // 4
test_df = pd.get_dummies(test_df, prefix=["year","hour_period"], columns=["year","hour_period"])

In [53]:
cols = [
    "passenger_count",
    "distance_miles",
    "distance_to_center",
    "is_to_from_JFK_new",
    "year_2009",
    "year_2010",
    "year_2011",
    "year_2012",
    "year_2013",
    "year_2014",
    "year_2015",
    "hour_period_0",
    "hour_period_1",
    "hour_period_2",
    "hour_period_3",
    "hour_period_4",
    "hour_period_5",
]
x_test = test_df[cols].values
x_test_norm = scaler.transform(x_test)



In [54]:
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': x_test_norm}, num_epochs=1, shuffle=False)
test_predictions = regressor.predict(input_fn=test_input_fn)
y_predicted = np.array(list(p['predictions'] for p in test_predictions))
y_predicted = y_predicted.reshape((9914,1))
#y_predicted = np.array(list(test_predictions))
#y_predicted = y_predicted.reshape((9914,1))
y_predicted.shape

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpi5s6r3bc/model.ckpt-122056
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


(9914, 1)

In [55]:
preds = [p[0] for p in y_predicted]
preds

[10.449707,
 10.771171,
 5.8545885,
 7.7011876,
 13.813,
 10.286365,
 5.2941365,
 49.978462,
 12.175744,
 6.789829,
 10.493305,
 17.269384,
 5.9283423,
 7.0433373,
 7.67496,
 13.375926,
 5.8098493,
 8.507821,
 6.016215,
 6.125811,
 7.615338,
 9.476878,
 6.928417,
 10.559484,
 20.579998,
 7.062305,
 41.51185,
 30.485712,
 6.4242363,
 17.188713,
 12.226224,
 6.990439,
 6.7346992,
 12.024485,
 10.0040655,
 7.1570344,
 7.2114215,
 6.990439,
 9.458141,
 6.403864,
 23.594002,
 11.933541,
 20.635502,
 10.733007,
 7.2283363,
 13.254312,
 6.9284267,
 9.289203,
 6.937711,
 7.0655775,
 12.118506,
 14.162547,
 21.279396,
 6.968911,
 6.9284244,
 17.339144,
 13.253526,
 10.878144,
 14.830139,
 14.490673,
 9.460266,
 10.2754,
 7.134239,
 6.928418,
 7.2114134,
 6.5007367,
 10.426269,
 6.789826,
 6.990439,
 19.539972,
 9.381004,
 6.78983,
 13.110632,
 7.047948,
 11.911989,
 11.551909,
 45.976845,
 6.882392,
 6.8352985,
 6.506706,
 6.937711,
 6.5135565,
 7.134239,
 10.354113,
 21.317677,
 6.6125064,
 7.

In [56]:
# Write the predictions to a CSV file which we can submit to the competition.
submission = pd.DataFrame(
    {'key': test_df.key, 'fare_amount': preds},
    columns = ['key', 'fare_amount'])
submission.to_csv('../../datasets/kaggle/new-york-city-taxi-fare-prediction/submission.csv', index = False)

In [57]:
submission.describe()

Unnamed: 0,fare_amount
count,9914.0
mean,11.390019
std,8.545333
min,5.148786
25%,6.29438
50%,8.573061
75%,12.462543
max,95.105186


In [33]:
submission.describe()

Unnamed: 0,fare_amount
count,9914.0
mean,11.342434
std,8.472132
min,4.755213
25%,6.20134
50%,8.625061
75%,12.361377
max,79.764259
