In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, MinMaxScaler

In [None]:
df = pd.read_csv('./boston/train.csv')
df.info()

In [None]:
X = df[['nox', 'rm', 'chas', 'dis', 'ptratio', 'lstat', 'rad']].values
y = df['medv'].values

In [None]:
print(X.shape)

In [None]:
print(X)

In [None]:
scaler = MinMaxScaler()
scaled_X = scaler.fit_transform(X)

In [None]:
print(scaled_X)

In [None]:
poly = PolynomialFeatures(2)

In [None]:
new_X = poly.fit_transform(scaled_X)

In [None]:
print(new_X.shape)

In [None]:
print(new_X)

In [None]:
# Split the raw features first, then fit the scaler and the polynomial expansion on
# the training rows only. Fitting MinMaxScaler on the full dataset (as `scaled_X`
# above does) lets the test rows' min/max leak into training and biases the
# held-out score. Same test_size / random_state as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=40)
train_scaler = MinMaxScaler().fit(X_train_raw)
train_poly = PolynomialFeatures(2)
X_train = train_poly.fit_transform(train_scaler.transform(X_train_raw))
# The test rows are only transformed with the statistics learned from the training rows.
X_test = train_poly.transform(train_scaler.transform(X_test_raw))

In [None]:
# Fit ordinary least squares on the training split and report the score on the test split.
lr = LinearRegression().fit(X_train, y_train)
test_score = lr.score(X_test, y_test)
print('Score: {}'.format(test_score))

# New York Taxi Cab

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('./nyc/train.csv', nrows=1000000)

In [None]:
df.info()

In [None]:
df.head()

In [3]:
# LinearRegression rejects NaN inputs, and the frame is only cleaned with dropna()
# further down the notebook, so restrict the baseline to complete rows here.
# When the loaded rows contain no missing values this selects exactly the same data.
baseline_features = ['pickup_longitude', 'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude', 'passenger_count']
baseline_df = df.dropna(subset=baseline_features + ['fare_amount'])
X = baseline_df[baseline_features].values
# Double brackets keep y two-dimensional (one column), as the later cells expect.
y = baseline_df[['fare_amount']].values

In [9]:
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1)

In [12]:
from sklearn.linear_model import LinearRegression

In [None]:
linear_model = LinearRegression()

In [None]:
linear_model.fit(X_train, y_train)

In [None]:
y_pred = linear_model.predict(X_test)

In [14]:
from sklearn.metrics import mean_squared_error

In [None]:
print('MSE: {}'.format(mean_squared_error(y_test, y_pred)))

In [None]:
print('Score: {}'.format(linear_model.score(X_test, y_test)))

In [None]:
_c = pd.DataFrame({'actuals': y_test.reshape(-1), 'predicted': y_pred.reshape(-1)})

In [None]:
_c.tail()

## train a better model
### Engineer new features

In [None]:
df[['fare_amount']].describe()

In [16]:
# Discard every row that has at least one missing value.
df = df.dropna(axis=0)

## limit the fare_amount to between 1 and 39 USD

In [17]:
new_df = df[df.fare_amount >= 1]

In [18]:
new_df[['fare_amount']].describe()

Unnamed: 0,fare_amount
count,999915.0
mean,11.349036
std,9.821188
min,1.5
25%,6.0
50%,8.5
75%,12.5
max,500.0


In [19]:
# Build the mask from new_df itself. Indexing new_df with a mask computed on df
# (which still contains the rows filtered out above) makes pandas reindex the
# boolean Series and emit a UserWarning. copy() turns the result into an
# independent frame so the column assignments in later cells do not target a
# slice of another DataFrame.
new_df = new_df[new_df.fare_amount < 39].copy()

  """Entry point for launching an IPython kernel.


In [20]:
new_df[['fare_amount']].describe()

Unnamed: 0,fare_amount
count,970736.0
mean,10.10794
std,6.280083
min,1.5
25%,6.0
50%,8.1
75%,12.1
max,38.99


In [21]:
# Same five raw inputs as the first baseline, now taken from the fare-filtered frame.
raw_inputs = ['pickup_longitude', 'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude', 'passenger_count']
X = new_df.loc[:, raw_inputs].values
y = new_df.loc[:, ['fare_amount']].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=1
)

In [22]:
# Refit the linear baseline and report the held-out mean squared error and score.
linear_model = LinearRegression().fit(X_train, y_train)
y_pred = linear_model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
test_score = linear_model.score(X_test, y_test)
print('MSE: {}'.format(test_mse))
print('Score: {}'.format(test_score))

MSE: 39.51288157809759
Score: 6.0526111260839066e-05


In [None]:
_c = pd.DataFrame({'actuals': y_test.reshape(-1), 'predicted': y_pred.reshape(-1)})

In [None]:
_c.head()

In [None]:
new_df.describe()

In [None]:
new_df.info()

## take date and time into consideration

In [23]:
# Parse the pickup timestamp strings into a timezone-aware (UTC) datetime column.
# assign() returns a new frame instead of writing into a filtered slice, which is
# what triggers pandas' SettingWithCopyWarning.
# An explicit format avoids per-value format guessing over the whole column.
# NOTE(review): assumes every value looks like '2009-06-15 17:26:21 UTC', as in
# the head() output; a row in any other layout will raise ValueError.
new_df = new_df.assign(
    _pickup_datetime=pd.to_datetime(
        new_df['pickup_datetime'], format='%Y-%m-%d %H:%M:%S UTC', utc=True
    )
)

In [24]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 970736 entries, 0 to 999999
Data columns (total 9 columns):
key                  970736 non-null object
fare_amount          970736 non-null float64
pickup_datetime      970736 non-null object
pickup_longitude     970736 non-null float64
pickup_latitude      970736 non-null float64
dropoff_longitude    970736 non-null float64
dropoff_latitude     970736 non-null float64
passenger_count      970736 non-null int64
_pickup_datetime     970736 non-null datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), float64(5), int64(1), object(2)
memory usage: 74.1+ MB


## extract interesting information from datetime

In [25]:
new_df['_month'] = new_df._pickup_datetime.dt.month

In [26]:
new_df['_day'] = new_df._pickup_datetime.dt.day

In [27]:
new_df['_dayofweek'] = new_df._pickup_datetime.dt.dayofweek

In [28]:
new_df['_hour'] = new_df._pickup_datetime.dt.hour

## extract direction of travel

In [29]:
# Signed pickup-minus-dropoff offsets along each axis.
new_df = new_df.assign(
    _lon_diff=new_df['pickup_longitude'].sub(new_df['dropoff_longitude']),
    _lat_diff=new_df['pickup_latitude'].sub(new_df['dropoff_latitude']),
)

In [30]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 970736 entries, 0 to 999999
Data columns (total 15 columns):
key                  970736 non-null object
fare_amount          970736 non-null float64
pickup_datetime      970736 non-null object
pickup_longitude     970736 non-null float64
pickup_latitude      970736 non-null float64
dropoff_longitude    970736 non-null float64
dropoff_latitude     970736 non-null float64
passenger_count      970736 non-null int64
_pickup_datetime     970736 non-null datetime64[ns, UTC]
_month               970736 non-null int64
_day                 970736 non-null int64
_dayofweek           970736 non-null int64
_hour                970736 non-null int64
_lon_diff            970736 non-null float64
_lat_diff            970736 non-null float64
dtypes: datetime64[ns, UTC](1), float64(7), int64(5), object(2)
memory usage: 118.5+ MB


# train our model again

In [31]:
# Raw trip inputs, then the calendar fields, then the coordinate deltas (order matters
# only in that it fixes the column order of X).
trip_cols = ['pickup_longitude', 'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude', 'passenger_count']
calendar_cols = ['_month', '_day', '_dayofweek', '_hour']
delta_cols = ['_lon_diff', '_lat_diff']
X = new_df[trip_cols + calendar_cols + delta_cols].values
y = new_df[['fare_amount']].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=1
)

In [None]:
# Fit the linear model on the engineered features and report held-out MSE and score.
linear_model = LinearRegression().fit(X_train, y_train)
y_pred = linear_model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
test_score = linear_model.score(X_test, y_test)
print('MSE: {}'.format(test_mse))
print('Score: {}'.format(test_score))

# try to visualize

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
sns.scatterplot(x='_lon_diff', y='_lat_diff', data=new_df)

In [None]:
new_df = new_df[new_df._lon_diff > -100]

In [None]:
new_df = new_df[new_df._lat_diff < 50]

In [None]:
sns.scatterplot(x='_lon_diff', y='_lat_diff', data=new_df)

In [None]:
sns.scatterplot(x='_lon_diff', y='fare_amount', data=new_df)

In [None]:
sns.scatterplot(x='_lat_diff', y='fare_amount', data=new_df)

In [None]:
sns.distplot(df['fare_amount'])

In [None]:
# Pass the Series by keyword: how a lone positional argument is interpreted
# (x values vs. data) differs between seaborn releases, while x= always draws
# the fare distribution along the x axis.
sns.boxplot(x=df['fare_amount'])

In [None]:
sns.catplot(y='fare_amount', x='_month', kind='box', data=new_df)

In [None]:
sns.catplot(y='fare_amount', x='_dayofweek', kind='box', data=new_df)

In [None]:
sns.catplot(y='fare_amount', x='_hour', kind='box', data=new_df)

In [None]:
sns.countplot(x='_hour', data=new_df)

In [None]:
new_df.info()

In [None]:
sns.countplot(x='_month', data=new_df)

In [None]:
new_df.head()

# convert all categorical variables

In [32]:
month_df = pd.get_dummies(new_df['_month'], prefix='_month', drop_first=True)

In [33]:
new_df = pd.concat([new_df, month_df], axis=1)

In [34]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 970736 entries, 0 to 999999
Data columns (total 26 columns):
key                  970736 non-null object
fare_amount          970736 non-null float64
pickup_datetime      970736 non-null object
pickup_longitude     970736 non-null float64
pickup_latitude      970736 non-null float64
dropoff_longitude    970736 non-null float64
dropoff_latitude     970736 non-null float64
passenger_count      970736 non-null int64
_pickup_datetime     970736 non-null datetime64[ns, UTC]
_month               970736 non-null int64
_day                 970736 non-null int64
_dayofweek           970736 non-null int64
_hour                970736 non-null int64
_lon_diff            970736 non-null float64
_lat_diff            970736 non-null float64
_month_2             970736 non-null uint8
_month_3             970736 non-null uint8
_month_4             970736 non-null uint8
_month_5             970736 non-null uint8
_month_6             970736 non-null uint8

In [35]:
new_df.head()

Unnamed: 0,key,fare_amount,pickup_datetime,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,passenger_count,_pickup_datetime,_month,...,_month_3,_month_4,_month_5,_month_6,_month_7,_month_8,_month_9,_month_10,_month_11,_month_12
0,2009-06-15 17:26:21.0000001,4.5,2009-06-15 17:26:21 UTC,-73.844311,40.721319,-73.84161,40.712278,1,2009-06-15 17:26:21+00:00,6,...,0,0,0,1,0,0,0,0,0,0
1,2010-01-05 16:52:16.0000002,16.9,2010-01-05 16:52:16 UTC,-74.016048,40.711303,-73.979268,40.782004,1,2010-01-05 16:52:16+00:00,1,...,0,0,0,0,0,0,0,0,0,0
2,2011-08-18 00:35:00.00000049,5.7,2011-08-18 00:35:00 UTC,-73.982738,40.76127,-73.991242,40.750562,2,2011-08-18 00:35:00+00:00,8,...,0,0,0,0,0,1,0,0,0,0
3,2012-04-21 04:30:42.0000001,7.7,2012-04-21 04:30:42 UTC,-73.98713,40.733143,-73.991567,40.758092,1,2012-04-21 04:30:42+00:00,4,...,0,1,0,0,0,0,0,0,0,0
4,2010-03-09 07:51:00.000000135,5.3,2010-03-09 07:51:00 UTC,-73.968095,40.768008,-73.956655,40.783762,1,2010-03-09 07:51:00+00:00,3,...,1,0,0,0,0,0,0,0,0,0


In [36]:
# Columns that are never model inputs: the label, the row key and both timestamp columns.
non_feature_cols = ['fare_amount', 'key', 'pickup_datetime', '_pickup_datetime']
# Once `_month` has been one-hot encoded (drop_first=True), the raw integer column is
# an exact linear combination of the dummies plus the intercept. Keeping both makes
# the design matrix rank-deficient, so drop the raw column -- but only when the
# `_month_*` dummies are actually present in the frame.
if new_df.columns.str.startswith('_month_').any():
    non_feature_cols.append('_month')

X = new_df.drop(non_feature_cols, axis=1).values
y = new_df[['fare_amount']].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1)

linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred = linear_model.predict(X_test)
print('MSE: {}'.format(mean_squared_error(y_test, y_pred)))
print('Score: {}'.format(linear_model.score(X_test, y_test)))

MSE: 39.446191288477856
Score: 0.0017482352950124458


# repeat for _day

In [37]:
day_df = pd.get_dummies(new_df['_day'], prefix='_day', drop_first=True)

# repeat for _dayofweek

In [38]:
dow_df = pd.get_dummies(new_df['_dayofweek'], prefix='_dayofweek', drop_first=True)

# repeat for _hour

In [39]:
hour_df = pd.get_dummies(new_df['_hour'], prefix='_hour', drop_first=True)

In [40]:
new_df = pd.concat([new_df, day_df, dow_df, hour_df], axis=1)

In [41]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 970736 entries, 0 to 999999
Data columns (total 85 columns):
key                  970736 non-null object
fare_amount          970736 non-null float64
pickup_datetime      970736 non-null object
pickup_longitude     970736 non-null float64
pickup_latitude      970736 non-null float64
dropoff_longitude    970736 non-null float64
dropoff_latitude     970736 non-null float64
passenger_count      970736 non-null int64
_pickup_datetime     970736 non-null datetime64[ns, UTC]
_month               970736 non-null int64
_day                 970736 non-null int64
_dayofweek           970736 non-null int64
_hour                970736 non-null int64
_lon_diff            970736 non-null float64
_lat_diff            970736 non-null float64
_month_2             970736 non-null uint8
_month_3             970736 non-null uint8
_month_4             970736 non-null uint8
_month_5             970736 non-null uint8
_month_6             970736 non-null uint8

# train a model again

In [None]:
# Columns that are never model inputs: the label, the row key and both timestamp columns.
non_feature_cols = ['fare_amount', 'key', 'pickup_datetime', '_pickup_datetime']
# Each raw calendar column is an exact linear combination of its one-hot dummies
# (drop_first=True) plus the intercept. Keeping both makes the design matrix
# rank-deficient, so drop every raw column whose `<name>_*` dummies are present.
for raw_col in ['_month', '_day', '_dayofweek', '_hour']:
    if new_df.columns.str.startswith(raw_col + '_').any():
        non_feature_cols.append(raw_col)

X = new_df.drop(non_feature_cols, axis=1).values
y = new_df[['fare_amount']].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1)

linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred = linear_model.predict(X_test)
print('MSE: {}'.format(mean_squared_error(y_test, y_pred)))
print('Score: {}'.format(linear_model.score(X_test, y_test)))

* We now have more features, but the model isn't improving because a linear model is too simple for this problem. We don't want to hand-craft polynomial features, so let's try a deep neural network instead.

In [42]:
import tensorflow as tf
import numpy as np

In [43]:
fc = tf.feature_column

# Numeric columns: raw coordinates and the pickup-minus-dropoff deltas.
pickup_lon = fc.numeric_column('pickup_longitude', dtype=tf.dtypes.float64)
dropoff_lon = fc.numeric_column('dropoff_longitude', dtype=tf.dtypes.float64)
pickup_lat = fc.numeric_column('pickup_latitude', dtype=tf.dtypes.float64)
dropoff_lat = fc.numeric_column('dropoff_latitude', dtype=tf.dtypes.float64)

# NOTE(review): lat_diff and lon_diff are defined here but are not added to
# either column list below -- confirm whether that is intentional.
lat_diff = fc.numeric_column('_lat_diff', dtype=tf.dtypes.float64)
lon_diff = fc.numeric_column('_lon_diff', dtype=tf.dtypes.float64)

# Calendar fields as identity-categorical columns.
month = fc.categorical_column_with_identity('_month', num_buckets=13)
day = fc.categorical_column_with_identity('_day', num_buckets=32)
dayofweek = fc.categorical_column_with_identity('_dayofweek', num_buckets=7)
hour = fc.categorical_column_with_identity('_hour', num_buckets=24)

# Coordinates discretised with 240 evenly spaced boundaries per axis; pickup and
# dropoff share the same latitude / longitude boundaries.
n_edges = 60 * 4
lat_edges = np.linspace(38.0, 42.0, n_edges).tolist()
lon_edges = np.linspace(-76.0, -72.0, n_edges).tolist()
b_pickup_lat = fc.bucketized_column(pickup_lat, lat_edges)
b_pickup_lon = fc.bucketized_column(pickup_lon, lon_edges)
b_dropoff_lat = fc.bucketized_column(dropoff_lat, lat_edges)
b_dropoff_lon = fc.bucketized_column(dropoff_lon, lon_edges)

# Feature crosses: weekday x hour, pickup cell, dropoff cell, and pickup x dropoff.
dayofweek_x_hour = fc.crossed_column([dayofweek, hour], 7 * 24)
b_p_lat_x_b_p_lon = fc.crossed_column([b_pickup_lat, b_pickup_lon], n_edges ** 2)
b_d_lat_x_b_d_lon = fc.crossed_column([b_dropoff_lat, b_dropoff_lon], n_edges ** 2)
b_pickup_x_b_dropoff = fc.crossed_column([b_p_lat_x_b_p_lon, b_d_lat_x_b_d_lon], n_edges ** 4)

# Columns handed to the linear estimators.
feature_columns = [pickup_lon, dropoff_lon, pickup_lat, dropoff_lat, month, day, dayofweek, hour]

# Columns handed to the DNN estimators: the bucketized coordinates followed by a
# 64-dimensional embedding of each categorical / crossed column.
dense_columns = [b_pickup_lat, b_pickup_lon, b_dropoff_lat, b_dropoff_lon] + [
    fc.embedding_column(categorical, 64)
    for categorical in (month, day, dayofweek, hour, dayofweek_x_hour, b_p_lat_x_b_p_lon, b_d_lat_x_b_d_lon)
]

In [44]:
# we need a new way of getting data into the model
def df_to_dataset(df, columns, shuffle=True, batch_size=64, label_col='fare_amount'):
  """Build a batched tf.data.Dataset of (features, labels) from a DataFrame.

  Args:
    df: DataFrame containing the feature columns and the label column. It is
      not modified.
    columns: list of column names to expose as features.
    shuffle: when True, shuffle with a buffer as large as the number of rows.
    batch_size: number of rows per batch.
    label_col: name of the label column; defaults to 'fare_amount'.

  Returns:
    A tf.data.Dataset whose elements are ({column name: values}, labels) batches.

  Raises:
    KeyError: if label_col is listed in columns, or a requested column is
      missing from df.
  """
  if label_col in columns:
    # The label must never be served as a feature as well.
    raise KeyError(label_col)
  # Select only what is needed instead of copying the whole frame to pop one column.
  labels = df[label_col]
  features_df = df[columns]
  ds = tf.data.Dataset.from_tensor_slices((dict(features_df), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(features_df))
  return ds.batch(batch_size)

In [45]:
columns = ['pickup_longitude', 'dropoff_longitude', 'pickup_latitude', 'dropoff_latitude', '_lat_diff', '_lon_diff', '_month', '_day', '_dayofweek', '_hour']

In [46]:
# extract train and test sets
train = new_df.sample(frac=0.9,random_state=0)
val = new_df.drop(train.index)

In [47]:
train_ds = df_to_dataset(train, columns)
# Shuffling brings no benefit for held-out data and would allocate a shuffle
# buffer spanning the whole frame, so keep the validation rows in frame order.
val_ds = df_to_dataset(val, columns, shuffle=False)

In [48]:
train[columns].describe()

Unnamed: 0,pickup_longitude,dropoff_longitude,pickup_latitude,dropoff_latitude,_lat_diff,_lon_diff,_month,_day,_dayofweek,_hour
count,873662.0,873662.0,873662.0,873662.0,873662.0,873662.0,873662.0,873662.0,873662.0,873662.0
mean,-72.548252,-72.551212,39.94099,39.931913,0.009077,0.00296,6.259924,15.699446,3.042504,13.517266
std,12.183339,11.281543,7.799619,8.427796,8.804376,7.865291,3.43792,8.685469,1.948975,6.529912
min,-3377.680935,-3383.296608,-3116.285383,-3114.338567,-3157.073381,-2822.762242,1.0,1.0,0.0,0.0
25%,-73.992204,-73.991422,40.735855,40.73483,-0.013644,-0.013449,3.0,8.0,1.0,9.0
50%,-73.982001,-73.98031,40.753018,40.753465,0.0,-0.000589,6.0,16.0,3.0,14.0
75%,-73.967941,-73.964452,40.767292,40.76826,0.013413,0.010782,9.0,23.0,5.0,19.0
max,2522.271325,45.581619,2621.62843,1651.553433,5486.099433,4240.388978,12.0,31.0,6.0,23.0


# Train a DNN

In [None]:
from tensorflow import keras

In [None]:
new_df.info()

In [None]:
# Design matrix: every column except the label, `key` and the two pickup timestamp columns.
excluded_cols = ['fare_amount', 'key', 'pickup_datetime', '_pickup_datetime']
X = new_df.drop(columns=excluded_cols).values
y = new_df[['fare_amount']].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=1
)

# Two hidden ReLU layers of 128 units feeding a single linear output unit.
n_features = X_train.shape[1]
model = keras.Sequential()
model.add(keras.layers.Dense(128, input_shape=(n_features,), activation='relu'))
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dense(1))

model.compile(optimizer='adam', loss='mse')

# A further 10% of the training rows is held out by Keras for per-epoch validation.
model.fit(X_train, y_train, epochs=10, validation_split=0.1)

In [None]:
y_pred = model.predict(X_test)
# Flatten both arrays so each becomes a single DataFrame column.
comp = pd.DataFrame({'actuals': y_test.ravel(), 'predicted': y_pred.ravel()})
comp.head(10)

In [None]:
print('MSE: {}'.format(mean_squared_error(y_test, y_pred)))

# Use Estimators

## Linear Model

In [49]:
def train_fn():
  """Input function used for Estimator training.

  Returns:
    A tf.data.Dataset over the `train` frame restricted to `columns`, with
    'fare_amount' as the label: shuffled with a buffer spanning every row,
    batched by 64 and repeated for 5 passes.
  """
  # Reuse the notebook's DataFrame -> Dataset helper instead of duplicating its
  # pipeline here; only the number of passes is specific to training.
  return df_to_dataset(train, columns, shuffle=True, batch_size=64).repeat(5)

def eval_fn():
  """Input function used for Estimator evaluation and prediction.

  Returns:
    A tf.data.Dataset over the `val` frame restricted to `columns`, with
    'fare_amount' as the label: batched by 64, not shuffled, single pass.
  """
  # No shuffling: predictions must come back in the same order as the rows of
  # `val` so they can be lined up with the actual fares afterwards.
  # The former .repeat(1) was a no-op and is omitted.
  return df_to_dataset(val, columns, shuffle=False, batch_size=64)

In [50]:
estimator = tf.estimator.LinearRegressor(feature_columns=feature_columns)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/xm/rvswt1sx4rdf56_wpqz5rt4c0000gn/T/tmpm3wa1h4f', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a4c983850>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [51]:
estimator.train(input_fn=train_fn, steps=None)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running loca

INFO:tensorflow:global_step/sec: 445.177
INFO:tensorflow:loss = 34.37677, step = 5400 (0.225 sec)
INFO:tensorflow:global_step/sec: 473.597
INFO:tensorflow:loss = 20.62466, step = 5500 (0.211 sec)
INFO:tensorflow:global_step/sec: 457.806
INFO:tensorflow:loss = 24.990108, step = 5600 (0.218 sec)
INFO:tensorflow:global_step/sec: 472.795
INFO:tensorflow:loss = 36.54503, step = 5700 (0.212 sec)
INFO:tensorflow:global_step/sec: 466.546
INFO:tensorflow:loss = 54.197933, step = 5800 (0.214 sec)
INFO:tensorflow:global_step/sec: 462.79
INFO:tensorflow:loss = 31.941154, step = 5900 (0.216 sec)
INFO:tensorflow:global_step/sec: 452.972
INFO:tensorflow:loss = 52.070427, step = 6000 (0.221 sec)
INFO:tensorflow:global_step/sec: 470.142
INFO:tensorflow:loss = 56.548508, step = 6100 (0.212 sec)
INFO:tensorflow:global_step/sec: 458.318
INFO:tensorflow:loss = 48.18702, step = 6200 (0.218 sec)
INFO:tensorflow:global_step/sec: 481.506
INFO:tensorflow:loss = 42.51331, step = 6300 (0.208 sec)
INFO:tensorflow:

INFO:tensorflow:global_step/sec: 23.5837
INFO:tensorflow:loss = 39.894028, step = 13700 (4.240 sec)
INFO:tensorflow:global_step/sec: 654.939
INFO:tensorflow:loss = 47.025528, step = 13800 (0.153 sec)
INFO:tensorflow:global_step/sec: 642.768
INFO:tensorflow:loss = 30.562662, step = 13900 (0.155 sec)
INFO:tensorflow:global_step/sec: 667.2
INFO:tensorflow:loss = 37.88817, step = 14000 (0.150 sec)
INFO:tensorflow:global_step/sec: 693.16
INFO:tensorflow:loss = 19.398829, step = 14100 (0.144 sec)
INFO:tensorflow:global_step/sec: 680.277
INFO:tensorflow:loss = 28.06113, step = 14200 (0.147 sec)
INFO:tensorflow:global_step/sec: 670.344
INFO:tensorflow:loss = 48.537918, step = 14300 (0.149 sec)
INFO:tensorflow:global_step/sec: 686.705
INFO:tensorflow:loss = 29.782774, step = 14400 (0.146 sec)
INFO:tensorflow:global_step/sec: 681.882
INFO:tensorflow:loss = 55.259964, step = 14500 (0.147 sec)
INFO:tensorflow:global_step/sec: 685.415
INFO:tensorflow:loss = 47.11022, step = 14600 (0.146 sec)
INFO:t

INFO:tensorflow:loss = 36.12719, step = 21900 (0.227 sec)
INFO:tensorflow:global_step/sec: 403.618
INFO:tensorflow:loss = 46.082836, step = 22000 (0.248 sec)
INFO:tensorflow:global_step/sec: 493.927
INFO:tensorflow:loss = 31.805878, step = 22100 (0.202 sec)
INFO:tensorflow:global_step/sec: 494.117
INFO:tensorflow:loss = 44.48449, step = 22200 (0.202 sec)
INFO:tensorflow:global_step/sec: 457.48
INFO:tensorflow:loss = 58.968887, step = 22300 (0.219 sec)
INFO:tensorflow:global_step/sec: 480.575
INFO:tensorflow:loss = 18.04084, step = 22400 (0.207 sec)
INFO:tensorflow:global_step/sec: 492.262
INFO:tensorflow:loss = 28.647657, step = 22500 (0.203 sec)
INFO:tensorflow:global_step/sec: 489.79
INFO:tensorflow:loss = 46.06524, step = 22600 (0.204 sec)
INFO:tensorflow:global_step/sec: 474.412
INFO:tensorflow:loss = 45.11048, step = 22700 (0.211 sec)
INFO:tensorflow:global_step/sec: 504.859
INFO:tensorflow:loss = 52.74598, step = 22800 (0.198 sec)
INFO:tensorflow:global_step/sec: 413.755
INFO:ten

INFO:tensorflow:global_step/sec: 593.506
INFO:tensorflow:loss = 38.127266, step = 30200 (0.169 sec)
INFO:tensorflow:global_step/sec: 544.497
INFO:tensorflow:loss = 51.582123, step = 30300 (0.184 sec)
INFO:tensorflow:global_step/sec: 499.097
INFO:tensorflow:loss = 38.4771, step = 30400 (0.200 sec)
INFO:tensorflow:global_step/sec: 495.98
INFO:tensorflow:loss = 42.443348, step = 30500 (0.203 sec)
INFO:tensorflow:global_step/sec: 317.277
INFO:tensorflow:loss = 21.578007, step = 30600 (0.314 sec)
INFO:tensorflow:global_step/sec: 270.376
INFO:tensorflow:loss = 48.034447, step = 30700 (0.370 sec)
INFO:tensorflow:global_step/sec: 371.1
INFO:tensorflow:loss = 43.778618, step = 30800 (0.269 sec)
INFO:tensorflow:global_step/sec: 411.062
INFO:tensorflow:loss = 25.15516, step = 30900 (0.243 sec)
INFO:tensorflow:global_step/sec: 479.784
INFO:tensorflow:loss = 39.634872, step = 31000 (0.208 sec)
INFO:tensorflow:global_step/sec: 503.674
INFO:tensorflow:loss = 58.265686, step = 31100 (0.199 sec)
INFO:t

INFO:tensorflow:global_step/sec: 485.086
INFO:tensorflow:loss = 32.62153, step = 38500 (0.206 sec)
INFO:tensorflow:global_step/sec: 461.044
INFO:tensorflow:loss = 41.041183, step = 38600 (0.217 sec)
INFO:tensorflow:global_step/sec: 461.274
INFO:tensorflow:loss = 43.966995, step = 38700 (0.216 sec)
INFO:tensorflow:global_step/sec: 454.527
INFO:tensorflow:loss = 41.909866, step = 38800 (0.220 sec)
INFO:tensorflow:global_step/sec: 466.426
INFO:tensorflow:loss = 42.735752, step = 38900 (0.215 sec)
INFO:tensorflow:global_step/sec: 472.829
INFO:tensorflow:loss = 29.470016, step = 39000 (0.211 sec)
INFO:tensorflow:global_step/sec: 474.062
INFO:tensorflow:loss = 35.534634, step = 39100 (0.211 sec)
INFO:tensorflow:global_step/sec: 513.563
INFO:tensorflow:loss = 40.40486, step = 39200 (0.195 sec)
INFO:tensorflow:global_step/sec: 479.575
INFO:tensorflow:loss = 36.86239, step = 39300 (0.208 sec)
INFO:tensorflow:global_step/sec: 477.729
INFO:tensorflow:loss = 21.633993, step = 39400 (0.210 sec)
INF

INFO:tensorflow:loss = 27.132704, step = 46700 (0.222 sec)
INFO:tensorflow:global_step/sec: 438.689
INFO:tensorflow:loss = 45.05726, step = 46800 (0.228 sec)
INFO:tensorflow:global_step/sec: 435.373
INFO:tensorflow:loss = 39.472958, step = 46900 (0.230 sec)
INFO:tensorflow:global_step/sec: 458.819
INFO:tensorflow:loss = 63.450684, step = 47000 (0.218 sec)
INFO:tensorflow:global_step/sec: 445.236
INFO:tensorflow:loss = 16.657373, step = 47100 (0.225 sec)
INFO:tensorflow:global_step/sec: 405.193
INFO:tensorflow:loss = 53.28688, step = 47200 (0.247 sec)
INFO:tensorflow:global_step/sec: 415.531
INFO:tensorflow:loss = 53.43894, step = 47300 (0.240 sec)
INFO:tensorflow:global_step/sec: 467.072
INFO:tensorflow:loss = 62.985176, step = 47400 (0.214 sec)
INFO:tensorflow:global_step/sec: 394.963
INFO:tensorflow:loss = 28.884796, step = 47500 (0.253 sec)
INFO:tensorflow:global_step/sec: 398.77
INFO:tensorflow:loss = 51.271786, step = 47600 (0.250 sec)
INFO:tensorflow:global_step/sec: 454.18
INFO:

INFO:tensorflow:global_step/sec: 663.209
INFO:tensorflow:loss = 47.73291, step = 55000 (0.150 sec)
INFO:tensorflow:global_step/sec: 660.476
INFO:tensorflow:loss = 40.50183, step = 55100 (0.151 sec)
INFO:tensorflow:global_step/sec: 654.948
INFO:tensorflow:loss = 47.696495, step = 55200 (0.153 sec)
INFO:tensorflow:global_step/sec: 678.1
INFO:tensorflow:loss = 24.519993, step = 55300 (0.147 sec)
INFO:tensorflow:global_step/sec: 717.629
INFO:tensorflow:loss = 36.997795, step = 55400 (0.139 sec)
INFO:tensorflow:global_step/sec: 684.284
INFO:tensorflow:loss = 24.116337, step = 55500 (0.146 sec)
INFO:tensorflow:global_step/sec: 687.729
INFO:tensorflow:loss = 43.760204, step = 55600 (0.145 sec)
INFO:tensorflow:global_step/sec: 667.873
INFO:tensorflow:loss = 46.576973, step = 55700 (0.150 sec)
INFO:tensorflow:global_step/sec: 715.072
INFO:tensorflow:loss = 25.576216, step = 55800 (0.140 sec)
INFO:tensorflow:global_step/sec: 666.356
INFO:tensorflow:loss = 48.662132, step = 55900 (0.150 sec)
INFO

INFO:tensorflow:loss = 47.793926, step = 63200 (0.202 sec)
INFO:tensorflow:global_step/sec: 489.529
INFO:tensorflow:loss = 68.950676, step = 63300 (0.204 sec)
INFO:tensorflow:global_step/sec: 485.994
INFO:tensorflow:loss = 38.15056, step = 63400 (0.206 sec)
INFO:tensorflow:global_step/sec: 493.671
INFO:tensorflow:loss = 52.468872, step = 63500 (0.203 sec)
INFO:tensorflow:global_step/sec: 501.693
INFO:tensorflow:loss = 51.526627, step = 63600 (0.199 sec)
INFO:tensorflow:global_step/sec: 502.694
INFO:tensorflow:loss = 37.883636, step = 63700 (0.199 sec)
INFO:tensorflow:global_step/sec: 509.949
INFO:tensorflow:loss = 23.176157, step = 63800 (0.196 sec)
INFO:tensorflow:global_step/sec: 490.338
INFO:tensorflow:loss = 31.369713, step = 63900 (0.204 sec)
INFO:tensorflow:global_step/sec: 511.831
INFO:tensorflow:loss = 41.405975, step = 64000 (0.195 sec)
INFO:tensorflow:global_step/sec: 511.341
INFO:tensorflow:loss = 56.923847, step = 64100 (0.196 sec)
INFO:tensorflow:global_step/sec: 501.663
I

<tensorflow_estimator.python.estimator.canned.linear.LinearRegressorV2 at 0x1a4d0ae990>

In [52]:
linear_evaluation = estimator.evaluate(input_fn=eval_fn)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-04-22T17:09:33Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/xm/rvswt1sx4rdf56_wpqz5rt4c0000gn/T/tmpm3wa1h4f/model.ckpt-68255
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2020-04-22-17:09:35
INFO:tensorflow:Saving dict for global step 68255: average_loss = 39.467354, global_step = 68255, label/mean = 10.120692, loss = 39.465862, prediction/mean = 9.872337
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 68255: /var/folders/xm/rvswt1sx4rdf

In [53]:
print(linear_evaluation)

{'average_loss': 39.467354, 'label/mean': 10.120692, 'loss': 39.465862, 'prediction/mean': 9.872337, 'global_step': 68255}


In [54]:
p = estimator.predict(input_fn = eval_fn)

In [55]:
preds = np.array([item['predictions'][0] for item in p])

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/xm/rvswt1sx4rdf56_wpqz5rt4c0000gn/T/tmpm3wa1h4f/model.ckpt-68255
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [56]:
_c = pd.DataFrame({'actual': val[['fare_amount']].values.reshape(-1), 'prediction': preds.reshape(-1)})

In [57]:
_c.head()

Unnamed: 0,actual,prediction
0,5.3,9.069082
1,4.5,9.671481
2,4.5,9.637139
3,24.9,11.599221
4,5.7,9.40913


In [58]:
from sklearn.metrics import mean_squared_error

In [59]:
# Mean squared error of the linear model's predictions against the fares in `val`.
print('MSE: {}'.format(mean_squared_error(
    val[['fare_amount']].values.ravel(),
    preds.ravel(),
)))

MSE: 39.46734902063094


## DNN

In [None]:
# DNN regressor over the dense feature columns with two hidden layers of 64 units.
dense = tf.estimator.DNNRegressor(
    feature_columns=dense_columns,
    hidden_units=[64, 64],
)

In [None]:
# Fit the DNN on the training input function.
dense.train(
    input_fn=train_fn,
    steps=None,  # no explicit cap on the number of training steps
)

In [None]:
# Evaluate the trained DNN on the evaluation input function and keep the metrics.
dense_evaluation = dense.evaluate(
    input_fn=eval_fn,
)

In [None]:
# Show the metrics returned by the DNN evaluation.
print('{}'.format(dense_evaluation))

In [None]:
# Stream predictions from the DNN, keep the first value of each 'predictions'
# entry, and line them up against the fares in `val` for inspection.
p = dense.predict(input_fn=eval_fn)
preds = np.array([pred['predictions'][0] for pred in p])
_c = pd.DataFrame({
    'actual': val[['fare_amount']].values.ravel(),
    'prediction': preds.ravel(),
})

In [None]:
# Peek at the first five actual/prediction pairs for the DNN.
_c.head(n=5)

In [None]:
# Mean squared error of the DNN's predictions against the fares in `val`.
print('MSE: {}'.format(mean_squared_error(
    val[['fare_amount']].values.ravel(),
    preds.ravel(),
)))

## Combine the models

In [60]:
# Single estimator that joins a linear ("wide") model and a DNN ("deep") model.
combined = tf.estimator.DNNLinearCombinedRegressor(
    # Wide part: the linear model's feature columns.
    linear_feature_columns=feature_columns,
    # Deep part: dense feature columns fed through two hidden layers of 512 units.
    dnn_feature_columns=dense_columns,
    dnn_hidden_units=[512, 512],
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/xm/rvswt1sx4rdf56_wpqz5rt4c0000gn/T/tmpprz6l8f5', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a6d559e10>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [61]:
# Fit the combined wide-and-deep estimator on the training input function.
combined.train(
    input_fn=train_fn,
    steps=None,  # no explicit cap on the number of training steps
)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/xm/rvswt1sx4rdf56_wpqz5rt4c0000gn/T/tmpprz6l8f5/model.ckpt.
INFO:tensorflow:loss = 111.30362, step = 0

INFO:tensorflow:global_step/sec: 72.7961
INFO:tensorflow:loss = 7.0227776, step = 6600 (1.374 sec)
INFO:tensorflow:global_step/sec: 73.6989
INFO:tensorflow:loss = 16.39825, step = 6700 (1.357 sec)
INFO:tensorflow:global_step/sec: 78.7298
INFO:tensorflow:loss = 9.591472, step = 6800 (1.270 sec)
INFO:tensorflow:global_step/sec: 76.4954
INFO:tensorflow:loss = 13.006988, step = 6900 (1.307 sec)
INFO:tensorflow:global_step/sec: 71.1226
INFO:tensorflow:loss = 9.908268, step = 7000 (1.406 sec)
INFO:tensorflow:global_step/sec: 73.9683
INFO:tensorflow:loss = 9.999204, step = 7100 (1.352 sec)
INFO:tensorflow:global_step/sec: 75.9616
INFO:tensorflow:loss = 15.561089, step = 7200 (1.316 sec)
INFO:tensorflow:global_step/sec: 77.0373
INFO:tensorflow:loss = 11.595737, step = 7300 (1.298 sec)
INFO:tensorflow:global_step/sec: 77.6601
INFO:tensorflow:loss = 7.234599, step = 7400 (1.288 sec)
INFO:tensorflow:global_step/sec: 67.3724
INFO:tensorflow:loss = 10.578199, step = 7500 (1.484 sec)
INFO:tensorflow

INFO:tensorflow:global_step/sec: 72.2062
INFO:tensorflow:loss = 11.182687, step = 14900 (1.385 sec)
INFO:tensorflow:global_step/sec: 73.8265
INFO:tensorflow:loss = 13.147437, step = 15000 (1.354 sec)
INFO:tensorflow:global_step/sec: 74.4163
INFO:tensorflow:loss = 8.852351, step = 15100 (1.344 sec)
INFO:tensorflow:global_step/sec: 76.0714
INFO:tensorflow:loss = 14.013429, step = 15200 (1.315 sec)
INFO:tensorflow:global_step/sec: 73.5179
INFO:tensorflow:loss = 8.523511, step = 15300 (1.360 sec)
INFO:tensorflow:global_step/sec: 68.9762
INFO:tensorflow:loss = 18.501942, step = 15400 (1.450 sec)
INFO:tensorflow:global_step/sec: 74.3097
INFO:tensorflow:loss = 10.987129, step = 15500 (1.346 sec)
INFO:tensorflow:global_step/sec: 79.238
INFO:tensorflow:loss = 23.424171, step = 15600 (1.262 sec)
INFO:tensorflow:global_step/sec: 72.1667
INFO:tensorflow:loss = 13.844648, step = 15700 (1.385 sec)
INFO:tensorflow:global_step/sec: 78.7571
INFO:tensorflow:loss = 8.182037, step = 15800 (1.270 sec)
INFO

INFO:tensorflow:loss = 6.051843, step = 23100 (1.333 sec)
INFO:tensorflow:global_step/sec: 75.8296
INFO:tensorflow:loss = 14.15835, step = 23200 (1.319 sec)
INFO:tensorflow:global_step/sec: 75.4571
INFO:tensorflow:loss = 7.2114725, step = 23300 (1.325 sec)
INFO:tensorflow:global_step/sec: 76.1632
INFO:tensorflow:loss = 9.721556, step = 23400 (1.313 sec)
INFO:tensorflow:global_step/sec: 78.6632
INFO:tensorflow:loss = 8.838713, step = 23500 (1.271 sec)
INFO:tensorflow:global_step/sec: 78.0013
INFO:tensorflow:loss = 6.200539, step = 23600 (1.282 sec)
INFO:tensorflow:global_step/sec: 76.6695
INFO:tensorflow:loss = 8.458977, step = 23700 (1.304 sec)
INFO:tensorflow:global_step/sec: 80.1585
INFO:tensorflow:loss = 24.561043, step = 23800 (1.247 sec)
INFO:tensorflow:global_step/sec: 79.2202
INFO:tensorflow:loss = 10.824478, step = 23900 (1.262 sec)
INFO:tensorflow:global_step/sec: 78.0672
INFO:tensorflow:loss = 13.52, step = 24000 (1.281 sec)
INFO:tensorflow:global_step/sec: 76.1814
INFO:tenso

INFO:tensorflow:global_step/sec: 80.2547
INFO:tensorflow:loss = 10.679767, step = 31400 (1.246 sec)
INFO:tensorflow:global_step/sec: 77.0346
INFO:tensorflow:loss = 6.175375, step = 31500 (1.298 sec)
INFO:tensorflow:global_step/sec: 81.6632
INFO:tensorflow:loss = 9.074759, step = 31600 (1.225 sec)
INFO:tensorflow:global_step/sec: 82.5934
INFO:tensorflow:loss = 10.576316, step = 31700 (1.211 sec)
INFO:tensorflow:global_step/sec: 77.4005
INFO:tensorflow:loss = 10.124743, step = 31800 (1.292 sec)
INFO:tensorflow:global_step/sec: 70.4254
INFO:tensorflow:loss = 21.926422, step = 31900 (1.420 sec)
INFO:tensorflow:global_step/sec: 72.5461
INFO:tensorflow:loss = 16.038248, step = 32000 (1.378 sec)
INFO:tensorflow:global_step/sec: 65.2828
INFO:tensorflow:loss = 14.123531, step = 32100 (1.533 sec)
INFO:tensorflow:global_step/sec: 65.2936
INFO:tensorflow:loss = 12.658644, step = 32200 (1.531 sec)
INFO:tensorflow:global_step/sec: 66.0262
INFO:tensorflow:loss = 5.5300627, step = 32300 (1.515 sec)
IN

INFO:tensorflow:loss = 21.206991, step = 39600 (1.376 sec)
INFO:tensorflow:global_step/sec: 59.2741
INFO:tensorflow:loss = 8.651245, step = 39700 (1.688 sec)
INFO:tensorflow:global_step/sec: 58.2378
INFO:tensorflow:loss = 7.3529544, step = 39800 (1.717 sec)
INFO:tensorflow:global_step/sec: 64.7306
INFO:tensorflow:loss = 13.1825485, step = 39900 (1.544 sec)
INFO:tensorflow:global_step/sec: 70.96
INFO:tensorflow:loss = 13.649188, step = 40000 (1.409 sec)
INFO:tensorflow:global_step/sec: 67.8207
INFO:tensorflow:loss = 21.984915, step = 40100 (1.474 sec)
INFO:tensorflow:global_step/sec: 66.7085
INFO:tensorflow:loss = 8.091708, step = 40200 (1.499 sec)
INFO:tensorflow:global_step/sec: 58.7592
INFO:tensorflow:loss = 7.165436, step = 40300 (1.702 sec)
INFO:tensorflow:global_step/sec: 60.3409
INFO:tensorflow:loss = 8.117997, step = 40400 (1.657 sec)
INFO:tensorflow:global_step/sec: 62.6646
INFO:tensorflow:loss = 10.204683, step = 40500 (1.596 sec)
INFO:tensorflow:global_step/sec: 72.6944
INFO:

INFO:tensorflow:global_step/sec: 66.2794
INFO:tensorflow:loss = 5.726921, step = 47800 (1.509 sec)
INFO:tensorflow:global_step/sec: 72.762
INFO:tensorflow:loss = 6.0945005, step = 47900 (1.374 sec)
INFO:tensorflow:global_step/sec: 75.1817
INFO:tensorflow:loss = 11.357817, step = 48000 (1.330 sec)
INFO:tensorflow:global_step/sec: 73.3668
INFO:tensorflow:loss = 11.732589, step = 48100 (1.363 sec)
INFO:tensorflow:global_step/sec: 76.9532
INFO:tensorflow:loss = 15.384505, step = 48200 (1.300 sec)
INFO:tensorflow:global_step/sec: 82.4119
INFO:tensorflow:loss = 11.501652, step = 48300 (1.213 sec)
INFO:tensorflow:global_step/sec: 84.0927
INFO:tensorflow:loss = 6.0913906, step = 48400 (1.189 sec)
INFO:tensorflow:global_step/sec: 82.8867
INFO:tensorflow:loss = 5.001649, step = 48500 (1.206 sec)
INFO:tensorflow:global_step/sec: 84.3043
INFO:tensorflow:loss = 9.927964, step = 48600 (1.186 sec)
INFO:tensorflow:global_step/sec: 83.1452
INFO:tensorflow:loss = 14.070978, step = 48700 (1.203 sec)
INFO

INFO:tensorflow:loss = 6.591506, step = 56000 (1.374 sec)
INFO:tensorflow:global_step/sec: 75.2041
INFO:tensorflow:loss = 9.054193, step = 56100 (1.330 sec)
INFO:tensorflow:global_step/sec: 76.2654
INFO:tensorflow:loss = 7.343486, step = 56200 (1.311 sec)
INFO:tensorflow:global_step/sec: 79.1873
INFO:tensorflow:loss = 8.926682, step = 56300 (1.263 sec)
INFO:tensorflow:global_step/sec: 75.8021
INFO:tensorflow:loss = 7.4456286, step = 56400 (1.319 sec)
INFO:tensorflow:global_step/sec: 79.654
INFO:tensorflow:loss = 7.8378625, step = 56500 (1.255 sec)
INFO:tensorflow:global_step/sec: 71.8408
INFO:tensorflow:loss = 10.07394, step = 56600 (1.392 sec)
INFO:tensorflow:global_step/sec: 68.8407
INFO:tensorflow:loss = 11.909215, step = 56700 (1.452 sec)
INFO:tensorflow:global_step/sec: 74.584
INFO:tensorflow:loss = 6.655011, step = 56800 (1.341 sec)
INFO:tensorflow:global_step/sec: 72.5421
INFO:tensorflow:loss = 13.259745, step = 56900 (1.379 sec)
INFO:tensorflow:global_step/sec: 72.5015
INFO:ten

INFO:tensorflow:global_step/sec: 84.0246
INFO:tensorflow:loss = 11.062559, step = 64300 (1.190 sec)
INFO:tensorflow:global_step/sec: 81.961
INFO:tensorflow:loss = 12.215051, step = 64400 (1.220 sec)
INFO:tensorflow:global_step/sec: 81.6896
INFO:tensorflow:loss = 9.332947, step = 64500 (1.224 sec)
INFO:tensorflow:global_step/sec: 81.1867
INFO:tensorflow:loss = 5.6282673, step = 64600 (1.232 sec)
INFO:tensorflow:global_step/sec: 85.5192
INFO:tensorflow:loss = 12.301169, step = 64700 (1.169 sec)
INFO:tensorflow:global_step/sec: 84.1272
INFO:tensorflow:loss = 7.195067, step = 64800 (1.189 sec)
INFO:tensorflow:global_step/sec: 82.4218
INFO:tensorflow:loss = 12.792678, step = 64900 (1.213 sec)
INFO:tensorflow:global_step/sec: 85.0158
INFO:tensorflow:loss = 6.3598022, step = 65000 (1.176 sec)
INFO:tensorflow:global_step/sec: 82.3367
INFO:tensorflow:loss = 8.184109, step = 65100 (1.215 sec)
INFO:tensorflow:global_step/sec: 70.0839
INFO:tensorflow:loss = 9.015247, step = 65200 (1.427 sec)
INFO:

<tensorflow_estimator.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedRegressorV2 at 0x1a6d56a9d0>

In [62]:
# Evaluate the combined model on the same input function the other models use
# (`eval_fn`), so the metrics are comparable with `linear_evaluation` and
# `dense_evaluation`, and so the cell does not depend on a separately defined
# `val_fn`. The metrics are stored so later cells can reuse them.
combined_evaluation = combined.evaluate(input_fn=eval_fn)
print(combined_evaluation)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-04-22T17:26:11Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/xm/rvswt1sx4rdf56_wpqz5rt4c0000gn/T/tmpprz6l8f5/model.ckpt-68255
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished 

In [63]:
# Stream predictions from the combined model, keep the first value of each
# 'predictions' entry, and line them up against the fares in `val`.
p = combined.predict(input_fn=eval_fn)
preds = np.array([pred['predictions'][0] for pred in p])
_c = pd.DataFrame({
    'actual': val[['fare_amount']].values.ravel(),
    'prediction': preds.ravel(),
})

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/xm/rvswt1sx4rdf56_wpqz5rt4c0000gn/T/tmpprz6l8f5/model.ckpt-68255
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [64]:
# Peek at the first ten actual/prediction pairs for the combined model.
_c.head(10)

Unnamed: 0,actual,prediction
0,5.3,6.402178
1,4.5,6.038137
2,4.5,6.035545
3,24.9,22.713215
4,5.7,8.190193
5,12.9,7.922903
6,9.0,8.629894
7,14.5,14.044405
8,10.5,11.086047
9,5.0,5.892992


In [65]:
# Mean squared error of the combined model's predictions against the fares in `val`.
print('MSE: {}'.format(mean_squared_error(
    val[['fare_amount']].values.ravel(),
    preds.ravel(),
)))

MSE: 10.662179912335889


# Number of parameters we were dealing with

In [None]:
# Stand-alone Keras model built only to count parameters: two 512-unit ReLU
# layers followed by a single-unit output layer.
# NOTE(review): 116000 is presumably the width of the expanded input feature
# vector seen by the DNN part of the combined model — TODO confirm.
INPUT_DIM = 116000

# Use `tf.keras` (the `tf` namespace is what the rest of the notebook relies
# on) rather than a bare `keras` name that may not have been imported.
d = tf.keras.Sequential([
    tf.keras.layers.Dense(512, input_shape=(INPUT_DIM, ), activation='relu'),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1)
])
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
d.summary()