In [226]:
# A linear regression learning algorithm example using TensorFlow library.

from __future__ import print_function

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.contrib import learn
from sklearn import metrics
import random

rng = np.random

#read csv file
datapath = "./"
Ha_Noi = pd.read_csv(datapath+"OnlineDrivers_HaNoi_10days.csv")
#Add an additional column into the table
# sLength = len(Ha_Noi['accept_rate'])
Ha_Noi['accept_rate_timeT'] = pd.Series(Ha_Noi['accept_rate'], index=Ha_Noi.index)
#Shift the entries in the accept_rate column upward
Ha_Noi.accept_rate = Ha_Noi.accept_rate.shift(-1)
Ha_Noi['Pricing_timeT'] =  pd.Series(Ha_Noi['Pricing'], index=Ha_Noi.index)
Ha_Noi.Pricing = Ha_Noi.Pricing.shift(-1)

#Drop all the "na" entries in the original table
Ha_Noi = Ha_Noi.dropna(subset = ["longwait_percent4"])
Ha_Noi = Ha_Noi.dropna(subset=["accept_rate"])
Ha_Noi = Ha_Noi.dropna(subset = ["longwait_percent2"])
Ha_Noi = Ha_Noi.drop(Ha_Noi[Ha_Noi.Percentchange_onlinedrivers == 0].index)
Ha_Noi = Ha_Noi.dropna(subset = ["DriverBusyRate"])


#define normalized function for our dataset
# def normalize(array):
#     return (array - array.mean()) / array.std()

df2 = pd.DataFrame(Ha_Noi)

#split the dataset into training and testing sets
train_set, test_set = train_test_split(Ha_Noi, test_size=0.2, random_state = random.randint(20, 200))


# Training Data
train_X =  train_set[['Pricing_timeT', 'accept_rate_timeT', 'request', 'long_waiting', 'longwait_percent1','Request/Supply', 'DriverBusyRate', 'Hour', 'wd1', 'wd2', 'wd3', 'wd4', 'wd5', 'wd6', 'wd7']]
train_Y =  train_set['Pricing']

#Testing Data
Xtest = test_set[['Pricing_timeT','accept_rate_timeT', 'request','long_waiting', 'longwait_percent1', 'Request/Supply','DriverBusyRate', 'Hour', 'wd1', 'wd2', 'wd3', 'wd4', 'wd5', 'wd6', 'wd7']]
Ytest = test_set['Pricing']


# 'longwait_percent2', 'accept_rate_timeT', 'Percentchange_onlinedrivers','DriverBusyRate'





Comment: We choose all the features that have the highest positive correlations with Pricing based on the correlation testing obtained in the 3rd slide. This resulted in *7* features plus *2* obvious *lattitude features* to take into account the weekly and seasonality effects (i.e, Hour and Day of the week).

The reason we decided to add the acceptance rate in the previous 20-mins is because of its strong negative correlations with pricing, which prevents the model from overfitting and *gives* it the ability to now when to *decrease* the price (as it's clear that pricing has strong negative correlation with acceptance rate in the previous 20-mins, at least based on our historical data from July 3rd to August 2nd). Without it, our best model - Random Forest Regressor - would always *increase* the price at the moment when we *actually* decreased it (we obtain this observation by testing our Random Forest Regression model with historical data). Also, we chose the factor "longwait_percent1" mainly due to its stronger positive correlation with pricing compared to other "longwait_percentX" factors (X = 2,3,4). Longwait_Percent1 demonstrates the fact that the later a request is made in the 20-min time interval, the higher importance it is to the acceptance rate in the next 20-min period. The factor Request/Supply is added because our goal for this project is to choose a price that *BALANCES* supply (number of online drivers) vs demand (total request), which is the same as balance the magnitude of Request/Supply. 

In [202]:
# Parameters
num_epochs = 1000
STEPS = 150000
BATCH_SIZE = 80

#Deep Neural Network Regressor 
feature_column1 = learn.infer_real_valued_columns_from_input(train_X)
# feature_column2 = learn.infer_real_valued_columns_from_input(train_X2)
regressor = learn.DNNRegressor(feature_columns = feature_column1, hidden_units= [100,100], model_dir = "./SP2")
regressor.fit(train_X, train_Y, max_steps= STEPS, batch_size= BATCH_SIZE)
Ypred = regressor.predict_scores(Xtest, as_iterable=False)
Ypred = np.asarray(list(Ypred))
rmse = np.sqrt(((Ypred - Ytest) ** 2).mean(axis=0))
print("Root mean square Error: %.3f" %rmse)


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd974649850>, '_model_dir': './SP2', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_tf_random_seed': None, '_environment': 'local', '_num_worker_replicas': 0, '_task_id': 0, '_save_summary_steps': 100, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_evaluation_master': '', '_master': ''}
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Estimator is decoupled from Sci

INFO:tensorflow:global_step/sec: 246.9
INFO:tensorflow:loss = 0.004765, step = 5201 (0.406 sec)
INFO:tensorflow:global_step/sec: 233.792
INFO:tensorflow:loss = 0.00406946, step = 5301 (0.427 sec)
INFO:tensorflow:global_step/sec: 238.695
INFO:tensorflow:loss = 0.00406121, step = 5401 (0.419 sec)
INFO:tensorflow:global_step/sec: 253.653
INFO:tensorflow:loss = 0.00407988, step = 5501 (0.397 sec)
INFO:tensorflow:global_step/sec: 210.739
INFO:tensorflow:loss = 0.00283275, step = 5601 (0.473 sec)
INFO:tensorflow:global_step/sec: 208.756
INFO:tensorflow:loss = 0.00196232, step = 5701 (0.479 sec)
INFO:tensorflow:global_step/sec: 211.548
INFO:tensorflow:loss = 0.00666998, step = 5801 (0.473 sec)
INFO:tensorflow:global_step/sec: 199.943
INFO:tensorflow:loss = 0.00371534, step = 5901 (0.502 sec)
INFO:tensorflow:global_step/sec: 219.902
INFO:tensorflow:loss = 0.00289057, step = 6001 (0.452 sec)
INFO:tensorflow:global_step/sec: 223.297
INFO:tensorflow:loss = 0.00277284, step = 6101 (0.449 sec)
INFO

INFO:tensorflow:global_step/sec: 223.302
INFO:tensorflow:loss = 0.00253478, step = 13401 (0.449 sec)
INFO:tensorflow:global_step/sec: 225.294
INFO:tensorflow:loss = 0.00259623, step = 13501 (0.443 sec)
INFO:tensorflow:global_step/sec: 244.41
INFO:tensorflow:loss = 0.00238001, step = 13601 (0.409 sec)
INFO:tensorflow:global_step/sec: 236.728
INFO:tensorflow:loss = 0.00225737, step = 13701 (0.422 sec)
INFO:tensorflow:global_step/sec: 234.184
INFO:tensorflow:loss = 0.00192157, step = 13801 (0.427 sec)
INFO:tensorflow:global_step/sec: 195.715
INFO:tensorflow:loss = 0.00251663, step = 13901 (0.511 sec)
INFO:tensorflow:global_step/sec: 220.29
INFO:tensorflow:loss = 0.00248308, step = 14001 (0.454 sec)
INFO:tensorflow:global_step/sec: 255.577
INFO:tensorflow:loss = 0.00290415, step = 14101 (0.392 sec)
INFO:tensorflow:global_step/sec: 265.21
INFO:tensorflow:loss = 0.00240157, step = 14201 (0.376 sec)
INFO:tensorflow:global_step/sec: 244.526
INFO:tensorflow:loss = 0.00317388, step = 14301 (0.41

INFO:tensorflow:loss = 0.00162589, step = 21501 (0.458 sec)
INFO:tensorflow:global_step/sec: 182.534
INFO:tensorflow:loss = 0.00240163, step = 21601 (0.548 sec)
INFO:tensorflow:global_step/sec: 211.32
INFO:tensorflow:loss = 0.00277375, step = 21701 (0.473 sec)
INFO:tensorflow:global_step/sec: 243.512
INFO:tensorflow:loss = 0.0017301, step = 21801 (0.410 sec)
INFO:tensorflow:global_step/sec: 229.026
INFO:tensorflow:loss = 0.00173283, step = 21901 (0.437 sec)
INFO:tensorflow:global_step/sec: 235.147
INFO:tensorflow:loss = 0.00177156, step = 22001 (0.426 sec)
INFO:tensorflow:global_step/sec: 216.115
INFO:tensorflow:loss = 0.00059754, step = 22101 (0.462 sec)
INFO:tensorflow:global_step/sec: 200.92
INFO:tensorflow:loss = 0.00183902, step = 22201 (0.499 sec)
INFO:tensorflow:global_step/sec: 248.743
INFO:tensorflow:loss = 0.00147436, step = 22301 (0.403 sec)
INFO:tensorflow:global_step/sec: 205.444
INFO:tensorflow:loss = 0.00139948, step = 22401 (0.485 sec)
INFO:tensorflow:global_step/sec: 2

INFO:tensorflow:global_step/sec: 208.571
INFO:tensorflow:loss = 0.00131411, step = 29701 (0.479 sec)
INFO:tensorflow:global_step/sec: 224.252
INFO:tensorflow:loss = 0.00490567, step = 29801 (0.453 sec)
INFO:tensorflow:global_step/sec: 223.994
INFO:tensorflow:loss = 0.00291178, step = 29901 (0.440 sec)
INFO:tensorflow:global_step/sec: 236.665
INFO:tensorflow:loss = 0.00120363, step = 30001 (0.422 sec)
INFO:tensorflow:global_step/sec: 207.911
INFO:tensorflow:loss = 0.00145765, step = 30101 (0.481 sec)
INFO:tensorflow:global_step/sec: 228.17
INFO:tensorflow:loss = 0.00206954, step = 30201 (0.438 sec)
INFO:tensorflow:global_step/sec: 214.953
INFO:tensorflow:loss = 0.00110666, step = 30301 (0.468 sec)
INFO:tensorflow:global_step/sec: 209.038
INFO:tensorflow:loss = 0.00229511, step = 30401 (0.478 sec)
INFO:tensorflow:global_step/sec: 229.305
INFO:tensorflow:loss = 0.00404718, step = 30501 (0.433 sec)
INFO:tensorflow:global_step/sec: 226.437
INFO:tensorflow:loss = 0.00226187, step = 30601 (0.

INFO:tensorflow:loss = 0.00125989, step = 37801 (0.460 sec)
INFO:tensorflow:global_step/sec: 214.01
INFO:tensorflow:loss = 0.00123192, step = 37901 (0.467 sec)
INFO:tensorflow:global_step/sec: 213.001
INFO:tensorflow:loss = 0.00112853, step = 38001 (0.470 sec)
INFO:tensorflow:global_step/sec: 226.367
INFO:tensorflow:loss = 0.00162328, step = 38101 (0.441 sec)
INFO:tensorflow:global_step/sec: 204.856
INFO:tensorflow:loss = 0.000929876, step = 38201 (0.491 sec)
INFO:tensorflow:global_step/sec: 214.898
INFO:tensorflow:loss = 0.00466165, step = 38301 (0.463 sec)
INFO:tensorflow:global_step/sec: 222.691
INFO:tensorflow:loss = 0.00307418, step = 38401 (0.449 sec)
INFO:tensorflow:global_step/sec: 235.423
INFO:tensorflow:loss = 0.00124633, step = 38501 (0.429 sec)
INFO:tensorflow:global_step/sec: 228.643
INFO:tensorflow:loss = 0.000799215, step = 38601 (0.434 sec)
INFO:tensorflow:global_step/sec: 212.182
INFO:tensorflow:loss = 0.00144287, step = 38701 (0.471 sec)
INFO:tensorflow:global_step/se

INFO:tensorflow:global_step/sec: 223.804
INFO:tensorflow:loss = 0.00375559, step = 46001 (0.448 sec)
INFO:tensorflow:global_step/sec: 231.102
INFO:tensorflow:loss = 0.000797202, step = 46101 (0.432 sec)
INFO:tensorflow:global_step/sec: 224.034
INFO:tensorflow:loss = 0.00100586, step = 46201 (0.447 sec)
INFO:tensorflow:global_step/sec: 214.566
INFO:tensorflow:loss = 0.00116631, step = 46301 (0.465 sec)
INFO:tensorflow:global_step/sec: 256.563
INFO:tensorflow:loss = 0.00103565, step = 46401 (0.392 sec)
INFO:tensorflow:global_step/sec: 230.057
INFO:tensorflow:loss = 0.00113366, step = 46501 (0.433 sec)
INFO:tensorflow:global_step/sec: 221.847
INFO:tensorflow:loss = 0.00115137, step = 46601 (0.451 sec)
INFO:tensorflow:global_step/sec: 236.081
INFO:tensorflow:loss = 0.00166049, step = 46701 (0.423 sec)
INFO:tensorflow:global_step/sec: 230.592
INFO:tensorflow:loss = 0.000714571, step = 46801 (0.434 sec)
INFO:tensorflow:global_step/sec: 228.579
INFO:tensorflow:loss = 0.00102048, step = 46901 

INFO:tensorflow:loss = 0.00142215, step = 54101 (0.537 sec)
INFO:tensorflow:global_step/sec: 245.189
INFO:tensorflow:loss = 0.00497272, step = 54201 (0.407 sec)
INFO:tensorflow:global_step/sec: 234.66
INFO:tensorflow:loss = 0.000667256, step = 54301 (0.426 sec)
INFO:tensorflow:global_step/sec: 215.542
INFO:tensorflow:loss = 0.00253236, step = 54401 (0.464 sec)
INFO:tensorflow:global_step/sec: 212.382
INFO:tensorflow:loss = 0.00386255, step = 54501 (0.471 sec)
INFO:tensorflow:global_step/sec: 187.853
INFO:tensorflow:loss = 0.00107172, step = 54601 (0.531 sec)
INFO:tensorflow:global_step/sec: 202.64
INFO:tensorflow:loss = 0.00194963, step = 54701 (0.494 sec)
INFO:tensorflow:global_step/sec: 239.535
INFO:tensorflow:loss = 0.00101877, step = 54801 (0.417 sec)
INFO:tensorflow:global_step/sec: 226.607
INFO:tensorflow:loss = 0.00102398, step = 54901 (0.441 sec)
INFO:tensorflow:global_step/sec: 211.905
INFO:tensorflow:loss = 0.00133215, step = 55001 (0.472 sec)
INFO:tensorflow:global_step/sec:

INFO:tensorflow:global_step/sec: 219.015
INFO:tensorflow:loss = 0.00158786, step = 62301 (0.459 sec)
INFO:tensorflow:global_step/sec: 207.795
INFO:tensorflow:loss = 0.0043379, step = 62401 (0.479 sec)
INFO:tensorflow:global_step/sec: 222.963
INFO:tensorflow:loss = 0.000767242, step = 62501 (0.450 sec)
INFO:tensorflow:global_step/sec: 172.255
INFO:tensorflow:loss = 0.00346081, step = 62601 (0.581 sec)
INFO:tensorflow:global_step/sec: 179.46
INFO:tensorflow:loss = 0.00270994, step = 62701 (0.556 sec)
INFO:tensorflow:global_step/sec: 225.335
INFO:tensorflow:loss = 0.00133783, step = 62801 (0.443 sec)
INFO:tensorflow:global_step/sec: 206.64
INFO:tensorflow:loss = 0.0018723, step = 62901 (0.485 sec)
INFO:tensorflow:global_step/sec: 190.873
INFO:tensorflow:loss = 0.00227626, step = 63001 (0.526 sec)
INFO:tensorflow:global_step/sec: 205.105
INFO:tensorflow:loss = 0.00131853, step = 63101 (0.485 sec)
INFO:tensorflow:global_step/sec: 226.324
INFO:tensorflow:loss = 0.000771994, step = 63201 (0.4

INFO:tensorflow:loss = 0.000768678, step = 70401 (0.425 sec)
INFO:tensorflow:global_step/sec: 246.808
INFO:tensorflow:loss = 0.000896531, step = 70501 (0.405 sec)
INFO:tensorflow:global_step/sec: 224.501
INFO:tensorflow:loss = 0.00259908, step = 70601 (0.446 sec)
INFO:tensorflow:global_step/sec: 248.095
INFO:tensorflow:loss = 0.000829095, step = 70701 (0.403 sec)
INFO:tensorflow:global_step/sec: 248.628
INFO:tensorflow:loss = 0.00188448, step = 70801 (0.405 sec)
INFO:tensorflow:global_step/sec: 224.373
INFO:tensorflow:loss = 0.0011344, step = 70901 (0.444 sec)
INFO:tensorflow:global_step/sec: 223.117
INFO:tensorflow:loss = 0.00140281, step = 71001 (0.448 sec)
INFO:tensorflow:global_step/sec: 240.621
INFO:tensorflow:loss = 0.000883656, step = 71101 (0.416 sec)
INFO:tensorflow:global_step/sec: 235.802
INFO:tensorflow:loss = 0.00162611, step = 71201 (0.424 sec)
INFO:tensorflow:global_step/sec: 237.069
INFO:tensorflow:loss = 0.000522698, step = 71301 (0.422 sec)
INFO:tensorflow:global_step

INFO:tensorflow:loss = 0.000907924, step = 78501 (0.441 sec)
INFO:tensorflow:global_step/sec: 244.609
INFO:tensorflow:loss = 0.00114846, step = 78601 (0.409 sec)
INFO:tensorflow:global_step/sec: 218.186
INFO:tensorflow:loss = 0.00130767, step = 78701 (0.458 sec)
INFO:tensorflow:global_step/sec: 238.389
INFO:tensorflow:loss = 0.00149522, step = 78801 (0.419 sec)
INFO:tensorflow:global_step/sec: 230.68
INFO:tensorflow:loss = 0.00224256, step = 78901 (0.434 sec)
INFO:tensorflow:global_step/sec: 239.869
INFO:tensorflow:loss = 0.00388891, step = 79001 (0.417 sec)
INFO:tensorflow:global_step/sec: 235.335
INFO:tensorflow:loss = 0.00235297, step = 79101 (0.424 sec)
INFO:tensorflow:global_step/sec: 223.864
INFO:tensorflow:loss = 0.00101695, step = 79201 (0.447 sec)
INFO:tensorflow:global_step/sec: 252.256
INFO:tensorflow:loss = 0.00189639, step = 79301 (0.396 sec)
INFO:tensorflow:global_step/sec: 193.169
INFO:tensorflow:loss = 0.000699501, step = 79401 (0.518 sec)
INFO:tensorflow:global_step/se

INFO:tensorflow:loss = 0.000673416, step = 86601 (0.479 sec)
INFO:tensorflow:global_step/sec: 232.151
INFO:tensorflow:loss = 0.00198294, step = 86701 (0.431 sec)
INFO:tensorflow:global_step/sec: 211.459
INFO:tensorflow:loss = 0.000562013, step = 86801 (0.473 sec)
INFO:tensorflow:global_step/sec: 196.648
INFO:tensorflow:loss = 0.00415467, step = 86901 (0.509 sec)
INFO:tensorflow:global_step/sec: 216.972
INFO:tensorflow:loss = 0.000786157, step = 87001 (0.463 sec)
INFO:tensorflow:global_step/sec: 264.37
INFO:tensorflow:loss = 0.000789475, step = 87101 (0.376 sec)
INFO:tensorflow:global_step/sec: 240.999
INFO:tensorflow:loss = 0.000674632, step = 87201 (0.415 sec)
INFO:tensorflow:global_step/sec: 240.699
INFO:tensorflow:loss = 0.00162107, step = 87301 (0.416 sec)
INFO:tensorflow:global_step/sec: 229.3
INFO:tensorflow:loss = 0.000317376, step = 87401 (0.436 sec)
INFO:tensorflow:global_step/sec: 233.34
INFO:tensorflow:loss = 0.00098381, step = 87501 (0.429 sec)
INFO:tensorflow:global_step/s

INFO:tensorflow:global_step/sec: 230.866
INFO:tensorflow:loss = 0.000840369, step = 94801 (0.433 sec)
INFO:tensorflow:global_step/sec: 210.552
INFO:tensorflow:loss = 0.000986505, step = 94901 (0.476 sec)
INFO:tensorflow:global_step/sec: 225.386
INFO:tensorflow:loss = 0.000459074, step = 95001 (0.442 sec)
INFO:tensorflow:global_step/sec: 234.84
INFO:tensorflow:loss = 0.000411859, step = 95101 (0.426 sec)
INFO:tensorflow:global_step/sec: 242.702
INFO:tensorflow:loss = 0.000469251, step = 95201 (0.412 sec)
INFO:tensorflow:global_step/sec: 242.534
INFO:tensorflow:loss = 0.00208349, step = 95301 (0.412 sec)
INFO:tensorflow:global_step/sec: 228.987
INFO:tensorflow:loss = 0.000691915, step = 95401 (0.437 sec)
INFO:tensorflow:global_step/sec: 231.311
INFO:tensorflow:loss = 0.00372659, step = 95501 (0.433 sec)
INFO:tensorflow:global_step/sec: 193.671
INFO:tensorflow:loss = 0.000594726, step = 95601 (0.516 sec)
INFO:tensorflow:global_step/sec: 207.247
INFO:tensorflow:loss = 0.00247883, step = 95

INFO:tensorflow:global_step/sec: 201.442
INFO:tensorflow:loss = 0.000734008, step = 102901 (0.497 sec)
INFO:tensorflow:global_step/sec: 213.266
INFO:tensorflow:loss = 0.00011847, step = 103001 (0.469 sec)
INFO:tensorflow:global_step/sec: 172.473
INFO:tensorflow:loss = 0.000443921, step = 103101 (0.580 sec)
INFO:tensorflow:global_step/sec: 190.792
INFO:tensorflow:loss = 0.0010296, step = 103201 (0.525 sec)
INFO:tensorflow:global_step/sec: 172.608
INFO:tensorflow:loss = 0.00366137, step = 103301 (0.585 sec)
INFO:tensorflow:global_step/sec: 211.628
INFO:tensorflow:loss = 0.0012554, step = 103401 (0.465 sec)
INFO:tensorflow:global_step/sec: 223.583
INFO:tensorflow:loss = 0.000775771, step = 103501 (0.447 sec)
INFO:tensorflow:global_step/sec: 194.739
INFO:tensorflow:loss = 0.00102331, step = 103601 (0.517 sec)
INFO:tensorflow:global_step/sec: 205.439
INFO:tensorflow:loss = 0.0015602, step = 103701 (0.484 sec)
INFO:tensorflow:global_step/sec: 220.34
INFO:tensorflow:loss = 0.00067556, step = 

INFO:tensorflow:loss = 0.00111492, step = 110901 (0.509 sec)
INFO:tensorflow:global_step/sec: 200.419
INFO:tensorflow:loss = 0.000285279, step = 111001 (0.500 sec)
INFO:tensorflow:global_step/sec: 198.131
INFO:tensorflow:loss = 0.000780794, step = 111101 (0.504 sec)
INFO:tensorflow:global_step/sec: 212.05
INFO:tensorflow:loss = 0.000913606, step = 111201 (0.474 sec)
INFO:tensorflow:global_step/sec: 230.816
INFO:tensorflow:loss = 0.00151727, step = 111301 (0.434 sec)
INFO:tensorflow:global_step/sec: 226.853
INFO:tensorflow:loss = 0.0012473, step = 111401 (0.438 sec)
INFO:tensorflow:global_step/sec: 222.836
INFO:tensorflow:loss = 0.000508589, step = 111501 (0.449 sec)
INFO:tensorflow:global_step/sec: 189.165
INFO:tensorflow:loss = 0.000778121, step = 111601 (0.529 sec)
INFO:tensorflow:global_step/sec: 178.437
INFO:tensorflow:loss = 0.000688433, step = 111701 (0.560 sec)
INFO:tensorflow:global_step/sec: 226.677
INFO:tensorflow:loss = 0.000725292, step = 111801 (0.442 sec)
INFO:tensorflow:

INFO:tensorflow:global_step/sec: 220.592
INFO:tensorflow:loss = 0.000871286, step = 119001 (0.458 sec)
INFO:tensorflow:global_step/sec: 238.171
INFO:tensorflow:loss = 0.000955051, step = 119101 (0.415 sec)
INFO:tensorflow:global_step/sec: 253.96
INFO:tensorflow:loss = 0.000457973, step = 119201 (0.398 sec)
INFO:tensorflow:global_step/sec: 244.629
INFO:tensorflow:loss = 0.00180315, step = 119301 (0.405 sec)
INFO:tensorflow:global_step/sec: 236.7
INFO:tensorflow:loss = 0.00136002, step = 119401 (0.423 sec)
INFO:tensorflow:global_step/sec: 251.504
INFO:tensorflow:loss = 0.00107463, step = 119501 (0.398 sec)
INFO:tensorflow:global_step/sec: 179.572
INFO:tensorflow:loss = 0.00512096, step = 119601 (0.557 sec)
INFO:tensorflow:global_step/sec: 205.465
INFO:tensorflow:loss = 0.00143846, step = 119701 (0.487 sec)
INFO:tensorflow:global_step/sec: 253.828
INFO:tensorflow:loss = 0.00049203, step = 119801 (0.393 sec)
INFO:tensorflow:global_step/sec: 241.423
INFO:tensorflow:loss = 0.00108801, step =

INFO:tensorflow:loss = 0.00106822, step = 127001 (0.479 sec)
INFO:tensorflow:global_step/sec: 248.118
INFO:tensorflow:loss = 0.00339721, step = 127101 (0.403 sec)
INFO:tensorflow:global_step/sec: 214.174
INFO:tensorflow:loss = 0.000782967, step = 127201 (0.467 sec)
INFO:tensorflow:global_step/sec: 207.952
INFO:tensorflow:loss = 0.000641748, step = 127301 (0.481 sec)
INFO:tensorflow:global_step/sec: 209.35
INFO:tensorflow:loss = 0.00110782, step = 127401 (0.477 sec)
INFO:tensorflow:global_step/sec: 198.719
INFO:tensorflow:loss = 0.000358608, step = 127501 (0.503 sec)
INFO:tensorflow:global_step/sec: 188.314
INFO:tensorflow:loss = 0.000722912, step = 127601 (0.531 sec)
INFO:tensorflow:global_step/sec: 208.496
INFO:tensorflow:loss = 0.000685916, step = 127701 (0.479 sec)
INFO:tensorflow:global_step/sec: 241.375
INFO:tensorflow:loss = 0.000508415, step = 127801 (0.414 sec)
INFO:tensorflow:global_step/sec: 222.951
INFO:tensorflow:loss = 0.00101804, step = 127901 (0.449 sec)
INFO:tensorflow:

INFO:tensorflow:global_step/sec: 219.957
INFO:tensorflow:loss = 0.00080722, step = 135001 (0.454 sec)
INFO:tensorflow:global_step/sec: 254.257
INFO:tensorflow:loss = 0.00141129, step = 135101 (0.394 sec)
INFO:tensorflow:global_step/sec: 260.666
INFO:tensorflow:loss = 0.000617042, step = 135201 (0.383 sec)
INFO:tensorflow:global_step/sec: 250.126
INFO:tensorflow:loss = 0.000769439, step = 135301 (0.399 sec)
INFO:tensorflow:global_step/sec: 210.508
INFO:tensorflow:loss = 0.00369151, step = 135401 (0.478 sec)
INFO:tensorflow:global_step/sec: 219.378
INFO:tensorflow:loss = 0.000661641, step = 135501 (0.453 sec)
INFO:tensorflow:global_step/sec: 240.95
INFO:tensorflow:loss = 0.00162801, step = 135601 (0.422 sec)
INFO:tensorflow:global_step/sec: 224.842
INFO:tensorflow:loss = 0.000472984, step = 135701 (0.438 sec)
INFO:tensorflow:global_step/sec: 232.872
INFO:tensorflow:loss = 0.00138559, step = 135801 (0.429 sec)
INFO:tensorflow:global_step/sec: 254.195
INFO:tensorflow:loss = 0.000216408, st

INFO:tensorflow:loss = 0.00129573, step = 143001 (0.475 sec)
INFO:tensorflow:global_step/sec: 260.368
INFO:tensorflow:loss = 0.000907342, step = 143101 (0.384 sec)
INFO:tensorflow:global_step/sec: 242.348
INFO:tensorflow:loss = 0.00170149, step = 143201 (0.413 sec)
INFO:tensorflow:global_step/sec: 246.936
INFO:tensorflow:loss = 0.000592807, step = 143301 (0.405 sec)
INFO:tensorflow:global_step/sec: 223.075
INFO:tensorflow:loss = 0.000487236, step = 143401 (0.448 sec)
INFO:tensorflow:global_step/sec: 250.359
INFO:tensorflow:loss = 0.000435744, step = 143501 (0.399 sec)
INFO:tensorflow:global_step/sec: 240.442
INFO:tensorflow:loss = 0.00115886, step = 143601 (0.416 sec)
INFO:tensorflow:global_step/sec: 239.773
INFO:tensorflow:loss = 0.00367231, step = 143701 (0.417 sec)
INFO:tensorflow:global_step/sec: 247.449
INFO:tensorflow:loss = 0.000401315, step = 143801 (0.405 sec)
INFO:tensorflow:global_step/sec: 220.374
INFO:tensorflow:loss = 0.000784779, step = 143901 (0.453 sec)
INFO:tensorflow

Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
INFO:tensorflow:Restoring parameters from ./SP2/model.ckpt-150000
Root mean square Error: 0.038


Comment: We use DNN Regression Model with 2 layers: 1 input with 100 nodes, and 1 output with 100 nodes, and train our neural networks using the train set X obtained from a random cut to our original dataset,  and then measure its accuracy with RMSE using the test set X. We save our model into the same folder as our Python code with the file name "SP". In this case, the RMSE of DNN Regression is much higher than that of RF or Linear Regression Model

In [219]:
x = np.array([0.56,0.2, 18,0, 0, 0, 0, 0, 0, 1])
feature_column1 = learn.infer_real_valued_columns_from_input(x)

y = np.array([[1.25, 0.6, 35, 8, 0.4, 0.1136, 0.0965, 0.472222*24, 1, 0, 0, 0, 0, 0, 0]])
new_regressor = learn.DNNRegressor(feature_columns = feature_column1, hidden_units= [100,100], model_dir = './SP')
new_regressor.predict_scores(y, as_iterable = False)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd96e6c0d90>, '_model_dir': './SP', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_tf_random_seed': None, '_environment': 'local', '_num_worker_replicas': 0, '_task_id': 0, '_save_summary_steps': 100, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_evaluation_master': '', '_master': ''}
Instructions for updating:
The default behavior of predict() is changing. The default value for
as_iterable will change to True, and then the flag will be removed
altogether. The behavior of this flag is described below.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch

array([ 1.33410716], dtype=float32)

Comment: We load our saved neural network model, and then use it to make prediction with new input. Note that the dimension of the *VECTOR* input always *EQUALS* to (number of columns of train_X x 1). The prediction here is sometimes more accurate than that of RF (although they are pretty close), but most of the time it is worse than RF. Furthermore, RF is way better than Linear Regression model for this particular problem when testing with real-world inputs (based on trends and how accurate the predicted value is compared to actual).

# Correlation testing between acceptance rate, online drivers and percent change in online drivers

In [133]:
corr_matrix = Ha_Noi.corr()
corr_matrix["Pricing"].sort_values(ascending=False)

# %matplotlib inline
# import matplotlib.pyplot as plt
# df2.hist(bins = 50, figsize = (15, 15))



Pricing                                          1.000000
Pricing_timeT                                    0.897853
longwait_percent1                                0.492831
longwait_percent2                                0.492808
longwait_percent4                                0.492402
longwait_percent3                                0.490960
long_waiting                                     0.342758
wd2                                              0.159849
Request/Supply                                   0.126120
request                                          0.055332
wd1                                              0.054917
DriverBusyRate                                   0.017719
wd6                                              0.014756
wd4                                             -0.003362
Percentchange_onlinedrivers                     -0.013461
wd5                                             -0.027740
wd3                                             -0.040631
online_drivers

# Random Forest Algorithm and Model Evaluations using Cross-Validation

In [224]:
import numpy as np
from sklearn.preprocessing import LabelEncoder  
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.externals import joblib


forest_reg = RandomForestRegressor()
forest_model = forest_reg.fit(train_X, train_Y.ravel())
Ypred2 = forest_reg.predict(Xtest)

lin_reg = LinearRegression()
linreg_model = lin_reg.fit(train_X, train_Y.ravel())
Ypred3 = lin_reg.predict(Xtest)

joblib.dump(linreg_model, 'LinReg_model.csv', protocol=2) #save the Lin-Reg model into the file named "LinReg_model.pkl"
joblib.dump(forest_model, 'Forest_Model.csv', protocol=2) #save the RF model into the file named "Forest_model.pkl"


lin_mse = mean_squared_error(Ytest, Ypred2)
forest_rmse = np.sqrt(lin_mse)
print("Root Mean Square Error of RF Algo:\t",forest_rmse)

lin_mse2 = mean_squared_error(Ytest, Ypred3)
lin_rmse = np.sqrt(lin_mse2)
print("Root Mean Square Error of Linear Regression Algo:\t", lin_rmse)

#Evaluate RF algo on the whole training set by cross-validation
scores = cross_val_score(forest_reg, train_X, train_Y.ravel(), scoring = "neg_mean_squared_error", cv = 10)
forest_rmse_scores = np.sqrt(-scores)

#Evaluate RF algo on the whole test set by cross-validation
scores3 = cross_val_score(forest_reg, Xtest, Ytest.ravel(), scoring = "neg_mean_squared_error", cv = 10)
forest_rmse_scores3 = np.sqrt(-scores3)

#Evaluate Lin-Reg algo on the whole training set by cross-validation with k = 50 folds
scores2 = cross_val_score(lin_reg, train_X, train_Y.ravel(), scoring = "neg_mean_squared_error", cv = 10)
linreg_rmse_scores2 = np.sqrt(-scores2)

#Evaluate Lin-Reg algo on the test set by cross-validation
scores4 = cross_val_score(lin_reg, Xtest, Ytest.ravel(), scoring = "neg_mean_squared_error", cv = 10)
linreg_rmse_scores4 = np.sqrt(-scores4)

def display_scores(scores):
    print("Scores:", scores)
    print("Mean:", scores.mean())
    print("Standard", scores.std())
    print("Max:", scores.max())
    print("Min:", scores.min())

display_scores(linreg_rmse_scores2)
display_scores(linreg_rmse_scores4)
lin_mae_RF = mean_absolute_error(Ytest, Ypred2)
lin_mae_LR = mean_absolute_error(Ytest, Ypred3)

# display_scores(Accept_rate_prediction)
# print("Mean Square Error:\t", linreg_rmse_scores2)
# print("Mean Absolute Error:\t", lin_mae)

ValueError: Found input variables with inconsistent numbers of samples: [200, 1]

Comment: We generated Random Forest and Linear Regression model using the same train and test sets generated in the first slide. We then compute the RMSE of each model, as well as the RMSE using cross-validation with 50 folds. In both ways, RF performs, *at the very least*, as worse as Linear Regression model in terms of the RMSE.  

In [225]:
Xtest = np.array([[1.25, 0.7414, 58, 14, 0.3448, 0.1706, 0.1137, 0.458333*24, 1, 0, 0, 0, 0, 0, 0]],  dtype=np.float32)
lin_model = joblib.load('LinReg_model.csv')
forest_model = joblib.load('Forest_Model.csv')

float(lin_model.predict(Xtest)), float(forest_model.predict(Xtest))


(1.3765828491171839, 1.2)