In [20]:
# A linear regression learning algorithm example using TensorFlow library.

from __future__ import print_function

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.contrib import learn
from sklearn import metrics
import random

rng = np.random

#read csv file
datapath = "./"
Ha_Noi = pd.read_csv(datapath+"OnlineDrivers_HaNoi_10days.csv")
#Add an additional column into the table
# sLength = len(Ha_Noi['accept_rate'])
Ha_Noi['accept_rate_timeT'] = pd.Series(Ha_Noi['accept_rate'], index=Ha_Noi.index)
#Shift the entries in the accept_rate column upward
Ha_Noi.accept_rate = Ha_Noi.accept_rate.shift(-1)
Ha_Noi['Pricing_timeT'] =  pd.Series(Ha_Noi['Pricing'], index=Ha_Noi.index)
Ha_Noi.Pricing = Ha_Noi.Pricing.shift(-1)

#Drop all the "na" entries in the original table
Ha_Noi = Ha_Noi.dropna(subset = ["longwait_percent4"])
Ha_Noi = Ha_Noi.dropna(subset=["accept_rate"])
Ha_Noi = Ha_Noi.dropna(subset = ["longwait_percent2"])
Ha_Noi = Ha_Noi.drop(Ha_Noi[Ha_Noi.Percentchange_onlinedrivers == 0].index)
Ha_Noi = Ha_Noi.dropna(subset = ["DriverBusyRate"])


#define normalized function for our dataset
# def normalize(array):
#     return (array - array.mean()) / array.std()

df2 = pd.DataFrame(Ha_Noi)

#split the dataset into training and testing sets
train_set, test_set = train_test_split(Ha_Noi, test_size=0.2, random_state = random.randint(20, 200))


# Training Data
train_X =  train_set[['accept_rate_timeT',  'longwait_percent1','Request/Supply', 'DriverBusyRate', 'Hour', 'wd1', 'wd2', 'wd3', 'wd4', 'wd5', 'wd6', 'wd7']]
train_Y =  train_set['accept_rate']

#Testing Data
Xtest = test_set[['accept_rate_timeT', 'longwait_percent1', 'Request/Supply','DriverBusyRate', 'Hour', 'wd1', 'wd2', 'wd3', 'wd4', 'wd5', 'wd6', 'wd7']]
Ytest = test_set['accept_rate']


# 'longwait_percent2', 'accept_rate_timeT', 'Percentchange_onlinedrivers','DriverBusyRate'





Comment: We choose all the features that have the highest positive correlations with Pricing based on the correlation testing obtained in the 3rd slide. This resulted in *7* features plus *2* obvious *lattitude features* to take into account the weekly and seasonality effects (i.e, Hour and Day of the week).

The reason we decided to add the acceptance rate in the previous 20-mins is because of its strong negative correlations with pricing, which prevents the model from overfitting and *gives* it the ability to now when to *decrease* the price (as it's clear that pricing has strong negative correlation with acceptance rate in the previous 20-mins, at least based on our historical data from July 3rd to August 2nd). Without it, our best model - Random Forest Regressor - would always *increase* the price at the moment when we *actually* decreased it (we obtain this observation by testing our Random Forest Regression model with historical data). Also, we chose the factor "longwait_percent1" mainly due to its stronger positive correlation with pricing compared to other "longwait_percentX" factors (X = 2,3,4). Longwait_Percent1 demonstrates the fact that the later a request is made in the 20-min time interval, the higher importance it is to the acceptance rate in the next 20-min period. The factor Request/Supply is added because our goal for this project is to choose a price that *BALANCES* supply (number of online drivers) vs demand (total request), which is the same as balance the magnitude of Request/Supply. 

In [13]:
# Parameters
num_epochs = 1000
STEPS = 150000
BATCH_SIZE = 80

#Deep Neural Network Regressor 
feature_column1 = learn.infer_real_valued_columns_from_input(train_X)
# feature_column2 = learn.infer_real_valued_columns_from_input(train_X2)
regressor = learn.DNNRegressor(feature_columns = feature_column1, hidden_units= [100,100], model_dir = "./AR2")
regressor.fit(train_X, train_Y, max_steps= STEPS, batch_size= BATCH_SIZE)
Ypred = regressor.predict_scores(Xtest, as_iterable=False)
Ypred = np.asarray(list(Ypred))
rmse = np.sqrt(((Ypred - Ytest) ** 2).mean(axis=0))
print("Root mean square Error: %.3f" %rmse)


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f9438f3f410>, '_model_dir': './AR2', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_tf_random_seed': None, '_environment': 'local', '_num_worker_replicas': 0, '_task_id': 0, '_save_summary_steps': 100, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_evaluation_master': '', '_master': ''}
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Estimator is decoupled from Sci

INFO:tensorflow:loss = 0.00394395, step = 5201 (0.446 sec)
INFO:tensorflow:global_step/sec: 255.533
INFO:tensorflow:loss = 0.00508053, step = 5301 (0.391 sec)
INFO:tensorflow:global_step/sec: 229.504
INFO:tensorflow:loss = 0.00284412, step = 5401 (0.436 sec)
INFO:tensorflow:global_step/sec: 246.118
INFO:tensorflow:loss = 0.0065317, step = 5501 (0.406 sec)
INFO:tensorflow:global_step/sec: 226.885
INFO:tensorflow:loss = 0.00259175, step = 5601 (0.441 sec)
INFO:tensorflow:global_step/sec: 234.141
INFO:tensorflow:loss = 0.004104, step = 5701 (0.427 sec)
INFO:tensorflow:global_step/sec: 223.8
INFO:tensorflow:loss = 0.00352381, step = 5801 (0.449 sec)
INFO:tensorflow:global_step/sec: 239.983
INFO:tensorflow:loss = 0.00525386, step = 5901 (0.416 sec)
INFO:tensorflow:global_step/sec: 227.778
INFO:tensorflow:loss = 0.00312313, step = 6001 (0.439 sec)
INFO:tensorflow:global_step/sec: 236.3
INFO:tensorflow:loss = 0.00405835, step = 6101 (0.422 sec)
INFO:tensorflow:global_step/sec: 230.445
INFO:te

INFO:tensorflow:loss = 0.00268182, step = 13401 (0.419 sec)
INFO:tensorflow:global_step/sec: 220.419
INFO:tensorflow:loss = 0.00379591, step = 13501 (0.453 sec)
INFO:tensorflow:global_step/sec: 247.473
INFO:tensorflow:loss = 0.00330774, step = 13601 (0.407 sec)
INFO:tensorflow:global_step/sec: 241.459
INFO:tensorflow:loss = 0.00654631, step = 13701 (0.411 sec)
INFO:tensorflow:global_step/sec: 244.87
INFO:tensorflow:loss = 0.00310623, step = 13801 (0.409 sec)
INFO:tensorflow:global_step/sec: 212.915
INFO:tensorflow:loss = 0.00237217, step = 13901 (0.472 sec)
INFO:tensorflow:global_step/sec: 238.164
INFO:tensorflow:loss = 0.00342755, step = 14001 (0.417 sec)
INFO:tensorflow:global_step/sec: 216.515
INFO:tensorflow:loss = 0.00431263, step = 14101 (0.462 sec)
INFO:tensorflow:global_step/sec: 210.834
INFO:tensorflow:loss = 0.00357067, step = 14201 (0.478 sec)
INFO:tensorflow:global_step/sec: 211.637
INFO:tensorflow:loss = 0.00464908, step = 14301 (0.469 sec)
INFO:tensorflow:global_step/sec:

INFO:tensorflow:global_step/sec: 233.643
INFO:tensorflow:loss = 0.00563326, step = 21601 (0.428 sec)
INFO:tensorflow:global_step/sec: 234.517
INFO:tensorflow:loss = 0.00487904, step = 21701 (0.427 sec)
INFO:tensorflow:global_step/sec: 252.974
INFO:tensorflow:loss = 0.00533449, step = 21801 (0.394 sec)
INFO:tensorflow:global_step/sec: 227.301
INFO:tensorflow:loss = 0.00439635, step = 21901 (0.441 sec)
INFO:tensorflow:global_step/sec: 240.253
INFO:tensorflow:loss = 0.00579602, step = 22001 (0.416 sec)
INFO:tensorflow:global_step/sec: 241.378
INFO:tensorflow:loss = 0.00334599, step = 22101 (0.414 sec)
INFO:tensorflow:global_step/sec: 239.033
INFO:tensorflow:loss = 0.00520977, step = 22201 (0.418 sec)
INFO:tensorflow:global_step/sec: 213.298
INFO:tensorflow:loss = 0.00431051, step = 22301 (0.473 sec)
INFO:tensorflow:global_step/sec: 219.896
INFO:tensorflow:loss = 0.0062073, step = 22401 (0.451 sec)
INFO:tensorflow:global_step/sec: 233.678
INFO:tensorflow:loss = 0.00282924, step = 22501 (0.

INFO:tensorflow:loss = 0.00556105, step = 29701 (0.396 sec)
INFO:tensorflow:global_step/sec: 231.1
INFO:tensorflow:loss = 0.00455292, step = 29801 (0.434 sec)
INFO:tensorflow:global_step/sec: 236.926
INFO:tensorflow:loss = 0.00252384, step = 29901 (0.422 sec)
INFO:tensorflow:global_step/sec: 216.867
INFO:tensorflow:loss = 0.00288047, step = 30001 (0.460 sec)
INFO:tensorflow:global_step/sec: 216.609
INFO:tensorflow:loss = 0.00548413, step = 30101 (0.461 sec)
INFO:tensorflow:global_step/sec: 240.572
INFO:tensorflow:loss = 0.00331072, step = 30201 (0.416 sec)
INFO:tensorflow:global_step/sec: 231.425
INFO:tensorflow:loss = 0.00309901, step = 30301 (0.432 sec)
INFO:tensorflow:global_step/sec: 223.556
INFO:tensorflow:loss = 0.00379266, step = 30401 (0.447 sec)
INFO:tensorflow:global_step/sec: 251.75
INFO:tensorflow:loss = 0.00518315, step = 30501 (0.398 sec)
INFO:tensorflow:global_step/sec: 243.876
INFO:tensorflow:loss = 0.00398752, step = 30601 (0.410 sec)
INFO:tensorflow:global_step/sec: 2

INFO:tensorflow:global_step/sec: 207.659
INFO:tensorflow:loss = 0.00392294, step = 37901 (0.482 sec)
INFO:tensorflow:global_step/sec: 227.367
INFO:tensorflow:loss = 0.00365933, step = 38001 (0.439 sec)
INFO:tensorflow:global_step/sec: 200.293
INFO:tensorflow:loss = 0.00477768, step = 38101 (0.499 sec)
INFO:tensorflow:global_step/sec: 241.886
INFO:tensorflow:loss = 0.0030453, step = 38201 (0.413 sec)
INFO:tensorflow:global_step/sec: 216.878
INFO:tensorflow:loss = 0.00347875, step = 38301 (0.461 sec)
INFO:tensorflow:global_step/sec: 239.543
INFO:tensorflow:loss = 0.00341369, step = 38401 (0.417 sec)
INFO:tensorflow:global_step/sec: 209.191
INFO:tensorflow:loss = 0.00450053, step = 38501 (0.478 sec)
INFO:tensorflow:global_step/sec: 229.094
INFO:tensorflow:loss = 0.00510409, step = 38601 (0.436 sec)
INFO:tensorflow:global_step/sec: 228.477
INFO:tensorflow:loss = 0.00216789, step = 38701 (0.438 sec)
INFO:tensorflow:global_step/sec: 218.661
INFO:tensorflow:loss = 0.00429031, step = 38801 (0.

INFO:tensorflow:loss = 0.00294873, step = 46001 (0.429 sec)
INFO:tensorflow:global_step/sec: 255.072
INFO:tensorflow:loss = 0.00306468, step = 46101 (0.393 sec)
INFO:tensorflow:global_step/sec: 250.769
INFO:tensorflow:loss = 0.00315911, step = 46201 (0.398 sec)
INFO:tensorflow:global_step/sec: 242.711
INFO:tensorflow:loss = 0.00355543, step = 46301 (0.412 sec)
INFO:tensorflow:global_step/sec: 238.503
INFO:tensorflow:loss = 0.00478985, step = 46401 (0.419 sec)
INFO:tensorflow:global_step/sec: 237.938
INFO:tensorflow:loss = 0.00430052, step = 46501 (0.420 sec)
INFO:tensorflow:global_step/sec: 246.141
INFO:tensorflow:loss = 0.00267598, step = 46601 (0.406 sec)
INFO:tensorflow:global_step/sec: 254.573
INFO:tensorflow:loss = 0.00395097, step = 46701 (0.393 sec)
INFO:tensorflow:global_step/sec: 234.365
INFO:tensorflow:loss = 0.00301958, step = 46801 (0.427 sec)
INFO:tensorflow:global_step/sec: 240.712
INFO:tensorflow:loss = 0.00244904, step = 46901 (0.415 sec)
INFO:tensorflow:global_step/sec

INFO:tensorflow:global_step/sec: 210.656
INFO:tensorflow:loss = 0.00373219, step = 54201 (0.475 sec)
INFO:tensorflow:global_step/sec: 225.928
INFO:tensorflow:loss = 0.00314393, step = 54301 (0.443 sec)
INFO:tensorflow:global_step/sec: 244.594
INFO:tensorflow:loss = 0.00443, step = 54401 (0.409 sec)
INFO:tensorflow:global_step/sec: 238.18
INFO:tensorflow:loss = 0.00261185, step = 54501 (0.420 sec)
INFO:tensorflow:global_step/sec: 242.744
INFO:tensorflow:loss = 0.00371045, step = 54601 (0.412 sec)
INFO:tensorflow:global_step/sec: 188.005
INFO:tensorflow:loss = 0.00450858, step = 54701 (0.532 sec)
INFO:tensorflow:global_step/sec: 211.785
INFO:tensorflow:loss = 0.00520807, step = 54801 (0.473 sec)
INFO:tensorflow:global_step/sec: 246.8
INFO:tensorflow:loss = 0.00435506, step = 54901 (0.405 sec)
INFO:tensorflow:global_step/sec: 241.215
INFO:tensorflow:loss = 0.0050074, step = 55001 (0.414 sec)
INFO:tensorflow:global_step/sec: 189.46
INFO:tensorflow:loss = 0.00554658, step = 55101 (0.530 sec

INFO:tensorflow:loss = 0.00266385, step = 62301 (0.535 sec)
INFO:tensorflow:global_step/sec: 229.203
INFO:tensorflow:loss = 0.00492277, step = 62401 (0.436 sec)
INFO:tensorflow:global_step/sec: 225.159
INFO:tensorflow:loss = 0.00331448, step = 62501 (0.444 sec)
INFO:tensorflow:global_step/sec: 198.912
INFO:tensorflow:loss = 0.00505827, step = 62601 (0.502 sec)
INFO:tensorflow:global_step/sec: 217.771
INFO:tensorflow:loss = 0.0051737, step = 62701 (0.461 sec)
INFO:tensorflow:global_step/sec: 229.341
INFO:tensorflow:loss = 0.00308222, step = 62801 (0.434 sec)
INFO:tensorflow:global_step/sec: 232.554
INFO:tensorflow:loss = 0.00421826, step = 62901 (0.433 sec)
INFO:tensorflow:global_step/sec: 213.774
INFO:tensorflow:loss = 0.00583594, step = 63001 (0.465 sec)
INFO:tensorflow:global_step/sec: 178.137
INFO:tensorflow:loss = 0.00298746, step = 63101 (0.560 sec)
INFO:tensorflow:global_step/sec: 215.35
INFO:tensorflow:loss = 0.00411928, step = 63201 (0.464 sec)
INFO:tensorflow:global_step/sec: 

INFO:tensorflow:global_step/sec: 196.957
INFO:tensorflow:loss = 0.00436188, step = 70501 (0.508 sec)
INFO:tensorflow:global_step/sec: 233.112
INFO:tensorflow:loss = 0.00405947, step = 70601 (0.429 sec)
INFO:tensorflow:global_step/sec: 180.267
INFO:tensorflow:loss = 0.00207764, step = 70701 (0.555 sec)
INFO:tensorflow:global_step/sec: 217.636
INFO:tensorflow:loss = 0.00298443, step = 70801 (0.459 sec)
INFO:tensorflow:global_step/sec: 194.505
INFO:tensorflow:loss = 0.00624645, step = 70901 (0.514 sec)
INFO:tensorflow:global_step/sec: 180.836
INFO:tensorflow:loss = 0.00292307, step = 71001 (0.554 sec)
INFO:tensorflow:global_step/sec: 205.772
INFO:tensorflow:loss = 0.00371297, step = 71101 (0.485 sec)
INFO:tensorflow:global_step/sec: 232.366
INFO:tensorflow:loss = 0.00338444, step = 71201 (0.430 sec)
INFO:tensorflow:global_step/sec: 252.924
INFO:tensorflow:loss = 0.00285339, step = 71301 (0.395 sec)
INFO:tensorflow:global_step/sec: 237.174
INFO:tensorflow:loss = 0.0032803, step = 71401 (0.

INFO:tensorflow:loss = 0.00383183, step = 78601 (0.425 sec)
INFO:tensorflow:global_step/sec: 258.033
INFO:tensorflow:loss = 0.00271843, step = 78701 (0.388 sec)
INFO:tensorflow:global_step/sec: 231.423
INFO:tensorflow:loss = 0.00278603, step = 78801 (0.433 sec)
INFO:tensorflow:global_step/sec: 249.031
INFO:tensorflow:loss = 0.0029923, step = 78901 (0.403 sec)
INFO:tensorflow:global_step/sec: 238.459
INFO:tensorflow:loss = 0.00292525, step = 79001 (0.417 sec)
INFO:tensorflow:global_step/sec: 226.603
INFO:tensorflow:loss = 0.00316174, step = 79101 (0.441 sec)
INFO:tensorflow:global_step/sec: 227.787
INFO:tensorflow:loss = 0.00468692, step = 79201 (0.439 sec)
INFO:tensorflow:global_step/sec: 191.439
INFO:tensorflow:loss = 0.00331148, step = 79301 (0.522 sec)
INFO:tensorflow:global_step/sec: 212.79
INFO:tensorflow:loss = 0.00334753, step = 79401 (0.470 sec)
INFO:tensorflow:global_step/sec: 214.297
INFO:tensorflow:loss = 0.00304962, step = 79501 (0.467 sec)
INFO:tensorflow:global_step/sec: 

INFO:tensorflow:global_step/sec: 188.37
INFO:tensorflow:loss = 0.00600785, step = 86801 (0.531 sec)
INFO:tensorflow:global_step/sec: 227.794
INFO:tensorflow:loss = 0.00431034, step = 86901 (0.438 sec)
INFO:tensorflow:global_step/sec: 240.069
INFO:tensorflow:loss = 0.00432646, step = 87001 (0.417 sec)
INFO:tensorflow:global_step/sec: 230.617
INFO:tensorflow:loss = 0.00365968, step = 87101 (0.434 sec)
INFO:tensorflow:global_step/sec: 211.644
INFO:tensorflow:loss = 0.00817124, step = 87201 (0.472 sec)
INFO:tensorflow:global_step/sec: 242.613
INFO:tensorflow:loss = 0.00298851, step = 87301 (0.414 sec)
INFO:tensorflow:global_step/sec: 209.343
INFO:tensorflow:loss = 0.00442852, step = 87401 (0.479 sec)
INFO:tensorflow:global_step/sec: 238.052
INFO:tensorflow:loss = 0.00311929, step = 87501 (0.417 sec)
INFO:tensorflow:global_step/sec: 211.556
INFO:tensorflow:loss = 0.003257, step = 87601 (0.475 sec)
INFO:tensorflow:global_step/sec: 235.081
INFO:tensorflow:loss = 0.00279082, step = 87701 (0.42

INFO:tensorflow:loss = 0.00299932, step = 94901 (0.414 sec)
INFO:tensorflow:global_step/sec: 196.198
INFO:tensorflow:loss = 0.00632951, step = 95001 (0.509 sec)
INFO:tensorflow:global_step/sec: 233.854
INFO:tensorflow:loss = 0.00466662, step = 95101 (0.428 sec)
INFO:tensorflow:global_step/sec: 215.73
INFO:tensorflow:loss = 0.00516178, step = 95201 (0.463 sec)
INFO:tensorflow:global_step/sec: 244.3
INFO:tensorflow:loss = 0.00308119, step = 95301 (0.409 sec)
INFO:tensorflow:global_step/sec: 231.631
INFO:tensorflow:loss = 0.00410687, step = 95401 (0.432 sec)
INFO:tensorflow:global_step/sec: 228.9
INFO:tensorflow:loss = 0.00392208, step = 95501 (0.437 sec)
INFO:tensorflow:global_step/sec: 240.339
INFO:tensorflow:loss = 0.00509385, step = 95601 (0.416 sec)
INFO:tensorflow:global_step/sec: 219.695
INFO:tensorflow:loss = 0.00351108, step = 95701 (0.455 sec)
INFO:tensorflow:global_step/sec: 226.368
INFO:tensorflow:loss = 0.00502912, step = 95801 (0.442 sec)
INFO:tensorflow:global_step/sec: 204

INFO:tensorflow:global_step/sec: 231.619
INFO:tensorflow:loss = 0.00421991, step = 103101 (0.433 sec)
INFO:tensorflow:global_step/sec: 239.209
INFO:tensorflow:loss = 0.00580995, step = 103201 (0.417 sec)
INFO:tensorflow:global_step/sec: 228.833
INFO:tensorflow:loss = 0.00463066, step = 103301 (0.437 sec)
INFO:tensorflow:global_step/sec: 235.894
INFO:tensorflow:loss = 0.00383383, step = 103401 (0.424 sec)
INFO:tensorflow:global_step/sec: 250.794
INFO:tensorflow:loss = 0.00297618, step = 103501 (0.398 sec)
INFO:tensorflow:global_step/sec: 235.753
INFO:tensorflow:loss = 0.00304307, step = 103601 (0.424 sec)
INFO:tensorflow:global_step/sec: 230.768
INFO:tensorflow:loss = 0.00376673, step = 103701 (0.434 sec)
INFO:tensorflow:global_step/sec: 222.027
INFO:tensorflow:loss = 0.00442297, step = 103801 (0.450 sec)
INFO:tensorflow:global_step/sec: 234.314
INFO:tensorflow:loss = 0.00374342, step = 103901 (0.427 sec)
INFO:tensorflow:global_step/sec: 227.367
INFO:tensorflow:loss = 0.00257976, step =

INFO:tensorflow:global_step/sec: 247.589
INFO:tensorflow:loss = 0.00318035, step = 111201 (0.404 sec)
INFO:tensorflow:global_step/sec: 235.346
INFO:tensorflow:loss = 0.00392411, step = 111301 (0.427 sec)
INFO:tensorflow:global_step/sec: 207.063
INFO:tensorflow:loss = 0.00436945, step = 111401 (0.482 sec)
INFO:tensorflow:global_step/sec: 205.361
INFO:tensorflow:loss = 0.00299077, step = 111501 (0.487 sec)
INFO:tensorflow:global_step/sec: 223.697
INFO:tensorflow:loss = 0.00469037, step = 111601 (0.447 sec)
INFO:tensorflow:global_step/sec: 234.709
INFO:tensorflow:loss = 0.00422414, step = 111701 (0.426 sec)
INFO:tensorflow:global_step/sec: 235.249
INFO:tensorflow:loss = 0.00385986, step = 111801 (0.425 sec)
INFO:tensorflow:global_step/sec: 233.001
INFO:tensorflow:loss = 0.00489121, step = 111901 (0.433 sec)
INFO:tensorflow:global_step/sec: 229.668
INFO:tensorflow:loss = 0.00453465, step = 112001 (0.432 sec)
INFO:tensorflow:global_step/sec: 210.013
INFO:tensorflow:loss = 0.00387419, step =

INFO:tensorflow:global_step/sec: 246.622
INFO:tensorflow:loss = 0.00403008, step = 119301 (0.405 sec)
INFO:tensorflow:global_step/sec: 202.969
INFO:tensorflow:loss = 0.00448178, step = 119401 (0.493 sec)
INFO:tensorflow:global_step/sec: 238.723
INFO:tensorflow:loss = 0.00321866, step = 119501 (0.419 sec)
INFO:tensorflow:global_step/sec: 237.563
INFO:tensorflow:loss = 0.00502589, step = 119601 (0.421 sec)
INFO:tensorflow:global_step/sec: 234.594
INFO:tensorflow:loss = 0.00378522, step = 119701 (0.429 sec)
INFO:tensorflow:global_step/sec: 213.43
INFO:tensorflow:loss = 0.0054761, step = 119801 (0.467 sec)
INFO:tensorflow:global_step/sec: 230.383
INFO:tensorflow:loss = 0.00226165, step = 119901 (0.433 sec)
INFO:tensorflow:global_step/sec: 260.27
INFO:tensorflow:loss = 0.00460886, step = 120001 (0.384 sec)
INFO:tensorflow:global_step/sec: 245.935
INFO:tensorflow:loss = 0.00451275, step = 120101 (0.407 sec)
INFO:tensorflow:global_step/sec: 241.55
INFO:tensorflow:loss = 0.00424896, step = 120

INFO:tensorflow:global_step/sec: 233.401
INFO:tensorflow:loss = 0.00304301, step = 127401 (0.430 sec)
INFO:tensorflow:global_step/sec: 229.06
INFO:tensorflow:loss = 0.00259283, step = 127501 (0.435 sec)
INFO:tensorflow:global_step/sec: 208.222
INFO:tensorflow:loss = 0.00434242, step = 127601 (0.480 sec)
INFO:tensorflow:global_step/sec: 231.416
INFO:tensorflow:loss = 0.00294418, step = 127701 (0.432 sec)
INFO:tensorflow:global_step/sec: 228.971
INFO:tensorflow:loss = 0.00277073, step = 127801 (0.437 sec)
INFO:tensorflow:global_step/sec: 244.46
INFO:tensorflow:loss = 0.00292035, step = 127901 (0.409 sec)
INFO:tensorflow:global_step/sec: 227.042
INFO:tensorflow:loss = 0.00287024, step = 128001 (0.440 sec)
INFO:tensorflow:global_step/sec: 217.505
INFO:tensorflow:loss = 0.00471589, step = 128101 (0.461 sec)
INFO:tensorflow:global_step/sec: 233.581
INFO:tensorflow:loss = 0.00318414, step = 128201 (0.428 sec)
INFO:tensorflow:global_step/sec: 242.496
INFO:tensorflow:loss = 0.0034605, step = 12

INFO:tensorflow:global_step/sec: 212.313
INFO:tensorflow:loss = 0.00428983, step = 135501 (0.471 sec)
INFO:tensorflow:Saving checkpoints for 135529 into ./AR2/model.ckpt.
INFO:tensorflow:global_step/sec: 204.931
INFO:tensorflow:loss = 0.00467997, step = 135601 (0.488 sec)
INFO:tensorflow:global_step/sec: 228.607
INFO:tensorflow:loss = 0.00572118, step = 135701 (0.437 sec)
INFO:tensorflow:global_step/sec: 218.195
INFO:tensorflow:loss = 0.00403946, step = 135801 (0.459 sec)
INFO:tensorflow:global_step/sec: 218.254
INFO:tensorflow:loss = 0.00391542, step = 135901 (0.459 sec)
INFO:tensorflow:global_step/sec: 214.659
INFO:tensorflow:loss = 0.0026843, step = 136001 (0.465 sec)
INFO:tensorflow:global_step/sec: 203.449
INFO:tensorflow:loss = 0.00377175, step = 136101 (0.495 sec)
INFO:tensorflow:global_step/sec: 208.524
INFO:tensorflow:loss = 0.00445769, step = 136201 (0.478 sec)
INFO:tensorflow:global_step/sec: 226.465
INFO:tensorflow:loss = 0.00416569, step = 136301 (0.439 sec)
INFO:tensorflo

INFO:tensorflow:global_step/sec: 243.443
INFO:tensorflow:loss = 0.00440479, step = 143501 (0.411 sec)
INFO:tensorflow:global_step/sec: 187.362
INFO:tensorflow:loss = 0.00371234, step = 143601 (0.532 sec)
INFO:tensorflow:global_step/sec: 244.336
INFO:tensorflow:loss = 0.0042289, step = 143701 (0.411 sec)
INFO:tensorflow:global_step/sec: 234.406
INFO:tensorflow:loss = 0.0047257, step = 143801 (0.424 sec)
INFO:tensorflow:global_step/sec: 234.846
INFO:tensorflow:loss = 0.00416549, step = 143901 (0.426 sec)
INFO:tensorflow:global_step/sec: 212.362
INFO:tensorflow:loss = 0.00392387, step = 144001 (0.471 sec)
INFO:tensorflow:global_step/sec: 241.729
INFO:tensorflow:loss = 0.00332269, step = 144101 (0.413 sec)
INFO:tensorflow:global_step/sec: 221.083
INFO:tensorflow:loss = 0.00323489, step = 144201 (0.452 sec)
INFO:tensorflow:global_step/sec: 237.611
INFO:tensorflow:loss = 0.00372723, step = 144301 (0.423 sec)
INFO:tensorflow:global_step/sec: 177.065
INFO:tensorflow:loss = 0.00434072, step = 1

INFO:tensorflow:Restoring parameters from ./AR2/model.ckpt-150000
Root mean square Error: 0.069


Comment: We use DNN Regression Model with 2 layers: 1 input with 100 nodes, and 1 output with 100 nodes, and train our neural networks using the train set X obtained from a random cut to our original dataset,  and then measure its accuracy with RMSE using the test set X. We save our model into the same folder as our Python code with the file name "SP". In this case, the RMSE of DNN Regression is much higher than that of RF or Linear Regression Model

In [15]:
x = np.array([0.56,0.2, 18,0, 0, 0, 0, 0, 0, 1])
feature_column1 = learn.infer_real_valued_columns_from_input(x)

y = np.array([[0.6, 35, 0.4, 0.1136, 0.0965, 0.472222*24, 1, 0, 0, 0, 0, 0, 0]])
new_regressor = learn.DNNRegressor(feature_columns = feature_column1, hidden_units= [100,100], model_dir = './AR2')
new_regressor.predict_scores(y, as_iterable = False)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f9457b87810>, '_model_dir': './AR2', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_tf_random_seed': None, '_environment': 'local', '_num_worker_replicas': 0, '_task_id': 0, '_save_summary_steps': 100, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_evaluation_master': '', '_master': ''}
Instructions for updating:
The default behavior of predict() is changing. The default value for
as_iterable will change to True, and then the flag will be removed
altogether. The behavior of this flag is described below.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batc

INFO:tensorflow:Restoring parameters from ./AR2/model.ckpt-150000


array([ 0.37442076], dtype=float32)

Comment: We load our saved neural network model, and then use it to make prediction with new input. Note that the dimension of the *VECTOR* input always *EQUALS* to (number of columns of train_X x 1). The prediction here is sometimes more accurate than that of RF (although they are pretty close), but most of the time it is worse than RF. Furthermore, RF is way better than Linear Regression model for this particular problem when testing with real-world inputs (based on trends and how accurate the predicted value is compared to actual).

# Correlation testing between acceptance rate, online drivers and percent change in online drivers

In [133]:
corr_matrix = Ha_Noi.corr()
corr_matrix["Pricing"].sort_values(ascending=False)

# %matplotlib inline
# import matplotlib.pyplot as plt
# df2.hist(bins = 50, figsize = (15, 15))



Pricing                                          1.000000
Pricing_timeT                                    0.897853
longwait_percent1                                0.492831
longwait_percent2                                0.492808
longwait_percent4                                0.492402
longwait_percent3                                0.490960
long_waiting                                     0.342758
wd2                                              0.159849
Request/Supply                                   0.126120
request                                          0.055332
wd1                                              0.054917
DriverBusyRate                                   0.017719
wd6                                              0.014756
wd4                                             -0.003362
Percentchange_onlinedrivers                     -0.013461
wd5                                             -0.027740
wd3                                             -0.040631
online_drivers

# Random Forest Algorithm and Model Evaluations using Cross-Validation

In [21]:
import numpy as np
from sklearn.preprocessing import LabelEncoder  
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.externals import joblib


forest_reg = RandomForestRegressor()
forest_model = forest_reg.fit(train_X, train_Y.ravel())
Ypred2 = forest_reg.predict(Xtest)

lin_reg = LinearRegression()
linreg_model = lin_reg.fit(train_X, train_Y.ravel())
Ypred3 = lin_reg.predict(Xtest)

joblib.dump(linreg_model, 'LinReg_model_AR.csv', protocol=2) #save the Lin-Reg model into the file named "LinReg_model.pkl"
joblib.dump(forest_model, 'Forest_Model_AR.csv', protocol=2) #save the RF model into the file named "Forest_model.pkl"


lin_mse = mean_squared_error(Ytest, Ypred2)
forest_rmse = np.sqrt(lin_mse)
print("Root Mean Square Error of RF Algo:\t",forest_rmse)

lin_mse2 = mean_squared_error(Ytest, Ypred3)
lin_rmse = np.sqrt(lin_mse2)
print("Root Mean Square Error of Linear Regression Algo:\t", lin_rmse)

#Evaluate RF algo on the whole training set by cross-validation
scores = cross_val_score(forest_reg, train_X, train_Y.ravel(), scoring = "neg_mean_squared_error", cv = 10)
forest_rmse_scores = np.sqrt(-scores)

#Evaluate RF algo on the whole test set by cross-validation
scores3 = cross_val_score(forest_reg, Xtest, Ytest.ravel(), scoring = "neg_mean_squared_error", cv = 10)
forest_rmse_scores3 = np.sqrt(-scores3)

#Evaluate Lin-Reg algo on the whole training set by cross-validation with k = 50 folds
scores2 = cross_val_score(lin_reg, train_X, train_Y.ravel(), scoring = "neg_mean_squared_error", cv = 10)
linreg_rmse_scores2 = np.sqrt(-scores2)

#Evaluate Lin-Reg algo on the test set by cross-validation
scores4 = cross_val_score(lin_reg, Xtest, Ytest.ravel(), scoring = "neg_mean_squared_error", cv = 10)
linreg_rmse_scores4 = np.sqrt(-scores4)

def display_scores(scores):
    print("Scores:", scores)
    print("Mean:", scores.mean())
    print("Standard", scores.std())
    print("Max:", scores.max())
    print("Min:", scores.min())

display_scores(linreg_rmse_scores2)
display_scores(linreg_rmse_scores4)
lin_mae_RF = mean_absolute_error(Ytest, Ypred2)
lin_mae_LR = mean_absolute_error(Ytest, Ypred3)

# display_scores(Accept_rate_prediction)
# print("Mean Square Error:\t", linreg_rmse_scores2)
# print("Mean Absolute Error:\t", lin_mae)

Root Mean Square Error of RF Algo:	 0.0663073677532
Root Mean Square Error of Linear Regression Algo:	 0.0621289350836
Scores: [ 0.0535399   0.06848684  0.05702087  0.07269487  0.05743028  0.08190541
  0.06391762  0.063896    0.05213727  0.07086783]
Mean: 0.0641896888942
Standard 0.00897311779661
Max: 0.0819054061635
Min: 0.0521372705592
Scores: [ 0.04113159  0.04531996  0.08751574  0.0837645   0.09490616  0.04953217
  0.0425326   0.05480147  0.04993293  0.05949995]
Mean: 0.0608937070367
Standard 0.0190975040537
Max: 0.0949061595539
Min: 0.0411315942384


Comment: We generated Random Forest and Linear Regression model using the same train and test sets generated in the first slide. We then compute the RMSE of each model, as well as the RMSE using cross-validation with 50 folds. In both ways, RF performs, *at the very least*, as worse as Linear Regression model in terms of the RMSE.  

In [35]:
Xtest = np.array([[0.8515, 0.1485, 0.15981, 0.1328, 0.3333*24, 0, 0, 0, 1, 0, 0, 0]],  dtype=np.float32)
lin_model = joblib.load('LinReg_model_AR.csv')
forest_model = joblib.load('Forest_Model_AR.csv')

float(lin_model.predict(Xtest)), float(forest_model.predict(Xtest))


(0.85546875, 0.8430448160000001)