In [58]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import MiniBatchKMeans
from sklearn.model_selection import train_test_split
from sklearn import metrics
from datetime import date
from mpl_toolkits import mplot3d
import datetime
%matplotlib inline

In [59]:
train = pd.read_csv('./train.csv')
train.head()

Unnamed: 0,id,vendor_id,pickup_datetime,dropoff_datetime,passenger_count,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,store_and_fwd_flag,trip_duration
0,id2875421,2,2016-03-14 17:24:55,2016-03-14 17:32:30,1,-73.982155,40.767937,-73.96463,40.765602,N,455
1,id2377394,1,2016-06-12 00:43:35,2016-06-12 00:54:38,1,-73.980415,40.738564,-73.999481,40.731152,N,663
2,id3858529,2,2016-01-19 11:35:24,2016-01-19 12:10:48,1,-73.979027,40.763939,-74.005333,40.710087,N,2124
3,id3504673,2,2016-04-06 19:32:31,2016-04-06 19:39:40,1,-74.01004,40.719971,-74.012268,40.706718,N,429
4,id2181028,2,2016-03-26 13:30:55,2016-03-26 13:38:10,1,-73.973053,40.793209,-73.972923,40.78252,N,435


### Extract features from datetime(8)

In [60]:
train['pickup_datetime'] = pd.to_datetime(train.pickup_datetime)
train['dropoff_datetime'] = pd.to_datetime(train.dropoff_datetime)
train.loc[:, 'pickup_date'] = train['pickup_datetime'].dt.date
train.loc[:, 'pickup_weekday'] = train['pickup_datetime'].dt.weekday
train.loc[:, 'pickup_hour_weekofyear'] = train['pickup_datetime'].dt.weekofyear
train.loc[:, 'pickup_hour'] = train['pickup_datetime'].dt.hour
train.loc[:, 'pickup_minute'] = train['pickup_datetime'].dt.minute
train.loc[:, 'pickup_dt'] = (train['pickup_datetime'] - train['pickup_datetime'].min()).dt.total_seconds()
train.loc[:, 'pickup_week_hour'] = train['pickup_weekday'] * 24 + train['pickup_hour']
train.loc[:, 'pickup_dt_bin'] = (train['pickup_dt'] // (3 * 3600))

### Extract features from longitude and latitude(16)

In [61]:
def haversine_array(lat1, lng1, lat2, lng2):
    lat1, lng1, lat2, lng2 = map(np.radians, (lat1, lng1, lat2, lng2))
    AVG_EARTH_RADIUS = 6371  # in km
    lat = lat2 - lat1
    lng = lng2 - lng1
    d = np.sin(lat * 0.5) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(lng * 0.5) ** 2
    h = 2 * AVG_EARTH_RADIUS * np.arcsin(np.sqrt(d))
    return h

def dummy_manhattan_distance(lat1, lng1, lat2, lng2):
    a = haversine_array(lat1, lng1, lat1, lng2)
    b = haversine_array(lat1, lng1, lat2, lng1)
    return a + b

def bearing_array(lat1, lng1, lat2, lng2):
    AVG_EARTH_RADIUS = 6371  # in km
    lng_delta_rad = np.radians(lng2 - lng1)
    lat1, lng1, lat2, lng2 = map(np.radians, (lat1, lng1, lat2, lng2))
    y = np.sin(lng_delta_rad) * np.cos(lat2)
    x = np.cos(lat1) * np.sin(lat2) - np.sin(lat1) * np.cos(lat2) * np.cos(lng_delta_rad)
    return np.degrees(np.arctan2(y, x))
coords = np.vstack((train[['pickup_latitude', 'pickup_longitude']].values,
                    train[['dropoff_latitude', 'dropoff_longitude']].values))

def rotate_distance(lat1,lng1,lat2,lng2):
    phi = (36.1/180)*np.pi
    lat1_rot = lng1*np.sin(phi)+lat1*np.cos(phi)
    lng1_rot = lng1*np.cos(phi)-lat1*np.sin(phi)
    lat2_rot = lng2*np.sin(phi)+lat2*np.cos(phi)
    lng2_rot = lng2*np.cos(phi)-lat2*np.sin(phi)
    rotate_distance = np.abs(lat1_rot-lat2_rot)+np.abs(lng1_rot-lng2_rot)
    return rotate_distance

In [62]:
pca = PCA().fit(coords)
train['pickup_pca0'] = pca.transform(train[['pickup_latitude', 'pickup_longitude']])[:, 0]
train['pickup_pca1'] = pca.transform(train[['pickup_latitude', 'pickup_longitude']])[:, 1]
train['dropoff_pca0'] = pca.transform(train[['dropoff_latitude', 'dropoff_longitude']])[:, 0]
train['dropoff_pca1'] = pca.transform(train[['dropoff_latitude', 'dropoff_longitude']])[:, 1]
train.loc[:, 'distance_haversine'] = haversine_array(train['pickup_latitude'].values, train['pickup_longitude'].values, train['dropoff_latitude'].values, train['dropoff_longitude'].values)
train.loc[:, 'distance_dummy_manhattan'] = dummy_manhattan_distance(train['pickup_latitude'].values, train['pickup_longitude'].values, train['dropoff_latitude'].values, train['dropoff_longitude'].values)
train.loc[:, 'direction'] = bearing_array(train['pickup_latitude'].values, train['pickup_longitude'].values, train['dropoff_latitude'].values, train['dropoff_longitude'].values)
train.loc[:, 'pca_manhattan'] = np.abs(train['dropoff_pca1'] - train['pickup_pca1']) + np.abs(train['dropoff_pca0'] - train['pickup_pca0'])
train.loc[:, 'rotate_distance'] = rotate_distance(train['pickup_latitude'].values,train['pickup_longitude'].values,train['dropoff_latitude'].values,train['dropoff_longitude'].values)
train.loc[:, 'pickup_lat_bin'] = np.round(train['pickup_latitude'], 2)
train.loc[:, 'pickup_long_bin'] = np.round(train['pickup_longitude'], 2)
train.loc[:, 'center_latitude'] = (train['pickup_latitude'].values + train['dropoff_latitude'].values) / 2
train.loc[:, 'center_longitude'] = (train['pickup_longitude'].values + train['dropoff_longitude'].values) / 2
train.loc[:, 'center_lat_bin'] = np.round(train['center_latitude'], 2)
train.loc[:, 'center_long_bin'] = np.round(train['center_longitude'], 2)
sample_ind = np.random.permutation(len(coords))[:500000]
kmeans = MiniBatchKMeans(n_clusters=100, batch_size=10000).fit(coords[sample_ind])
train.loc[:, 'pickup_cluster'] = kmeans.predict(train[['pickup_latitude', 'pickup_longitude']])
train.loc[:, 'dropoff_cluster'] = kmeans.predict(train[['dropoff_latitude', 'dropoff_longitude']])

### Speed features

In [63]:
train.loc[:, 'avg_speed_h'] = 1000 * train['distance_haversine'] / train['trip_duration']
train.loc[:, 'avg_speed_m'] = 1000 * train['distance_dummy_manhattan'] / train['trip_duration']

### Log_trip_duration

In [64]:
train['log_trip_duration'] = np.log(train['trip_duration'].values + 1)

### Aggregate features

In [65]:
for gby_col in ['pickup_hour', 'pickup_date', 'pickup_dt_bin',
               'pickup_week_hour', 'pickup_cluster', 'dropoff_cluster']:
    gby = train.groupby(gby_col).mean()[['avg_speed_h', 'avg_speed_m', 'log_trip_duration']]
    gby.columns = ['%s_gby_%s' % (col, gby_col) for col in gby.columns]
    train = pd.merge(train, gby, how='left', left_on=gby_col, right_index=True)

for gby_cols in [['center_lat_bin', 'center_long_bin'],
                 ['pickup_hour', 'center_lat_bin', 'center_long_bin'],
                 ['pickup_hour', 'pickup_cluster'],  ['pickup_hour', 'dropoff_cluster'],
                 ['pickup_cluster', 'dropoff_cluster']]:
    coord_speed = train.groupby(gby_cols).mean()[['avg_speed_h']].reset_index()
    coord_count = train.groupby(gby_cols).count()[['id']].reset_index()
    coord_stats = pd.merge(coord_speed, coord_count, on=gby_cols)
    coord_stats = coord_stats[coord_stats['id'] > 100]
    coord_stats.columns = gby_cols + ['avg_speed_h_%s' % '_'.join(gby_cols), 'cnt_%s' %  '_'.join(gby_cols)]
    train = pd.merge(train, coord_stats, how='left', on=gby_cols)

In [66]:
group_freq = '60min'
df_all = train[['id', 'pickup_datetime', 'pickup_cluster', 'dropoff_cluster']]
train.loc[:, 'pickup_datetime_group'] = train['pickup_datetime'].dt.round(group_freq)

# Count trips over 60min
df_counts = df_all.set_index('pickup_datetime')[['id']].sort_index()
df_counts['count_60min'] = df_counts.isnull().rolling(group_freq).count()['id']
train = train.merge(df_counts, on='id', how='left')

# Count how many trips are going to each cluster over time
dropoff_counts = df_all \
    .set_index('pickup_datetime') \
    .groupby([pd.TimeGrouper(group_freq), 'dropoff_cluster']) \
    .agg({'id': 'count'}) \
    .reset_index().set_index('pickup_datetime') \
    .groupby('dropoff_cluster').rolling('240min').mean() \
    .drop('dropoff_cluster', axis=1) \
    .reset_index().set_index('pickup_datetime').shift(freq='-120min').reset_index() \
    .rename(columns={'pickup_datetime': 'pickup_datetime_group', 'id': 'dropoff_cluster_count'})

train['dropoff_cluster_count'] = train[['pickup_datetime_group', 'dropoff_cluster']].merge(dropoff_counts, on=['pickup_datetime_group', 'dropoff_cluster'], how='left')['dropoff_cluster_count'].fillna(0)

  # This is added back by InteractiveShellApp.init_path()


In [67]:
# Count how many trips are going from each cluster over time
df_all = train[['id', 'pickup_datetime', 'pickup_cluster', 'dropoff_cluster']]
pickup_counts = df_all \
    .set_index('pickup_datetime') \
    .groupby([pd.TimeGrouper(group_freq), 'pickup_cluster']) \
    .agg({'id': 'count'}) \
    .reset_index().set_index('pickup_datetime') \
    .groupby('pickup_cluster').rolling('240min').mean() \
    .drop('pickup_cluster', axis=1) \
    .reset_index().set_index('pickup_datetime').shift(freq='-120min').reset_index() \
    .rename(columns={'pickup_datetime': 'pickup_datetime_group', 'id': 'pickup_cluster_count'})

train['pickup_cluster_count'] = train[['pickup_datetime_group', 'pickup_cluster']].merge(pickup_counts, on=['pickup_datetime_group', 'pickup_cluster'], how='left')['pickup_cluster_count'].fillna(0)

  This is separate from the ipykernel package so we can avoid doing imports until


In [68]:
train.head()

Unnamed: 0,id,vendor_id,pickup_datetime,dropoff_datetime,passenger_count,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,store_and_fwd_flag,...,avg_speed_h_pickup_hour_pickup_cluster,cnt_pickup_hour_pickup_cluster,avg_speed_h_pickup_hour_dropoff_cluster,cnt_pickup_hour_dropoff_cluster,avg_speed_h_pickup_cluster_dropoff_cluster,cnt_pickup_cluster_dropoff_cluster,pickup_datetime_group,count_60min,dropoff_cluster_count,pickup_cluster_count
0,id2875421,2,2016-03-14 17:24:55,2016-03-14 17:32:30,1,-73.982155,40.767937,-73.96463,40.765602,N,...,3.037619,1327.0,3.245529,898.0,2.911407,258.0,2016-03-14 17:00:00,391.0,8.75,12.75
1,id2377394,1,2016-06-12 00:43:35,2016-06-12 00:54:38,1,-73.980415,40.738564,-73.999481,40.731152,N,...,4.53719,619.0,4.071894,721.0,2.87357,380.0,2016-06-12 01:00:00,461.0,7.25,6.333333
2,id3858529,2,2016-01-19 11:35:24,2016-01-19 12:10:48,1,-73.979027,40.763939,-74.005333,40.710087,N,...,3.071025,1837.0,3.899182,845.0,4.360023,234.0,2016-01-19 12:00:00,380.0,5.75,15.0
3,id3504673,2,2016-04-06 19:32:31,2016-04-06 19:39:40,1,-74.01004,40.719971,-74.012268,40.706718,N,...,3.810478,1669.0,4.584465,606.0,3.414243,773.0,2016-04-06 20:00:00,563.0,3.25,10.5
4,id2181028,2,2016-03-26 13:30:55,2016-03-26 13:38:10,1,-73.973053,40.793209,-73.972923,40.78252,N,...,4.149237,1583.0,3.722658,999.0,4.649837,1547.0,2016-03-26 14:00:00,432.0,6.0,8.5


### Weather features

In [69]:
weather = pd.read_csv('./weather.csv')
weather.head()

Unnamed: 0,date,maximum temperature,minimum temperature,average temperature,precipitation,snow fall,snow depth
0,1-1-2016,42,34,38.0,0.0,0.0,0
1,2-1-2016,40,32,36.0,0.0,0.0,0
2,3-1-2016,45,35,40.0,0.0,0.0,0
3,4-1-2016,36,14,25.0,0.0,0.0,0
4,5-1-2016,29,11,20.0,0.0,0.0,0


In [70]:
weather['precipitation'][weather['precipitation']=='T']=0
weather['snow fall'][weather['snow fall']=='T']=0
weather['snow depth'][weather['snow depth']=='T']=0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [71]:
weather.head()

Unnamed: 0,date,maximum temperature,minimum temperature,average temperature,precipitation,snow fall,snow depth
0,1-1-2016,42,34,38.0,0.0,0.0,0
1,2-1-2016,40,32,36.0,0.0,0.0,0
2,3-1-2016,45,35,40.0,0.0,0.0,0
3,4-1-2016,36,14,25.0,0.0,0.0,0
4,5-1-2016,29,11,20.0,0.0,0.0,0


In [72]:
weather['date'] = pd.to_datetime(weather['date'])
weather['precipitation'] = pd.to_numeric(weather['precipitation'])
weather['snow fall']= pd.to_numeric(weather['snow fall'])
weather['snow depth']= pd.to_numeric(weather['snow depth'])
weather.rename(columns={'date':'pickup_date'}, inplace=True)

In [73]:
train.pickup_date = pd.to_datetime(train.pickup_date)
train['store_and_fwd_flag'] = 1 * (train.store_and_fwd_flag.values == 'Y')
train_merge_weather = pd.merge(train,weather,on='pickup_date')

In [74]:
train_merge_weather['pickup_date'] = train_merge_weather['pickup_datetime'].dt.date
train_merge_weather.head()

Unnamed: 0,id,vendor_id,pickup_datetime,dropoff_datetime,passenger_count,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,store_and_fwd_flag,...,pickup_datetime_group,count_60min,dropoff_cluster_count,pickup_cluster_count,maximum temperature,minimum temperature,average temperature,precipitation,snow fall,snow depth
0,id2875421,2,2016-03-14 17:24:55,2016-03-14 17:32:30,1,-73.982155,40.767937,-73.96463,40.765602,0,...,2016-03-14 17:00:00,391.0,8.75,12.75,51,40,45.5,0.29,0.0,0
1,id2129090,1,2016-03-14 14:05:39,2016-03-14 14:28:05,1,-73.97509,40.758766,-73.953201,40.765068,0,...,2016-03-14 14:00:00,414.0,7.5,14.25,51,40,45.5,0.29,0.0,0
2,id0256505,1,2016-03-14 15:04:38,2016-03-14 15:16:13,1,-73.994484,40.745087,-73.998993,40.72271,0,...,2016-03-14 15:00:00,476.0,3.0,8.0,51,40,45.5,0.29,0.0,0
3,id3863815,2,2016-03-14 04:24:36,2016-03-14 04:37:11,3,-73.944359,40.714489,-73.91053,40.709492,0,...,2016-03-14 04:00:00,37.0,0.0,0.0,51,40,45.5,0.29,0.0,0
4,id3817493,2,2016-03-14 14:57:56,2016-03-14 15:15:26,1,-73.952881,40.766468,-73.97863,40.761921,0,...,2016-03-14 15:00:00,478.0,11.25,10.75,51,40,45.5,0.29,0.0,0


### Add bike count features

In [75]:
bike_df = pd.read_csv('./City Bike.csv')
bike_df.head()

Unnamed: 0,id,gender_id,pickup_datetime,dropoff_datetime,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,trip_duration
0,25242,2,2016-05-30 15:23:05,2016-05-30 15:52:59,-73.976485,40.759923,-74.00215,40.730386,1794
1,20900,1,2016-04-24 19:56:47,2016-04-24 20:02:17,-74.003664,40.743174,-74.00215,40.730386,329
2,18792,1,2016-06-25 15:08:38,2016-06-25 15:15:57,-73.991908,40.716059,-74.005524,40.711464,438
3,17420,1,2016-06-28 18:14:17,2016-06-28 18:35:25,-73.986569,40.701485,-73.9899,40.714275,1268
4,22403,1,2016-06-11 17:10:33,2016-06-11 17:14:44,-73.989551,40.740343,-73.990093,40.73705,251


In [76]:
bike_df['pickup_datetime'] = pd.to_datetime(bike_df['pickup_datetime'])
bike_df.loc[:,'pickup_date'] = bike_df['pickup_datetime'].dt.date
bike_df_date = bike_df.groupby('pickup_date').count()[['id']]
bike_df_date.rename(columns={'id':'count'}, inplace=True)

In [77]:
bike_df_date.reset_index(inplace=True)

In [78]:
train_merge_weather_bike = pd.merge(train_merge_weather,bike_df_date,on='pickup_date')

#### collision

In [79]:
col_df = pd.read_csv('./Collisions.csv')
col_df['DATE'] = pd.to_datetime(col_df['DATE'])

  interactivity=interactivity, compiler=compiler, result=result)


In [80]:
low = date(2016,1,1)
high = date(2016,7,1)

In [81]:
col_df = col_df[(col_df.DATE > low) & (col_df.DATE<high)]

In [82]:
col_df_num = col_df.groupby('DATE').count()[['TIME']]

In [83]:
col_df_num = col_df_num.reset_index()

In [84]:
col_df_num.rename(columns={'DATE':'pickup_date','TIME':'Collisions_num'}, inplace=True)

In [85]:
col_df_num['pickup_date'] = pd.to_datetime(col_df_num['pickup_date'])
train_merge_weather_bike['pickup_date']=pd.to_datetime(train_merge_weather_bike['pickup_date'])

In [86]:
train_weather_bike_col = pd.merge(train_merge_weather_bike,col_df_num,on='pickup_date')

In [87]:
now = date
feature_names = list(train_weather_bike_col.columns)
do_not_use_for_training = ['id', 'log_trip_duration', 'pickup_datetime', 'dropoff_datetime',
                           'trip_duration', 'pickup_date', 'avg_speed_h', 'avg_speed_m',
                           'pickup_lat_bin', 'pickup_long_bin',
                           'center_lat_bin', 'center_long_bin',
                           'pickup_dt_bin', 'pickup_datetime_group','snow fall','snow depth']
feature_names = [f for f in train_weather_bike_col.columns if f not in do_not_use_for_training]
y = np.log(train_weather_bike_col['trip_duration'].values + 1)
Xtr, Xv, ytr, yv = train_test_split(train_weather_bike_col[feature_names].values, y, test_size=0.2, random_state=1987)

train = train[train['trip_duration']<1200]
df_dis_mid = train[(train['rotate_distance']>0.024)& (train['rotate_distance']<0.025)]
sbn.boxplot(x=df_dis_mid['his_dir'],y=df_dis_mid['trip_duration'])
plt.xlabel('clock position')
plt.ylabel('trip duration')

In [88]:
import tensorflow as tf

In [89]:
from tensorflow import estimator

In [90]:
data_tf = train_weather_bike_col[feature_names]
y = np.log(train_weather_bike_col['trip_duration'].values + 1)

In [91]:
data_tf = data_tf.fillna(0)

In [92]:
Xtr, Xv, ytr, yv = train_test_split(data_tf.values, y, test_size=0.2, random_state=1987)

In [93]:
feat_cols = [tf.feature_column.numeric_column('x',shape=[63])]

In [218]:
deep_model = estimator.DNNRegressor(hidden_units=[512,256,128],
                                    feature_columns=feat_cols,
                                    optimizer=tf.train.AdamOptimizer(learning_rate=0.0001),
                                    activation_fn=tf.nn.relu
                                   )

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\liuyu\\AppData\\Local\\Temp\\tmpctzvh4is', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001B4718146A0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [232]:
input_fn = estimator.inputs.numpy_input_fn(x={'x':Xtr},y=ytr,
                                          shuffle=True,
                                          batch_size = 100,
                                          num_epochs=50000)

In [233]:
deep_model.train(input_fn=input_fn,steps = 2000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\liuyu\AppData\Local\Temp\tmpctzvh4is\model.ckpt-4000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 4000 into C:\Users\liuyu\AppData\Local\Temp\tmpctzvh4is\model.ckpt.
INFO:tensorflow:loss = 69326.5, step = 4001
INFO:tensorflow:global_step/sec: 211.889
INFO:tensorflow:loss = 9577.527, step = 4101 (0.471 sec)
INFO:tensorflow:global_step/sec: 240.884
INFO:tensorflow:loss = 337860.44, step = 4201 (0.416 sec)
INFO:tensorflow:global_step/sec: 252.374
INFO:tensorflow:loss = 57606.63, step = 4301 (0.395 sec)
INFO:tensorflow:global_step/sec: 214.257
INFO:tensorflow:loss = 523070.03, step = 4401 (0.469 sec)
INFO:tensorflow:global_step/sec: 254.207
INFO:tensorflow:loss = 164619.86, step = 4501 (0.391 sec)
INFO:tensorflow

<tensorflow_estimator.python.estimator.canned.dnn.DNNRegressor at 0x1b471258d68>

In [234]:
input_fn1 = estimator.inputs.numpy_input_fn(x={'x':Xv},shuffle = False)

In [235]:
y = list(deep_model.predict(input_fn=input_fn1))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\liuyu\AppData\Local\Temp\tmpctzvh4is\model.ckpt-6000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [236]:
y = [p['predictions'] for p in y]

In [237]:
yv = yv.reshape(286686,1)

In [238]:
from sklearn.metrics import mean_squared_error
from math import sqrt

rms = sqrt(mean_squared_error(yv, y))

In [239]:
rms

37.313631170238246