In [27]:
import pandas as pd
import numpy as np
from keras.layers import Input, Dense
from keras.models import Model
from keras.layers import LeakyReLU, PReLU, Dropout

In [4]:
cab_df = pd.read_csv("cab_rides.csv",delimiter='\t',encoding = "utf-16")
weather_df = pd.read_csv("weather.csv",delimiter='\t',encoding = "utf-16")

cab_df['date_time'] = pd.to_datetime(cab_df['time_stamp']/1000, unit='s')
weather_df['date_time'] = pd.to_datetime(weather_df['time_stamp'], unit='s')

#merge the datasets to refelect same time for a location
cab_df['merge_date'] = cab_df.source.astype(str) +" - "+ cab_df.date_time.dt.date.astype("str") +" - "+ cab_df.date_time.dt.hour.astype("str")
weather_df['merge_date'] = weather_df.location.astype(str) +" - "+ weather_df.date_time.dt.date.astype("str") +" - "+ weather_df.date_time.dt.hour.astype("str")

weather_df.index = weather_df['merge_date']

merged_df = cab_df.join(weather_df,on=['merge_date'],rsuffix ='_w')
merged_df['rain'].fillna(0,inplace=True)
merged_df = merged_df[pd.notnull(merged_df['date_time_w'])]
merged_df = merged_df[pd.notnull(merged_df['price'])]
merged_df['day'] = merged_df.date_time.dt.dayofweek
merged_df['hour'] = merged_df.date_time.dt.hour

merged_df.columns
merged_df.count()
X = merged_df[merged_df.product_id=='lyft_line'][['day','distance','hour','temp','clouds', 'pressure','humidity', 'wind', 'rain']]
X.count()
y = merged_df[merged_df.product_id=='lyft_line']['price']
y.count()
X.reset_index(inplace=True)
X = X.drop(columns=['index'])
X.head()
features = pd.get_dummies(X)

In [5]:
# Labels are the values we want to predict
labels = np.array(y)

# Saving feature names for later use
feature_list = list(features.columns)
# Convert to numpy array
features = np.array(features)
# Using Skicit-learn to split data into training and testing sets
from sklearn.model_selection import train_test_split
# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.1, random_state = 0)

In [59]:
input_ = Input(shape = (9,))
x = Dense(128)(input_)
x = Dense(64, activation = 'selu')(x)
x = Dropout(0.1)(x)
x = Dense(32)(x)
x = Dense(16, activation = 'selu')(x)
x = Dense(8)(x)
output = Dense(1)(x)

dnn = Model(input_, output)
dnn.compile(optimizer='Adam', loss='mean_absolute_error')
dnn.fit(train_features, train_labels,
          batch_size=16,
          epochs=50,
       validation_data=(test_features, test_labels))



Train on 68280 samples, validate on 22761 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
14768/68280 [=====>........................] - ETA: 7s - loss: 1.7002

KeyboardInterrupt: 

In [58]:
predict_test = dnn.predict(test_features)
errors = abs(predict_test - test_labels)
mape = 100 * (errors / test_labels)
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 59.17 %.
