# Park Mate Predict Research notebook 
### by: Trevor Loren (tloren@deakin.edu.au)
This notebook trains a recurrent neural network (RNN) to predict parking density from historical on-street parking sensor data, then saves the model and deploys it to IBM Watson ML.
Live data: https://data.melbourne.vic.gov.au/Transport/On-street-Parking-Bay-Sensors/vh2v-4nfs
Historical data: https://data.melbourne.vic.gov.au/Transport/On-street-Car-Parking-Sensor-Data-2020-Jan-May-/4n3a-s6rn

In [None]:
import pandas as pd
import numpy as np
from datetime import timedelta, datetime
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, SimpleRNN
from keras.optimizers import Adam

# Load only the columns the rest of the notebook uses from each CSV export.
sensor_df = pd.read_csv(
    "sensorApr15-29.csv",
    usecols=["ArrivalTime", "BayId", "DurationMinutes"],
)
bay_df = pd.read_csv(
    "On-street_Parking_Bay_Sensors.csv",
    usecols=["bay_id", "lat", "lon"],
)

# read_csv is not asked to parse dates, so convert the arrival column explicitly;
# later cells rely on the `.dt` accessor of this column.
sensor_df["ArrivalTime"] = pd.to_datetime(sensor_df["ArrivalTime"])

### Creating bins based on lat and lon

In [None]:
# Side length, in degrees, of each square latitude/longitude grid cell.
step = 0.005


def to_bin(x):
    """Snap a coordinate (scalar or Series) down to the nearest multiple of `step`."""
    return np.floor(x / step) * step


# np.floor is applied to the whole column at once instead of value by value.
bay_df['latbin'] = to_bin(bay_df['lat'])
bay_df['lonbin'] = to_bin(bay_df['lon'])

# Label each bay with its grid cell as "<latbin>,<lonbin>" (rounded to 4 decimals).
# Built in a single column assignment rather than a per-row `.at` write, so it
# does not depend on unique index labels and also works for an empty frame.
bay_df['location'] = [
    str(round(lat_cell, 4)) + "," + str(round(lon_cell, 4))
    for lat_cell, lon_cell in zip(bay_df['latbin'].to_numpy(), bay_df['lonbin'].to_numpy())
]
bay_df.head()

### Merging data to remove untracked bays in live data

In [None]:
# Join each sensor record to its bay's grid-cell label. pd.merge defaults to an
# inner join, so records whose BayId has no match in bay_df are dropped.
sensor_df = pd.merge(sensor_df, bay_df[['location', 'bay_id']], left_on='BayId', right_on='bay_id')
# DataFrame.drop returns a new frame; assign it so the duplicate join key
# (`bay_id`, identical to `BayId`) is actually removed.
sensor_df = sensor_df.drop(columns=['bay_id'])
sensor_df

### Adding hourly time samples for the duration of the parking from start time

In [None]:
%%time
def time_sampler_2(df):
    """Create extra hourly records for parking events that span several hours.

    For every row with a positive ``DurationMinutes``, one additional record is
    produced for each whole-hour offset ``1 .. int(duration / 60) - 1`` after
    ``ArrivalTime``. Each record copies the row's ``location``, ``BayId`` and
    ``DurationMinutes``.

    Args:
        df: Frame with ``ArrivalTime`` (datetime), ``location``, ``BayId`` and
            ``DurationMinutes`` columns.

    Returns:
        A new DataFrame with columns ``ArrivalTime``, ``location``, ``BayId``
        and ``DurationMinutes`` holding only the generated records; it is empty
        (but has these columns) when no row lasts at least two hours.
    """
    columns = ['ArrivalTime', 'location', 'BayId', 'DurationMinutes']
    samples = []
    # Iterate over column values directly so the result does not depend on the
    # index labels being unique.
    rows = zip(df['ArrivalTime'], df['location'], df['BayId'], df['DurationMinutes'])
    for arrival, location, bay_id, duration in rows:
        if duration > 0:
            hours = int(duration / 60)
            # NOTE(review): range(1, hours) stops one hour short of the full
            # duration (e.g. a 90-minute stay yields no extra sample) --
            # confirm this is intended.
            for hour_delta in range(1, hours):
                samples.append([arrival + timedelta(hours=hour_delta), location, bay_id, duration])
    # Build the frame from the row list itself: going through np.array would turn
    # every column into `object` dtype and would fail when `samples` is empty.
    return pd.DataFrame(samples, columns=columns)

# Stack the original records and the generated hourly samples.
# DataFrame.append was removed in pandas 2.0; pd.concat keeps the same default
# behaviour (original index labels kept, columns aligned by name).
df = pd.concat([sensor_df, time_sampler_2(sensor_df)])
print(sensor_df.shape)
print(df.shape)

### Pivoting the data to move location to the column header with counts of occupied bays, followed by temporal windowing

In [None]:
# Bucket every record to its nearest hour; the column is added to `df` itself.
df['Hour'] = df['ArrivalTime'].dt.round('H')
# Keep one record per (hour, bay) so a bay is counted at most once per hour.
df1 = df.drop_duplicates(subset=['Hour', 'BayId'], keep='first')

# One row per hour, one column per location; each cell counts the bay records
# in that hour and location, with 0 where there were none.
pivoted_df1 = df1.pivot_table(
    index='Hour',
    columns='location',
    values='BayId',
    aggfunc='count',
).fillna(0)

# Rescale every location column with MinMaxScaler, writing the result back in place.
# NOTE(review): the scaler is fitted on all rows, including those that later
# become the test windows -- confirm this is acceptable.
scaler = MinMaxScaler()
pivoted_df1.iloc[:, :] = scaler.fit_transform(pivoted_df1.to_numpy())


# Number of consecutive rows (hourly steps) of pivoted_df1 in each input window.
window_size = 3

# Input: each run of `window_size` consecutive rows.
batched_data_x = np.array([
    pivoted_df1.iloc[end - window_size:end].values
    for end in range(window_size, len(pivoted_df1))
])
# Target: the row that immediately follows each input window.
batched_data_y = np.array([
    pivoted_df1.iloc[end].values
    for end in range(window_size, len(pivoted_df1))
])
print(batched_data_x.shape)
print(batched_data_y.shape)

# Train/test split: the last `test_size` windows are held out for testing,
# everything before them is used for training.
test_size = 20
x_train, x_test = batched_data_x[:-test_size, :, :], batched_data_x[-test_size:, :, :]
y_train, y_test = batched_data_y[:-test_size, :], batched_data_y[-test_size:, :]
for part in (x_train, y_train, x_test, y_test):
    print(part.shape)

### Creating, fitting, and saving our model

In [None]:
regressor = Sequential()
print("Window size = ", x_train.shape[1])
# The recurrent layer takes its input shape (window length, number of location
# columns) from the training data.
regressor.add(SimpleRNN(units=50, input_shape=(x_train.shape[1], x_train.shape[2]), name='simple_RNN'))
regressor.add(Dropout(0.5, name='drp1'))
regressor.add(Dense(units=124*8, name='fc1'))
# One output per target column. Derived from y_train instead of the previous
# hard-coded 47 so the model always matches the number of location columns the
# data actually produced.
regressor.add(Dense(units=y_train.shape[1], name='output'))

# NOTE(review): newer Keras releases name this argument `learning_rate`; `lr`
# is kept to match the Keras 2.2.5 version declared for deployment.
adamopt = Adam(lr=0.0001)
regressor.compile(optimizer=adamopt, loss='mse')

# NOTE(review): the held-out test windows are also used as validation data here.
model_history = regressor.fit(x_train, y_train, epochs=150, batch_size=30, validation_data=(x_test, y_test))

# Saved as HDF5; the following shell command packs this file into model.tgz.
regressor.save("model.h5")
!tar -zcvf model.tgz model.h5

### Plotting and evaluating our prediction results

In [None]:
import matplotlib.pyplot as plt
# Loss per epoch recorded by `fit`: training data vs. the validation (test) windows.
for history_key, legend_label in (('loss', 'train'), ('val_loss', 'test')):
    plt.plot(model_history.history[history_key], label=legend_label)
plt.legend()
plt.show()

In [None]:
# Predict the held-out windows and report the mean absolute error. Both y_test
# and the predictions are in the min-max scaled units produced by `scaler`.
test_pred = regressor.predict(x_test)
err = np.mean(np.abs(y_test - test_pred))
# The label previously said "standard averaging", which is not what is measured.
print('test MAE for RNN prediction (min-max scaled units):', err)
print(test_pred.shape)

In [None]:
# Compare true and predicted values for the first test window. Each point on
# the x axis is one location column; values are min-max scaled counts.
plt.figure(figsize=(18, 6))
plt.plot(y_test[0], color='g', label='test_true')
plt.plot(test_pred[0], color='r', label='test_pred')
# The axis labels previously read 'Date' / 'Close', which do not describe this data.
plt.xlabel('Location bin (column index)')
plt.ylabel('Scaled occupied-bay count')
plt.legend(fontsize=12)
plt.show()

### Storing and deploying the model to Watson ML

In [None]:
from watson_machine_learning_client import WatsonMachineLearningAPIClient

# Credentials for the Watson Machine Learning service instance. Intentionally
# left empty here; fill in before running.
wml_credentials = {
    # Your WML credentials from Watson ML go here
}

client = WatsonMachineLearningAPIClient(wml_credentials)
# Metadata stored with the model: its display name plus the framework, framework
# version and extra library version declared for the saved Keras model.
metadata = {
    client.repository.ModelMetaNames.NAME: "parkmate1",
    client.repository.ModelMetaNames.FRAMEWORK_NAME: "tensorflow",
    client.repository.ModelMetaNames.FRAMEWORK_VERSION: "1.15",
    client.repository.ModelMetaNames.FRAMEWORK_LIBRARIES: [{'name':'keras', 'version': '2.2.5'}]
}
# Upload model.tgz (the archive of model.h5 created after training) to the
# repository; the returned details are used in the next cell to deploy it.
model_details = client.repository.store_model( model="model.tgz", meta_props=metadata )

In [None]:
# Read the stored model's GUID from the details returned by store_model and
# create a deployment named "ParkMateForecast" for that artifact.
model_id = model_details["metadata"]["guid"]
model_deployment_details = client.deployments.create( artifact_uid=model_id, name="ParkMateForecast" )