# Section 1: Import Libraries

In [2]:
import numpy as np
import pandas as pd
from keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential, load_model
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime, timedelta
import pickle

In [1]:
import requests
from parkwhere import extract_all_features
from parkwhereviz import plot_bar, plot_heatmap
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In this section, we import the necessary libraries for data manipulation, machine learning, and date handling.

numpy and pandas are used for data manipulation and analysis.
Sequential, LSTM, and Dense are classes from Keras used to build and train the LSTM model.
MinMaxScaler from scikit-learn is used to scale the data.
datetime and timedelta are used to handle dates and time differences.

# Section 2: Load and Preprocess Data

In [3]:
# Load the raw parking records from CSV into `df`
# (the path is relative to the notebook's working directory)
df = pd.read_csv('parking_dataset.csv')

# Preview the first five rows to sanity-check the parsed columns
df.head()

Unnamed: 0,date_time,parking_zone
0,01/01/2021 02:56,Zone 1
1,01/01/2021 23:51,Zone 1
2,02/01/2021 03:10,Zone 2
3,02/01/2021 08:31,Zone 3
4,03/01/2021 00:35,Zone 1


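This section loads the dataset from the CSV file 'parking_dataset.csv' into a pandas DataFrame `df`. The data is then preprocessed: the `date_time` strings are parsed into datetimes, calendar and time-of-day features are derived from them, and the categorical columns are given an explicit order.

Note: MinMaxScaler is imported above, but the loaded data has no 'Parking Availability Rate' column (only `date_time` and `parking_zone`), so no scaling to the 0–1 range is applied in this section.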

In [4]:
# Convert `date_time` column into a datetime column.
# The explicit day-first format keeps strings such as '02/01/2021'
# from being read as February 1st instead of January 2nd.
df['date_time'] = pd.to_datetime(df['date_time'], format='%d/%m/%Y %H:%M')

# See data types of columns (also lists the non-null count of each column)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2190 entries, 0 to 2189
Data columns (total 2 columns):
date_time       2190 non-null datetime64[ns]
parking_zone    2190 non-null object
dtypes: datetime64[ns](1), object(1)
memory usage: 34.3+ KB


In [5]:
# Report the dataset size, then how many records fall in each parking zone
print(f"Total number of records: {df.shape[0]}")
print("Breakdown of parking zones:\n")
print(df['parking_zone'].value_counts())

Total number of records: 2190
Breakdown of parking zones:

Zone 2    582
Zone 3    550
Zone 4    550
Zone 1    508
Name: parking_zone, dtype: int64


In [6]:
# Derive calendar and clock features from the parsed `date_time` column.
# All columns are added in a single `assign` call, in the same order as before.
df = df.assign(
    year=df['date_time'].dt.year,
    month=df['date_time'].dt.month,
    day=df['date_time'].dt.day,
    day_of_week=df['date_time'].dt.weekday,   # 0 = Monday ... 6 = Sunday
    hour=df['date_time'].dt.hour,
    minute=df['date_time'].dt.minute,
    date=df['date_time'].dt.date,
    time=df['date_time'].dt.strftime('%H:%M'),
    # Time of day as a decimal hour, rounded to one decimal place
    hour_min=(df['date_time'].dt.hour + df['date_time'].dt.minute / 60).round(1),
)

# Preview the enriched frame
df.head()

Unnamed: 0,date_time,parking_zone,year,month,day,day_of_week,hour,minute,date,time,hour_min
0,2021-01-01 02:56:00,Zone 1,2021,1,1,4,2,56,2021-01-01,02:56,2.9
1,2021-01-01 23:51:00,Zone 1,2021,1,1,4,23,51,2021-01-01,23:51,23.8
2,2021-01-02 03:10:00,Zone 2,2021,1,2,5,3,10,2021-01-02,03:10,3.2
3,2021-01-02 08:31:00,Zone 3,2021,1,2,5,8,31,2021-01-02,08:31,8.5
4,2021-01-03 00:35:00,Zone 1,2021,1,3,6,0,35,2021-01-03,00:35,0.6


In [7]:
# Every conversion below assigns the result back to the column instead of using
# `inplace=True`: the `inplace` argument of `Series.cat.set_categories` was
# removed in pandas 2.0, and in-place methods called on a column selection
# (`df[col].replace(..., inplace=True)`) do not reliably update `df` under
# Copy-on-Write.

# Convert `month` to an ordered categorical (Jan < Feb < ... < Dec)
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
df['month'] = pd.Categorical(
    df['month'].replace(dict(zip(range(1, 13), month_labels))),
    categories=month_labels, ordered=True)

# Convert `day_of_week` to an ordered categorical.
# `.dt.weekday` numbers the days 0 = Monday ... 6 = Sunday, while the category
# order used for display starts the week on Sunday.
weekday_labels = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
df['day_of_week'] = pd.Categorical(
    df['day_of_week'].replace(dict(enumerate(weekday_labels))),
    categories=['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'], ordered=True)

# Convert `hour` to an ordered categorical covering all 24 hours, so hours with
# no records still exist as categories
df['hour'] = pd.Categorical(df['hour'], categories=list(range(24)), ordered=True)

# Convert `parking_zone` to an ordered categorical
df['parking_zone'] = pd.Categorical(
    df['parking_zone'],
    categories=['Zone 1', 'Zone 2', 'Zone 3', 'Zone 4'], ordered=True)

# Show first 5 rows
df.head()

Unnamed: 0,date_time,parking_zone,year,month,day,day_of_week,hour,minute,date,time,hour_min
0,2021-01-01 02:56:00,Zone 1,2021,Jan,1,Fri,2,56,2021-01-01,02:56,2.9
1,2021-01-01 23:51:00,Zone 1,2021,Jan,1,Fri,23,51,2021-01-01,23:51,23.8
2,2021-01-02 03:10:00,Zone 2,2021,Jan,2,Sat,3,10,2021-01-02,03:10,3.2
3,2021-01-02 08:31:00,Zone 3,2021,Jan,2,Sat,8,31,2021-01-02,08:31,8.5
4,2021-01-03 00:35:00,Zone 1,2021,Jan,3,Sun,0,35,2021-01-03,00:35,0.6


# Section 3: Exploratory Analysis

In [8]:
import plotly.graph_objects as go

In [9]:
# Build `df1`, the frame used by the plots below, by expanding short holiday
# codes into readable labels. `DataFrame.replace` returns a new frame, so `df`
# itself is left untouched.
# NOTE(review): none of the columns shown in this notebook contain 'eve', 'nil'
# or 'ph', so this currently looks like a plain copy of `df` — confirm whether a
# public-holiday column was meant to be added upstream.
df1 = df.replace({'eve': 'Public Holiday Eve', 'nil': 'Neither', 'ph': 'Public Holiday'})
df1.head()

Unnamed: 0,date_time,parking_zone,year,month,day,day_of_week,hour,minute,date,time,hour_min
0,2021-01-01 02:56:00,Zone 1,2021,Jan,1,Fri,2,56,2021-01-01,02:56,2.9
1,2021-01-01 23:51:00,Zone 1,2021,Jan,1,Fri,23,51,2021-01-01,23:51,23.8
2,2021-01-02 03:10:00,Zone 2,2021,Jan,2,Sat,3,10,2021-01-02,03:10,3.2
3,2021-01-02 08:31:00,Zone 3,2021,Jan,2,Sat,8,31,2021-01-02,08:31,8.5
4,2021-01-03 00:35:00,Zone 1,2021,Jan,3,Sun,0,35,2021-01-03,00:35,0.6


In [12]:
def plot_bar(data, variable, colour='cadetblue'):
    """Show a bar chart with the number of rows per value of `variable`.

    Parameters
    ----------
    data : pandas.DataFrame
        Records to count; must contain the column named by `variable`.
    variable : str
        Column to group by. Its values become the x-axis positions.
    colour : str, default 'cadetblue'
        Fill colour passed to the bars.
    """
    # Human-readable axis titles; columns not listed here get no x-axis title
    axis_titles = {'month': 'Month', 'day_of_week': 'Day of the Week',
                   'hour': 'Hour', 'parking_zone': 'Parking Zone'}

    # One row per distinct value, with its frequency in a `Count` column
    counts = data.groupby(by=variable).size().reset_index(name="Count")

    bars = go.Bar(x=counts[variable], y=counts['Count'], marker_color=colour)

    fig = go.Figure()
    fig.add_trace(bars)
    fig.update_layout(
        title="",
        xaxis_title=axis_titles.get(variable),
        yaxis_title="Number of Parking Sessions",
    )
    fig.show()
    

### Distribution of parking sessions by months

In [14]:
# Session counts per month
plot_bar(data=df1, variable="month", colour='green')

### Distribution of parking sessions by day of the week

In [16]:
# Session counts per day of the week
plot_bar(data=df1, variable="day_of_week", colour='skyblue')

### Distribution of parking sessions by hour of the day


In [17]:
# Session counts per hour of the day
plot_bar(data=df1, variable="hour", colour='maroon')

### Distribution of parking sessions by parking zones (Class distribution)

In [18]:
# Session counts per parking zone, i.e. the class balance of the target
plot_bar(data=df1, variable="parking_zone", colour='crimson')

### Distribution by hour of the day and parking zones

In [23]:
def plot_stacked_bar(data, primary_var, stacking_var, by_percentage=False):
    """Show a stacked bar chart of parking sessions for two variables.

    Parameters
    ----------
    data : pandas.DataFrame
        Records to aggregate; must contain `primary_var` and `stacking_var`.
    primary_var : str
        Column placed on the x-axis (one bar per value).
    stacking_var : str
        Column whose values become the stacked segments of each bar.
    by_percentage : bool, default False
        If True, each segment is its share (in %, one decimal place) of the
        bar's total; otherwise segments are raw counts.
    """
    axis_titles = {'month': 'Month', 'day_of_week': 'Day of the Week',
                   'hour': 'Hour', 'parking_zone': 'Parking Zone'}

    # Segment colours keyed by the x-axis variable
    palettes = {
        'hour': ["#6ed2b0", "#3abf91", "#2a8867", "#19513e"],
        'day_of_week': ["#e7d1a1", "#d9b568", "#ab832a", "#554115"],
        'ph_eve': ["#aea4e4", "#7d6dd3", "#4231a5", "#211852"],
    }
    # Used for any other `primary_var` (e.g. 'month'); previously such a call
    # failed because no colour list was ever assigned
    fallback_palette = ["#a6bddb", "#74a9cf", "#2b8cbe", "#045a8d"]
    palette = palettes.get(primary_var, fallback_palette)

    # Number of records for every (primary, stacking) combination
    counts = data.groupby(by=[primary_var, stacking_var]).size()

    if by_percentage:
        # Divide by the per-bar total via `transform`, which keeps the original
        # index. `groupby(...).apply(...)` prepends the group key on
        # pandas >= 2.0, which made the following `reset_index` fail.
        bar_totals = counts.groupby(level=0).transform('sum')
        grouped_data = (100 * counts / bar_totals).round(1).reset_index(name="Count")
        yaxis_label = "Percentage of Parking Sessions (%)"
    else:
        grouped_data = counts.reset_index(name="Count")
        yaxis_label = "Number of Parking Sessions"

    # One trace per stacking value. Colours are cycled, so having more values
    # than colours no longer drops the extra traces silently.
    traces = []
    for position, level in enumerate(sorted(data[stacking_var].unique())):
        segment = grouped_data[grouped_data[stacking_var] == level]
        traces.append(go.Bar(name=level,
                             x=segment[primary_var],
                             y=segment['Count'],
                             marker_color=palette[position % len(palette)]))

    fig = go.Figure(data=traces)
    fig.update_layout(barmode='stack',
                      title="",
                      xaxis_title=axis_titles.get(primary_var),
                      yaxis_title=yaxis_label,
                      xaxis_nticks=36)

    fig.show()
    

In [25]:
# Share of each parking zone within every hour of the day (bars are percentages)
plot_stacked_bar(df1, 'hour', 'parking_zone', by_percentage=True)

In [24]:
def plot_heatmap(data, variable1, variable2, colorscale="Purples"):
    """Show a heatmap of record counts for every pair of values of two columns.

    `variable1` is laid out along the y-axis and `variable2` along the x-axis.
    Each cell holds the number of non-null `date` entries for that pair, so
    `data` must contain a `date` column besides the two variables.
    """
    axis_titles = {'month': 'Month', 'day_of_week': 'Day of the Week',
                   'hour': 'Hour', 'parking_zone': 'Parking Zone'}

    # Long table of counts: one row per (variable1, variable2) combination
    pair_counts = (
        data.groupby(by=[variable1, variable2])["date"]
            .count()
            .to_frame('Count')
            .reset_index()
    )
    # Wide grid: rows are `variable1` values, columns are `variable2` values
    grid = pair_counts.pivot(index=variable1, columns=variable2, values="Count")

    heat = go.Heatmap(z=grid, x=grid.columns, y=grid.index, colorscale=colorscale)
    fig = go.Figure(data=heat)

    fig.update_layout(
        title="",
        xaxis_title=axis_titles.get(variable2),
        yaxis_title=axis_titles.get(variable1),
        xaxis_nticks=36,
    )

    fig.show()

In [26]:
# Session counts for each parking zone (y-axis) against hour of the day (x-axis)
plot_heatmap(df1, 'parking_zone', 'hour')

# Section 4: LSTM Model Building

In [28]:
# Collapse the target into two classes: 'Zone 1' versus every other zone.
# The result is assigned back to the column because an in-place `replace` on a
# column selection does not reliably update `df` under Copy-on-Write.
# The column is cast to plain objects first, because 'Other Zones' is not one of
# the categories defined for `parking_zone`; this also matches the `object`
# dtype shown for `y` further down.
df['parking_zone'] = (
    df['parking_zone']
    .astype(object)
    .replace(['Zone 2', 'Zone 3', 'Zone 4'], 'Other Zones')
)

# Show first 5 rows
df.head()

Unnamed: 0,date_time,parking_zone,year,month,day,day_of_week,hour,minute,date,time,hour_min
0,2021-01-01 02:56:00,Zone 1,2021,Jan,1,Fri,2,56,2021-01-01,02:56,2.9
1,2021-01-01 23:51:00,Zone 1,2021,Jan,1,Fri,23,51,2021-01-01,23:51,23.8
2,2021-01-02 03:10:00,Other Zones,2021,Jan,2,Sat,3,10,2021-01-02,03:10,3.2
3,2021-01-02 08:31:00,Other Zones,2021,Jan,2,Sat,8,31,2021-01-02,08:31,8.5
4,2021-01-03 00:35:00,Zone 1,2021,Jan,3,Sun,0,35,2021-01-03,00:35,0.6


In [29]:
# Separate the features (every column except the target) from the target labels.
# NOTE(review): `X` still contains non-numeric columns (`date_time`, `date`,
# `time`, and the month / weekday labels), which will need to be encoded or
# dropped before they can be fed to a model.
X = df.drop(columns='parking_zone')
y = df['parking_zone']

# Split the data into training and testing sets
# (20% held out for testing; the fixed `random_state` makes the shuffle repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [32]:
# Preview the feature columns
X.head()

Unnamed: 0,date_time,year,month,day,day_of_week,hour,minute,date,time,hour_min
0,2021-01-01 02:56:00,2021,Jan,1,Fri,2,56,2021-01-01,02:56,2.9
1,2021-01-01 23:51:00,2021,Jan,1,Fri,23,51,2021-01-01,23:51,23.8
2,2021-01-02 03:10:00,2021,Jan,2,Sat,3,10,2021-01-02,03:10,3.2
3,2021-01-02 08:31:00,2021,Jan,2,Sat,8,31,2021-01-02,08:31,8.5
4,2021-01-03 00:35:00,2021,Jan,3,Sun,0,35,2021-01-03,00:35,0.6


In [34]:
# Preview the binary target ('Zone 1' vs 'Other Zones')
y.head()

0         Zone 1
1         Zone 1
2    Other Zones
3    Other Zones
4         Zone 1
Name: parking_zone, dtype: object

# Section 5: Train LSTM Model

In [7]:
# Build LSTM model
model = Sequential()
# LSTM layer with 50 units; `input_shape=(1, n_features)` means each sample is a
# sequence of a single timestep.
# NOTE(review): `X.shape[2]` requires a 3-D array (samples, timesteps, features),
# but the `X` created earlier in this notebook is a 2-D DataFrame — an
# encoding/reshaping step appears to be missing before this cell.
model.add(LSTM(units=50, input_shape=(1, X.shape[2])))
# Single output unit with no activation, i.e. a linear output
model.add(Dense(units=1))
# Mean squared error is a regression loss.
# NOTE(review): the target prepared above is a two-class label — confirm whether
# a classification loss and a sigmoid output were intended.
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
# NOTE(review): `Y` (upper case) is not defined in any visible cell; the target
# defined earlier is `y`, which holds string labels and would need numeric
# encoding. The logged 1450 steps per epoch also do not match the 2190 records
# loaded above, so this output appears to come from a different dataset.
model.fit(X, Y, epochs=100, batch_size=1, verbose=2)


Epoch 1/100
1450/1450 - 2s - loss: 0.0386
Epoch 2/100
1450/1450 - 1s - loss: 0.0256
Epoch 3/100
1450/1450 - 1s - loss: 0.0240
Epoch 4/100
1450/1450 - 1s - loss: 0.0229
Epoch 5/100
1450/1450 - 1s - loss: 0.0228
Epoch 6/100
1450/1450 - 1s - loss: 0.0217
Epoch 7/100
1450/1450 - 1s - loss: 0.0214
Epoch 8/100
1450/1450 - 1s - loss: 0.0214
Epoch 9/100
1450/1450 - 2s - loss: 0.0210
Epoch 10/100
1450/1450 - 2s - loss: 0.0206
Epoch 11/100
1450/1450 - 2s - loss: 0.0205
Epoch 12/100
1450/1450 - 2s - loss: 0.0205
Epoch 13/100
1450/1450 - 2s - loss: 0.0201
Epoch 14/100
1450/1450 - 2s - loss: 0.0203
Epoch 15/100
1450/1450 - 1s - loss: 0.0201
Epoch 16/100
1450/1450 - 2s - loss: 0.0198
Epoch 17/100
1450/1450 - 2s - loss: 0.0198
Epoch 18/100
1450/1450 - 2s - loss: 0.0199
Epoch 19/100
1450/1450 - 1s - loss: 0.0197
Epoch 20/100
1450/1450 - 1s - loss: 0.0194
Epoch 21/100
1450/1450 - 2s - loss: 0.0194
Epoch 22/100
1450/1450 - 1s - loss: 0.0196
Epoch 23/100
1450/1450 - 1s - loss: 0.0194
Epoch 24/100
1450/14

<keras.callbacks.History at 0x274e0240b00>

The cell above does the following:

- Builds an LSTM model using the Keras Sequential API.
- Adds an LSTM layer with 50 units, whose input shape is one timestep by the number of features in the reshaped input data.
- Adds a Dense output layer with one unit.
- Compiles the model with the Adam optimizer and the mean squared error loss function.
- Trains the model on the input data X and target data Y for 100 epochs with a batch size of 1.

In [10]:
# Persist the trained model to disk.
# NOTE(review): '.sav' is not one of the Keras model file extensions
# ('.keras' / '.h5'); newer Keras releases may reject this path — confirm against
# the installed Keras version and against whatever code loads the file.
model.save('model.sav')