In [None]:
!pip install pathway bokeh --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.4/149.4 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.6/77.6 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m777.6/777.6 kB[0m [31m39.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.5/26.5 MB[0m [31m61.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
from datetime import datetime, timedelta
import bokeh.plotting
import panel as pn
import pathway as pw

In [None]:
# Read the raw parking dataset from disk and preview its first rows.
df = pd.read_csv(filepath_or_buffer="/content/dataset.csv")
df.head()

FileNotFoundError: [Errno 2] No such file or directory: '/content/dataset.csv'

In [None]:
# Count missing values per column (isna is the canonical spelling of isnull).
df.isna().sum()

Unnamed: 0,0
ID,0
SystemCodeNumber,0
Capacity,0
Latitude,0
Longitude,0
Occupancy,0
VehicleType,0
TrafficConditionNearby,0
QueueLength,0
IsSpecialDay,0


In [None]:
# Join the separate date and time text columns, then parse them as day-month-year datetimes.
timestamp_text = df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime']
df['Timestamp'] = pd.to_datetime(timestamp_text, format='%d-%m-%Y %H:%M:%S')

# Put rows in chronological order and renumber the index from zero.
df = df.sort_values(by='Timestamp', ignore_index=True)

In [None]:
# Report how many distinct row IDs and parking-lot codes the dataset contains.
print(f"Number of unique IDs: {df['ID'].nunique()}")
print(f"Number of unique SystemCodeNumbers: {df['SystemCodeNumber'].nunique()}")

Number of unique IDs: 18368
Number of unique SystemCodeNumbers: 14


In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the SystemCodeNumber -> integer mapping first, then apply it to the column.
label_encoder = LabelEncoder().fit(df['SystemCodeNumber'])
df['SystemCodeNumber_Encoded'] = label_encoder.transform(df['SystemCodeNumber'])

In [None]:
# Write only the columns the first pricing model reads to the CSV replayed by Pathway.
model1_columns = ["Timestamp", "Occupancy", "Capacity", "SystemCodeNumber_Encoded"]
df.to_csv("parking_stream.csv", columns=model1_columns, index=False)

In [None]:

# Column schema for the first model's replayed CSV ("parking_stream.csv").
# NOTE(review): a second class with the same name is defined later in this
# notebook and shadows this one once that cell runs.
class ParkingSchema(pw.Schema):
    Timestamp: str   # Observation time as text; parsed downstream with "%Y-%m-%d %H:%M:%S"
    Occupancy: int   # Number of occupied parking spots
    Capacity: int    # Total parking capacity at the location
    SystemCodeNumber_Encoded: int  # Integer label assigned to SystemCodeNumber by the label encoder

In [None]:
# Replay the exported CSV as a simulated stream; input_rate sets the replay speed.
data = pw.demo.replay_csv(
    "parking_stream.csv",
    schema=ParkingSchema,
    input_rate=1000,
)

In [None]:
# Layout of the text in the 'Timestamp' column.
fmt = "%Y-%m-%d %H:%M:%S"

# Build the parse expression once and reuse it for both derived columns:
# - 't'   : the full parsed datetime
# - 'day' : the same instant formatted with its time part fixed to midnight
parsed_time = data.Timestamp.dt.strptime(fmt)
data_with_time = data.with_columns(
    t=parsed_time,
    day=parsed_time.dt.strftime("%Y-%m-%dT00:00:00"),
)


In [None]:
# Daily tumbling-window aggregation, one window per parking lot per day.
# For every lot and day it records the highest/lowest occupancy and the capacity,
# then derives a price from the day's occupancy swing relative to capacity.
#
# `import datetime` re-binds the name to the module, because the notebook's
# earlier `from datetime import datetime` shadows it.
import datetime

delta_window = (
    data_with_time.windowby(
        pw.this.t,  # Event-time column used for windowing (parsed datetime)
        # Partition by parking lot. The previous `instance=pw.this.day` made
        # `_pw_instance` the day string, so the SystemCodeNumber_Encoded output
        # column did not contain the lot code. One-day tumbling windows already
        # separate the days, so the day is not needed in the instance.
        instance=pw.this.SystemCodeNumber_Encoded,
        window=pw.temporal.tumbling(datetime.timedelta(days=1)),  # Fixed-size daily window
        behavior=pw.temporal.exactly_once_behavior(),  # Emit each window's result once
    )
    .reduce(
        SystemCodeNumber_Encoded=pw.this._pw_instance,   # Lot code this window belongs to
        t=pw.this._pw_window_end,                        # End timestamp of the window
        occ_max=pw.reducers.max(pw.this.Occupancy),      # Highest occupancy observed in the window
        occ_min=pw.reducers.min(pw.this.Occupancy),      # Lowest occupancy observed in the window
        cap=pw.reducers.max(pw.this.Capacity),           # Largest capacity value seen in the window
    )
    .with_columns(
        # Base price of 10 plus the occupancy swing as a fraction of capacity.
        price=10 + (pw.this.occ_max - pw.this.occ_min) / pw.this.cap
    )
)


In [None]:
from bokeh.plotting import figure
# ColumnDataSource / column are not used by this cell any more; the imports are
# kept so the names stay bound for any other cell that relies on them.
from bokeh.models import ColumnDataSource
from bokeh.layouts import column

# Initialise Panel before any Panel objects are created.
pn.extension('bokeh', comms='colab')

# Integer lot codes actually present in the data (replaces a hard-coded range(14)).
unique_system_codes = sorted(int(code) for code in df['SystemCodeNumber_Encoded'].unique())


def make_lot_plotter(system_code, price_column):
    """Build the plotting callback handed to ``Table.plot`` for one parking lot.

    ``Table.plot`` calls the plotting function once, passing the
    ``ColumnDataSource`` that Pathway itself keeps streaming rows into. The
    figure must therefore draw directly from that source; copying rows into
    separate sources inside the callback only ever sees the initial empty data.

    Args:
        system_code: Lot code shown in the figure title.
        price_column: Name of the table column plotted on the y axis.

    Returns:
        A function taking the Pathway-managed source and returning a Bokeh figure.
    """
    def plot_lot(source):
        fig = figure(height=300, width=600, title=f"SystemCodeNumber_Encoded: {system_code}",
                     x_axis_type="datetime")
        fig.line('t', price_column, source=source, line_width=2, color="navy")
        # scatter() replaces the deprecated circle(size=...) marker call.
        fig.scatter('t', price_column, source=source, size=6, color="red")
        return fig

    return plot_lot


# One live plot per lot: filter the windowed table down to that lot and let
# Pathway stream its rows into the figure. delta_window exposes `price`
# (it has no `price_clamped` column).
# Assumes SystemCodeNumber_Encoded holds the integer lot code.
lot_plots = [
    delta_window
    .filter(pw.this.SystemCodeNumber_Encoded == code)
    .plot(make_lot_plotter(code, 'price'), sorting_col="t")
    for code in unique_system_codes
]

# Stack the per-lot plots vertically and display/serve them with Panel.
viz = pn.Column(*lot_plots)
viz.servable()




In [None]:
# Execute the Pathway computation graph built in the cells above.
pw.run()

Output()



2) Model 2: demand-based pricing

In [None]:
# Give each column its own encoder. Re-fitting the shared `label_encoder` would
# throw away the SystemCodeNumber mapping it learned earlier, leaving no way to
# translate encoded lot codes back to their names.
vehicle_type_encoder = LabelEncoder()
traffic_condition_encoder = LabelEncoder()

df['VehicleType_Encoded'] = vehicle_type_encoder.fit_transform(df['VehicleType'])
df['TrafficConditionNearby_Encoded'] = traffic_condition_encoder.fit_transform(df['TrafficConditionNearby'])

# Preview a few rows instead of rendering the whole frame.
df.head()

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime,Timestamp,SystemCodeNumber_Encoded,VehicleType_Encoded,TrafficConditionNearby_Encoded
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00,2016-10-04 07:59:00,0,1,2
1,5248,BHMNCPHST01,1200,26.140014,91.731000,237,bike,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00,4,0,2
2,3936,BHMMBMMBX01,687,20.000035,78.000003,264,car,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00,3,1,2
3,6560,BHMNCPNST01,485,26.140048,91.730972,249,car,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00,5,1,2
4,17056,Shopping,1920,26.150504,91.733531,614,cycle,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00,13,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18363,3935,BHMEURBRD01,470,26.149020,91.739503,373,car,low,2,0,19-12-2016,16:30:00,2016-12-19 16:30:00,2,1,2
18364,2623,BHMBCCTHL01,387,26.144495,91.736205,387,car,low,2,0,19-12-2016,16:30:00,2016-12-19 16:30:00,1,1,2
18365,1311,BHMBCCMKT01,577,26.144536,91.736172,193,cycle,low,2,0,19-12-2016,16:30:00,2016-12-19 16:30:00,0,2,2
18366,17055,Others-CCCPS98,3103,26.147500,91.727978,1671,car,low,3,0,19-12-2016,16:30:00,2016-12-19 16:30:00,12,1,2


In [None]:
# Show which integer the encoder assigned to each distinct category value.
encoded_pairs = (
    ('VehicleType', 'VehicleType_Encoded'),
    ('TrafficConditionNearby', 'TrafficConditionNearby_Encoded'),
)
for raw_column, encoded_column in encoded_pairs:
    print(df[[raw_column, encoded_column]].drop_duplicates())

   VehicleType  VehicleType_Encoded
0          car                    1
1         bike                    0
4        cycle                    2
14       truck                    3
   TrafficConditionNearby  TrafficConditionNearby_Encoded
0                     low                               2
7                 average                               0
86                   high                               1


In [None]:
# Hand-picked weights replace the arbitrary integers produced by the label encoder.
VType = dict(cycle=0, bike=0.33, car=0.66, truck=1)
traffic = dict(low=0, average=0.5, high=1)

# Translate each category to its weight.
df['VehicleTypeWeight'] = df['VehicleType'].map(VType)
df['Traffic'] = df['TrafficConditionNearby'].map(traffic)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale QueueLength; the scaler is fitted on this single column.
scaler = MinMaxScaler()
queue_scaled = scaler.fit_transform(df[['QueueLength']])
df['QueueLength_Normalized'] = queue_scaled

# Compare the raw and scaled values side by side.
print(df[['QueueLength', 'QueueLength_Normalized']])

       QueueLength  QueueLength_Normalized
0                1                0.066667
1                2                0.133333
2                2                0.133333
3                2                0.133333
4                2                0.133333
...            ...                     ...
18363            2                0.133333
18364            2                0.133333
18365            2                0.133333
18366            3                0.200000
18367            2                0.133333

[18368 rows x 2 columns]


In [None]:
# Fraction of each lot's capacity that is occupied.
df['Ratio'] = df['Occupancy'].div(df['Capacity'])


In [None]:
# Re-fit the shared scaler on the occupancy ratio and store the scaled values.
ratio_scaled = scaler.fit_transform(df[['Ratio']])
df['Ratio_Norm'] = ratio_scaled

# Compare the raw and scaled ratio side by side.
print(df[['Ratio', 'Ratio_Norm']])

          Ratio  Ratio_Norm
0      0.105719    0.098521
1      0.197500    0.186953
2      0.384279    0.366915
3      0.513402    0.491326
4      0.319792    0.304781
...         ...         ...
18363  0.793617    0.761314
18364  1.000000    0.960165
18365  0.334489    0.318942
18366  0.538511    0.515518
18367  0.614583    0.588814

[18368 rows x 2 columns]


In [None]:
# Write the second model's feature columns to the CSV replayed by Pathway.
# NOTE: this file name differs from the first model's "parking_stream.csv" only
# by letter case, so the two collide on case-insensitive filesystems.
model2_columns = [
    'SystemCodeNumber_Encoded',
    'QueueLength_Normalized',
    'VehicleTypeWeight',
    'Traffic',
    'IsSpecialDay',
    'Ratio_Norm',
    'Timestamp',
]
df.to_csv('Parking_stream.csv', columns=model2_columns, index=False)


In [None]:
# Column schema for the second model's replayed CSV ("Parking_stream.csv").
# NOTE(review): this re-uses the name of the first model's schema class and
# shadows it from this cell onward.
class ParkingSchema(pw.Schema):
    Timestamp: str  # Observation time as text; parsed downstream with "%Y-%m-%d %H:%M:%S"
    Ratio_Norm: float  # Min-max scaled Occupancy / Capacity
    IsSpecialDay: int  # Special-day indicator copied from the dataset
    Traffic: float  # Traffic weight: low=0, average=0.5, high=1
    VehicleTypeWeight: float  # Vehicle weight: cycle=0, bike=0.33, car=0.66, truck=1
    QueueLength_Normalized: float  # Min-max scaled QueueLength
    SystemCodeNumber_Encoded: int  # Integer label assigned to SystemCodeNumber by the label encoder

In [None]:
# Replay the second model's CSV as a simulated stream; input_rate sets the replay speed.
data = pw.demo.replay_csv(
    "Parking_stream.csv",
    schema=ParkingSchema,
    input_rate=100,
)

In [None]:
# Layout of the text in the 'Timestamp' column.
fmt = "%Y-%m-%d %H:%M:%S"

# Build the parse expression once and reuse it for both derived columns:
# 't' is the full datetime, 'day' is the same instant formatted at midnight.
parsed_time = data.Timestamp.dt.strptime(fmt)
data_with_time = data.with_columns(
    t=parsed_time,
    day=parsed_time.dt.strftime("%Y-%m-%dT00:00:00"),
)

In [None]:
# Daily tumbling-window demand model, one window per parking lot per day.
# Each window aggregates the normalised features, combines them into a weighted
# demand index, scales the base price by it, and clamps the result to [5, 20].
demand_window = (
    data_with_time.windowby(
        pw.this.t,  # Event-time column used for windowing
        # Partition by parking lot. The previous `instance=pw.this.day` made
        # `_pw_instance` the day string, so the SystemCodeNumber_Encoded output
        # column did not contain the lot code. One-day tumbling windows already
        # separate the days, so the day is not needed in the instance.
        instance=pw.this.SystemCodeNumber_Encoded,
        window=pw.temporal.tumbling(timedelta(days=1)),
        behavior=pw.temporal.exactly_once_behavior(),  # Emit each window's result once
    )
    .reduce(
        SystemCodeNumber_Encoded=pw.this._pw_instance,  # Lot code this window belongs to
        t=pw.this._pw_window_end,                       # End timestamp of the window
        ratio_norm=pw.reducers.avg(pw.this.Ratio_Norm),
        queue_norm=pw.reducers.max(pw.this.QueueLength_Normalized),
        traffic_norm=pw.reducers.avg(pw.this.Traffic),
        is_special_day=pw.reducers.max(pw.this.IsSpecialDay),
        vehicle_weight=pw.reducers.avg(pw.this.VehicleTypeWeight),
    )
    .with_columns(
        base_price=10,
        # NOTE(review): the weights sum to 1.05, not 1.0 - confirm this is intended.
        demand_index=(
            0.4 * pw.this.ratio_norm +
            0.3 * pw.this.queue_norm +
            0.2 * pw.this.traffic_norm +
            0.1 * pw.this.is_special_day +
            0.05 * pw.this.vehicle_weight
        ),
    )
    .with_columns(
        price=pw.this.base_price * (1 + pw.this.demand_index),
    )
    .with_columns(
        # Keep the final price between 5 and 20.
        price_clamped=pw.apply(
            lambda x: max(5, min(20, x)),
            pw.this.price
        )
    )
)

In [None]:
from bokeh.plotting import figure
# ColumnDataSource / column are not used by this cell any more; the imports are
# kept so the names stay bound for any other cell that relies on them.
from bokeh.models import ColumnDataSource
from bokeh.layouts import column

# Initialise Panel before any Panel objects are created.
pn.extension()

# Integer lot codes actually present in the data (replaces a hard-coded range(14)).
unique_system_codes = sorted(int(code) for code in df['SystemCodeNumber_Encoded'].unique())


def make_lot_plotter(system_code, price_column):
    """Build the plotting callback handed to ``Table.plot`` for one parking lot.

    ``Table.plot`` calls the plotting function once, passing the
    ``ColumnDataSource`` that Pathway itself keeps streaming rows into. The
    figure must therefore draw directly from that source; copying rows into
    separate sources inside the callback only ever sees the initial empty data.

    Args:
        system_code: Lot code shown in the figure title.
        price_column: Name of the table column plotted on the y axis.

    Returns:
        A function taking the Pathway-managed source and returning a Bokeh figure.
    """
    def plot_lot(source):
        fig = figure(height=300, width=600, title=f"SystemCodeNumber_Encoded: {system_code}",
                     x_axis_type="datetime")
        fig.line('t', price_column, source=source, line_width=2, color="navy")
        # scatter() replaces the deprecated circle(size=...) marker call.
        fig.scatter('t', price_column, source=source, size=6, color="red")
        return fig

    return plot_lot


# One live plot per lot: filter the windowed table down to that lot and let
# Pathway stream its rows into the figure. The clamped price is plotted because
# it is the final price this model produces; the raw `price` ignores the bounds.
# Assumes SystemCodeNumber_Encoded holds the integer lot code.
lot_plots = [
    demand_window
    .filter(pw.this.SystemCodeNumber_Encoded == code)
    .plot(make_lot_plotter(code, 'price_clamped'), sorting_col="t")
    for code in unique_system_codes
]

# Stack the per-lot plots vertically and display/serve them with Panel.
viz = pn.Column(*lot_plots)
viz.servable()





In [None]:
# Start the Pathway pipeline execution in the background
# - This triggers the real-time data stream processing defined above
# - %%capture --no-display suppresses output in the notebook interface

%%capture --no-display
pw.run()

Output()

