In [1]:
!pip install pathway bokeh --quiet # This cell may take a few seconds to execute.

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
from datetime import datetime
import pathway as pw
import bokeh.plotting
import panel as pn
from bokeh.models import ColumnDataSource

In [3]:
# The sample dataset can be downloaded from:
# https://drive.google.com/file/d/1D479FLjp9aO3Mg8g6Lpj9oRViWacurA6/view?usp=sharing
# Location of the raw parking dataset (Colab upload directory).
DATASET_PATH = '/content/dataset.csv'

# Read the raw observations; the bare `df` below renders a preview of the frame.
df = pd.read_csv(DATASET_PATH)
df

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00
...,...,...,...,...,...,...,...,...,...,...,...,...
18363,18363,Shopping,1920,26.150504,91.733531,1517,truck,average,6,0,19-12-2016,14:30:00
18364,18364,Shopping,1920,26.150504,91.733531,1487,car,low,3,0,19-12-2016,15:03:00
18365,18365,Shopping,1920,26.150504,91.733531,1432,cycle,low,3,0,19-12-2016,15:29:00
18366,18366,Shopping,1920,26.150504,91.733531,1321,car,low,2,0,19-12-2016,16:03:00


In [4]:
# Join the separate date ("dd-mm-yyyy") and time ("HH:MM:SS") text columns
# and parse the result into a single datetime column.
date_and_time = df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime']
df['Timestamp'] = pd.to_datetime(date_and_time, format='%d-%m-%Y %H:%M:%S')

# Put the rows in chronological order and renumber the index from 0.
df = df.sort_values(by='Timestamp', ignore_index=True)

In [5]:
# Export only the columns the streaming pipeline reads: the lot identifier,
# the observation time, and the occupancy / capacity readings.
# Note: only a few features are used here for simplicity.
# Participants are expected to incorporate additional relevant features in their models.
stream_columns = ["SystemCodeNumber", "Timestamp", "Occupancy", "Capacity"]
df.to_csv("parking_stream.csv", columns=stream_columns, index=False)

In [6]:
# Define the schema for the streaming data using Pathway.
# This schema specifies the expected structure of each data row in the stream;
# the field names match the columns written to "parking_stream.csv".

class ParkingSchema(pw.Schema):
    # Identifier of the parking lot; used later as the per-lot grouping key.
    SystemCodeNumber: str
    # Observation time kept as text; parsed downstream with "%Y-%m-%d %H:%M:%S".
    Timestamp: str
    Occupancy: int   # Number of occupied parking spots
    Capacity: int    # Total parking capacity at the location

In [7]:
# Replay the exported CSV as a simulated live stream with Pathway's replay_csv.
# The rows are fed in at a controlled rate to mimic real-time ingestion.

# Approximate number of rows ingested per second.
REPLAY_ROWS_PER_SECOND = 1000

data = pw.demo.replay_csv(
    "parking_stream.csv",
    schema=ParkingSchema,
    input_rate=REPLAY_ROWS_PER_SECOND,
)

In [None]:
# data.show()

In [None]:
%%capture --no-display
# Start the Pathway engine. At this point only the replayed `data` source has been defined.
pw.run()

Output()

In [41]:
# strptime pattern describing the text in the 'Timestamp' column
fmt = "%Y-%m-%d %H:%M:%S"

# Build the parse expression once and reuse it for both derived columns.
parsed_timestamp = data.Timestamp.dt.strptime(fmt)

# Extend the stream with:
# - 't'   : the full parsed datetime
# - 'day' : the date with the time part set to midnight, as text (handy for day-level grouping)
data_with_time = data.with_columns(
    t=parsed_timestamp,
    day=parsed_timestamp.dt.strftime("%Y-%m-%dT00:00:00"),
)

In [42]:
# Display the enriched stream as a notebook view; presumably it fills in once pw.run() executes.
data_with_time.show()

In [43]:
%%capture --no-display
# Start the Pathway engine to execute the dataflow defined in the cells above.
# NOTE(review): the saved output of this cell ends with
# "AttributeError: 'Markdown' object has no attribute 'document'" - re-run on a fresh kernel to confirm.
pw.run()

Output()

AttributeError: 'Markdown' object has no attribute 'document'

In [65]:
# The imports cell runs `from datetime import datetime` after `import datetime`,
# which rebinds the name to the class; import the module again to reach `timedelta`.
import datetime

# Width, in minutes, of each tumbling window used by the Model 1 aggregation.
WINDOW_MINUTES = 30
window_size = datetime.timedelta(minutes=WINDOW_MINUTES)

In [66]:
# Model 1 parameters: price = BASE_PRICE + ALPHA * (average occupancy / capacity)
BASE_PRICE = 10.0
ALPHA = 2.0

# Stage 1: split each lot's observations into tumbling windows of `window_size`.
lot_windows = data_with_time.windowby(
    pw.this.t,                                     # parsed timestamp drives window assignment
    window=pw.temporal.tumbling(window_size),
    instance=pw.this.SystemCodeNumber,             # one independent window series per parking lot
    behavior=pw.temporal.exactly_once_behavior(),
)

# Stage 2: collapse every window into a single row of raw statistics.
window_stats = lot_windows.reduce(
    t=pw.this._pw_window_end,                      # label each row with the end of its window
    SystemCodeNumber=pw.reducers.any(pw.this.SystemCodeNumber),
    occupancy_sum=pw.reducers.sum(pw.this.Occupancy),
    occupancy_count=pw.reducers.count(),
    capacity_max=pw.reducers.max(pw.this.Capacity),
)

# Stage 3: average occupancy per lot per window.
occupancy_stats = window_stats.with_columns(
    avg_occupancy=pw.this.occupancy_sum / pw.this.occupancy_count,
)

# Stage 4: apply the Model 1 pricing formula. This is a separate step because
# it reads the `avg_occupancy` column created in stage 3.
model1_window = occupancy_stats.with_columns(
    model1_price=BASE_PRICE + ALPHA * (pw.this.avg_occupancy / pw.this.capacity_max),
)


In [54]:
# class PricingPlotSchema(pw.Schema):
#     SystemCodeNumber: str
#     timestamp: str
#     price: float

In [67]:
# Keep only what the plots and printouts need, under presentation-friendly names:
# the lot identifier, the window-end time, and the Model 1 price.
model1_output = model1_window.select(
    pw.this.SystemCodeNumber,
    timestamp=pw.this.t,
    price=pw.this.model1_price,
)

In [68]:
# Display the Model 1 prices as a notebook view; presumably it fills in once pw.run() executes.
model1_output.show()

In [69]:
%%capture --no-display
# Start the Pathway engine to execute the Model 1 dataflow defined above.
# NOTE(review): the saved output of this cell ends with
# "AttributeError: 'Markdown' object has no attribute 'document'" - re-run on a fresh kernel to confirm.
pw.run()

Output()

AttributeError: 'Markdown' object has no attribute 'document'

In [None]:
# model1_output.select(pw.this.SystemCodeNumber).show()

In [50]:
# Evaluate the pipeline and print a bounded preview of the Model 1 prices.
# Without `n_rows` the whole result table is dumped into the cell output.
pw.debug.compute_and_print(model1_output, n_rows=20)



            | SystemCodeNumber | timestamp           | price
^GVPSX0C... | BHMBCCMKT01      | 2016-10-04 08:00:00 | 10.211438474870018
^GVPTWAQ... | BHMBCCMKT01      | 2016-10-04 08:30:00 | 10.221837088388215
^GVPRYTN... | BHMBCCMKT01      | 2016-10-04 09:00:00 | 10.277296360485268
^GVPS5D3... | BHMBCCMKT01      | 2016-10-04 09:35:00 | 10.370883882149046
^GVPMBX3... | BHMBCCMKT01      | 2016-10-04 10:00:00 | 10.51993067590988
^GVPJ5N0... | BHMBCCMKT01      | 2016-10-04 10:30:00 | 10.613518197573656
^GVPHBDE... | BHMBCCMKT01      | 2016-10-04 11:00:00 | 10.759098786828423
^GVPV1M7... | BHMBCCMKT01      | 2016-10-04 11:30:00 | 10.856152512998268
^GVPMJHV... | BHMBCCMKT01      | 2016-10-04 12:00:00 | 10.897746967071058
^GVPVCNZ... | BHMBCCMKT01      | 2016-10-04 12:30:00 | 10.922010398613518
^GVPYJSM... | BHMBCCMKT01      | 2016-10-04 13:05:00 | 10.932409012131716
^GVPVEK0... | BHMBCCMKT01      | 2016-10-04 13:30:00 | 10.911611785095321
^GVPKEN1... | BHMBCCMKT01      | 2016-10-04 14:05:00

In [16]:
# # Activate the Panel extension to enable interactive visualizations
# pn.extension()

# # Define a custom Bokeh plotting function that takes a data source (from Pathway) and returns a figure
# def multi_lot_plot(source, lot_name):
#     print(f"Plotting for lot: {lot_name}")
#     print("Timestamps:", timestamp[:3])
#     print("Prices:", price[:3])
#     # Create a Bokeh figure with datetime x-axis
#     fig = bokeh.plotting.figure(
#         height=400,
#         width=800,
#         title=f"Model 1: Pricing Trend - Lot {lot_name}",
#         x_axis_type="datetime",  # Ensure time-based data is properly formatted on the x-axis
#         x_axis_label="Timestamp",
#         y_axis_label="Price ($)",
#     )
#     # Safety check for stream contents
#     if not source.data or "SystemCodeNumber" not in source.data:
#         return fig

#     # Extract data for the current lot
#     mask = [x == lot_name for x in source.data.get("SystemCodeNumber", [])]
#     timestamps = [x for x, m in zip(source.data.get("timestamp", []), mask) if m]
#     prices = [x for x, m in zip(source.data.get("price", []), mask) if m]

#     if not timestamps:
#         return fig

#     sub_source = ColumnDataSource(data={"timestamp": timestamp, "price": price})
#     fig.line("timestamp", "price", source=sub_source, line_width=2, color="navy")
#     fig.circle("timestamp", "price", source=sub_source, size=6, color="red")
#     return fig

#     # Main plotting callback for all lots
# def all_lots_plotter(source):
#     if not source.data or "SystemCodeNumber" not in source.data:
#         return bokeh.plotting.figure(title="No Data")

#     lots_data = source.data.get("SystemCodeNumber", [])
#     if not lots_data:
#         return bokeh.plotting.figure(title="No Lot Data")

#     lots = sorted(set(lots_data))
#     plots = [multi_lot_plot(source, lot_name) for lot_name in lots if lot_name]
#     if not plots:
#       print("⚠️ No plots generated")
#     return pn.Column(*plots)


# # - 'sorting_col="timestamp"' ensures the data is plotted in time order
# dashboard = model1_output.plot(all_lots_plotter, sorting_col="timestamp")

# # Create a Panel layout and make it servable as a web app
# # This line enables the interactive plot to be displayed when the app is served
# pn.panel(dashboard).servable()

In [51]:
# target_lot = "BHMBCCTHL01"

# filtered_model1_output = model1_output.filter(pw.this.SystemCodeNumber == target_lot)

In [52]:
# from bokeh.plotting import figure
# from bokeh.models import ColumnDataSource
# import panel as pn

# pn.extension()

# def single_lot_plot(source):
#     if not source.data or "timestamp" not in source.data:
#         return pn.pane.Markdown("⏳ Waiting for data...")

#     timestamps = source.data.get("timestamp", [])
#     prices = source.data.get("price", [])

#     if not timestamps:
#         return pn.pane.Markdown("⚠️ No data yet for selected lot.")

#     cds = ColumnDataSource(data={"timestamp": timestamps, "price": prices})

#     p = figure(
#         title=f"Model 1 Pricing – Lot {target_lot}",
#         x_axis_type="datetime",
#         width=800,
#         height=400,
#         x_axis_label="Time",
#         y_axis_label="Price ($)"
#     )

#     p.line("timestamp", "price", source=cds, line_width=2, color="darkgreen")
#     p.circle("timestamp", "price", source=cds, size=6, color="orange")

#     return p


In [53]:
# dashboard = filtered_model1_output.plot(single_lot_plot, sorting_col="timestamp")
# pn.panel(dashboard).servable()

In [70]:
# Empty data source that the streaming callback appends to.
# Its column names match the ones referenced by the line glyph below.
source = ColumnDataSource(data=dict(timestamp=[], price=[]))

# Figure with a datetime x-axis for the price time series.
figure_options = dict(
    title="Model 1 Price Stream (first lot)",
    x_axis_type="datetime",
    width=800,
    height=400,
    x_axis_label="Time",
    y_axis_label="Price",
)
plot = bokeh.plotting.figure(**figure_options)
plot.line("timestamp", "price", source=source, color="blue", line_width=2)


# Timestamp of the newest point already pushed to the plot (None until the first push).
_model1_stream_state = {"last_timestamp": None}


def stream_updates_model1():
    """Push not-yet-plotted Model 1 prices for the first lot into ``source``.

    The result table is materialised with ``pw.debug.table_to_pandas``.
    (``pw.debug.compute_and_print`` only prints and returns ``None``, so it
    cannot be iterated.) Rows are restricted to a single lot, the one whose
    ``SystemCodeNumber`` sorts first, to match the plot title. They are ordered
    by time, and only rows newer than the last streamed timestamp are appended,
    so repeated calls never duplicate points.

    Returns:
        None. The function's effect is the call to ``source.stream``.

    NOTE(review): every call re-evaluates ``model1_output``; confirm this is
    affordable at the chosen callback period.
    """
    prices = pw.debug.table_to_pandas(model1_output)
    if prices.empty:
        return

    # Deterministic choice of the "first lot": smallest identifier.
    first_lot = prices["SystemCodeNumber"].min()
    lot_prices = prices[prices["SystemCodeNumber"] == first_lot].sort_values("timestamp")

    # Drop everything that has already been sent to the plot.
    last_timestamp = _model1_stream_state["last_timestamp"]
    if last_timestamp is not None:
        lot_prices = lot_prices[lot_prices["timestamp"] > last_timestamp]
    if lot_prices.empty:
        return

    source.stream(
        {
            "timestamp": lot_prices["timestamp"].tolist(),
            "price": lot_prices["price"].tolist(),
        },
        rollover=100,  # keep at most the 100 most recent points on the plot
    )
    _model1_stream_state["last_timestamp"] = lot_prices["timestamp"].iloc[-1]

# Load Panel's notebook resources. Without this call, Panel objects may not render
# inline. The only other pn.extension() calls in this notebook are commented out.
pn.extension()

# Poll for new prices once per second (`period` is given in milliseconds).
pn.state.add_periodic_callback(stream_updates_model1, period=1000)

# Wrap the Bokeh figure in a Panel pane. As the last expression of the cell it is
# displayed inline; `.servable()` additionally marks it for `panel serve`.
layout_obj = pn.pane.Bokeh(plot)
layout_obj.servable()

In [71]:
%%capture --no-display
# Start the Pathway engine to execute the dataflow defined in the cells above.
# NOTE(review): the saved output of this cell ends with
# "AttributeError: 'Markdown' object has no attribute 'document'" - re-run on a fresh kernel to confirm.
pw.run()

Output()

AttributeError: 'Markdown' object has no attribute 'document'