**INSTALLING LIBRARIES**

In [3]:
!pip install pathway bokeh --quiet

# Step 1: Importing and Preprocessing the Data

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from datetime import datetime
import pathway as pw
import bokeh.plotting
import panel as pn

In [5]:
# Read the raw parking observations and preview the first ten rows.
# NOTE(review): the path is an absolute Colab location; adjust it when running elsewhere.
data = pd.read_csv(filepath_or_buffer='/content/dataset_capstone project.csv')
data.head(n=10)

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00
5,5,BHMBCCMKT01,577,26.144536,91.736172,177,car,low,3,0,04-10-2016,10:26:00
6,6,BHMBCCMKT01,577,26.144536,91.736172,219,truck,high,6,0,04-10-2016,10:59:00
7,7,BHMBCCMKT01,577,26.144536,91.736172,247,car,average,5,0,04-10-2016,11:25:00
8,8,BHMBCCMKT01,577,26.144536,91.736172,259,cycle,average,5,0,04-10-2016,11:59:00
9,9,BHMBCCMKT01,577,26.144536,91.736172,266,bike,high,8,0,04-10-2016,12:29:00


In [6]:
# Count the missing values in each column.
data.isna().sum()


Unnamed: 0,0
ID,0
SystemCodeNumber,0
Capacity,0
Latitude,0
Longitude,0
Occupancy,0
VehicleType,0
TrafficConditionNearby,0
QueueLength,0
IsSpecialDay,0


In [7]:
# Build one datetime column from the separate date and time text columns.
# The explicit format reads the date part as day-month-year.
timestamp_text = data['LastUpdatedDate'] + ' ' + data['LastUpdatedTime']
data['Timestamp'] = pd.to_datetime(timestamp_text, format='%d-%m-%Y %H:%M:%S')

# Put the observations in chronological order and renumber the index from zero.
data = data.sort_values(by='Timestamp').reset_index(drop=True)

CREATING A STREAMING DATA SOURCE

In [8]:
# Write only the columns the stream needs to the CSV file that is replayed later.
data.to_csv(
    "parking_stream.csv",
    columns=["ID", "Timestamp", "Occupancy", "Capacity"],
    index=False,
)


In [9]:
class ParkingSchema(pw.Schema):
    # Columns expected in parking_stream.csv, in the order they are written.
    ID: str  # Identifier column copied from the source data
    Timestamp: str  # Observation time as text; parsed into a datetime later
    Occupancy: int  # Number of occupied parking spots
    Capacity: int  # Total parking capacity at the location



In [10]:
# Replay the CSV as a simulated real-time stream with Pathway's replay_csv.
# input_rate controls how many rows per second are fed into the stream
# (1000 here, i.e. roughly a thousand rows each second).
data_stream = pw.demo.replay_csv(
    "parking_stream.csv",
    schema=ParkingSchema,
    input_rate=1000,
)

In [11]:
# Pattern used to parse the text in the 'Timestamp' column.
fmt = "%Y-%m-%d %H:%M:%S"

# Parse the timestamp text once and reuse that expression for both derived columns:
# - 't'   is the full parsed datetime
# - 'day' is the observation's date formatted as text with the time fixed to midnight
parsed_timestamp = data_stream.Timestamp.dt.strptime(fmt)
data_with_time = data_stream.with_columns(
    t=parsed_timestamp,
    day=parsed_timestamp.dt.strftime("%Y-%m-%dT00:00:00"),
)


# Step 2: Creating a Pricing Function

In [12]:
def model1_linear_model(occupancy, capacity, alpha=2.5, base_price=10):
    """Return a price that rises linearly with the occupancy rate.

    The price is ``base_price + alpha * (occupancy / capacity)``.

    Args:
        occupancy: Number of occupied spots.
        capacity: Total number of spots.
        alpha: Amount added to the base price when the occupancy rate is 1.0.
        base_price: Price charged when the occupancy rate is 0 (defaults to 10,
            the value previously hard-coded in the function).

    Returns:
        The computed price as a float. When ``capacity`` is zero or negative the
        occupancy rate is undefined, so ``base_price`` is returned unchanged
        instead of raising ``ZeroDivisionError`` in the middle of the stream.
    """
    if capacity <= 0:
        # No meaningful occupancy rate exists; fall back to the base price.
        return float(base_price)

    occupancy_rate = occupancy / capacity
    # Simple linear model: the price grows in proportion to the occupancy rate.
    return base_price + alpha * occupancy_rate

# Wrap the pricing function as a Pathway UDF so it is applied to the column values,
# then attach its result to the stream as a new 'Price' column.
price_udf = pw.udf(model1_linear_model)
prices_stream = data_with_time.with_columns(
    Price=price_udf(data_with_time.Occupancy, data_with_time.Capacity),
)

# You can now process the 'prices_stream' which includes the calculated 'Price' column
# For example, to display the first few rows of the stream:
# prices_stream.display() # Note: display() on a stream is for debugging and might print continuously

# Step 3: Visualizing Daily Price Fluctuations with a Bokeh Plot


In [15]:
# Activate the Panel extension to enable interactive visualizations
pn.extension()

# Custom Bokeh plotting function: takes the data source supplied by Pathway and returns a figure
def price_plotter(source):
    """Build the Bokeh figure that shows the price against time.

    Args:
        source: Data source passed in by the caller; it must provide the
            ``t`` and ``Price`` columns referenced by the glyphs below.

    Returns:
        The configured Bokeh figure.
    """
    fig = bokeh.plotting.figure(
        height=500,
        width=1000,
        title="Pathway: Daily Parking Price",
        x_axis_type="datetime",  # Render the x-axis values as dates/times
        x_axis_label="Time",
        y_axis_label="Price",
    )
    # Line showing how the price evolves over time
    fig.line("t", "Price", source=source, line_width=2, color="navy")

    # Red markers at each data point for better visibility.
    # scatter() draws circle markers by default and replaces circle(size=...),
    # which newer Bokeh releases deprecate.
    fig.scatter("t", "Price", source=source, size=6, color="red")

    return fig

# Bind the priced stream to the plotting function with Pathway's .plot():
# - price_plotter renders the figure
# - sorting_col="t" keeps the plotted rows in time order
viz = prices_stream.plot(price_plotter, sorting_col="t")

# Wrap the plot in a Panel column and mark it servable so it can be shown as a web app.
dashboard = pn.Column(viz)
dashboard.servable()



In [16]:
# Start the Pathway pipeline execution in the background
# - This triggers the real-time data stream processing defined above
# - %%capture --no-display suppresses output in the notebook interface

%%capture --no-display
pw.run()

Output()

