#  Summer Analytics 2025 Capstone Project
## Dynamic Pricing for Urban Parking Lots

**Name:** Nadeem Mohammad Qureshi  
**Objective:** Build a real-time pricing engine using demand signals like occupancy, queue length, traffic, and special events.


In [18]:
!pip install --upgrade google-cloud-bigquery



In [19]:
import pandas as pd
import numpy as np
from datetime import datetime
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import column
output_notebook()

###  Data Preprocessing
We clean the dataset by:
- Combining date and time into a single timestamp
- Handling missing values in occupancy, capacity, queue length, and vehicle type
- Mapping traffic levels to numeric values


In [20]:
# Load the raw parking dataset from the project repository and tidy the header names.
df = pd.read_csv(
    "https://raw.githubusercontent.com/nadeem-001/summer-analytics-2025-capstone/refs/heads/main/dataset.csv"
)
df.columns = df.columns.str.strip()

# Numeric stand-ins for the textual traffic levels.
traffic_map = {'low': 0.2, 'medium': 0.5, 'high': 0.9}

df = df.assign(
    # Join the date and time text into one timestamp; unparseable values become NaT.
    # NOTE(review): no explicit format/dayfirst is passed, so pandas infers the
    # day/month order — confirm LastUpdatedDate is not day-first, otherwise rows
    # would be silently coerced to NaT here.
    Timestamp=pd.to_datetime(
        df['LastUpdatedDate'].astype(str) + ' ' + df['LastUpdatedTime'].astype(str),
        errors='coerce',
    ),
    # Missing capacity falls back to the median capacity; missing counts to 0.
    Capacity=df['Capacity'].fillna(df['Capacity'].median()),
    Occupancy=df['Occupancy'].fillna(0),
    QueueLength=df['QueueLength'].fillna(0),
    # Missing vehicle type is treated as a car.
    VehicleType=df['VehicleType'].fillna('car'),
    # Traffic labels that are missing or not in traffic_map get the 'medium' value.
    Traffic=df['TrafficConditionNearby'].map(traffic_map).fillna(0.5),
    # Missing special-day flag is treated as an ordinary day.
    SpecialDay=df['IsSpecialDay'].fillna(0),
)

###  Pricing Models
We implement two models:
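- **Baseline Model:** Price increases linearly with the occupancy rate: `price = 10 + 5 × (Occupancy / Capacity)`
- **Demand-Based Model:** Uses a weighted demand score built from occupancy rate (0.4), queue length (0.2), traffic (0.2), special day (0.1), and vehicle type (0.1): `price = 10 × (1 + 0.5 × score)`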


In [21]:
# Fraction of each lot's capacity that is currently occupied.
# NOTE(review): a Capacity of 0 would produce inf/NaN here — confirm the data never contains it.
df['OccupancyRate'] = df['Occupancy'] / df['Capacity']

# Relative demand weight per vehicle type. Series.map returns NaN for any type
# that is not a key of the mapping, and that NaN would propagate into
# DemandScore and DemandPrice, so unknown types fall back to the car weight
# (1.0) — the same default already applied to missing VehicleType values.
df['VehicleWeight'] = (
    df['VehicleType']
    .map({'car': 1.0, 'bike': 0.7, 'truck': 1.3})
    .fillna(1.0)
)

# Baseline pricing
def baseline_price(row, base_price=10, alpha=5):
    """Return the baseline price: a base amount plus a linear occupancy surcharge.

    Args:
        row: Mapping-like object providing an ``'OccupancyRate'`` entry.
        base_price: Price charged at zero occupancy.
        alpha: Amount added per unit of occupancy rate.

    Returns:
        ``base_price + alpha * row['OccupancyRate']``.
    """
    occupancy_surcharge = alpha * row['OccupancyRate']
    return base_price + occupancy_surcharge
# baseline_price only does arithmetic on row['OccupancyRate'], so it can be
# given the whole frame at once. This yields the same values as a row-wise
# apply without a Python-level call per row, and also works on an empty frame.
df['BaselinePrice'] = baseline_price(df)

# Demand-based pricing
def demand_score(row):
    """Return the weighted demand score for one observation.

    Args:
        row: Mapping-like object providing ``'OccupancyRate'``,
            ``'QueueLength'``, ``'Traffic'``, ``'SpecialDay'`` and
            ``'VehicleWeight'`` entries.

    Returns:
        The sum of the five weighted terms (weights 0.4, 0.2, 0.2, 0.1, 0.1).
    """
    occupancy_term = 0.4 * row['OccupancyRate']
    # Queue length is divided by 10 before weighting.
    queue_term = 0.2 * (row['QueueLength'] / 10)
    traffic_term = 0.2 * row['Traffic']
    special_day_term = 0.1 * row['SpecialDay']
    vehicle_term = 0.1 * row['VehicleWeight']
    # Added left to right in the same order as the weights are listed above.
    return occupancy_term + queue_term + traffic_term + special_day_term + vehicle_term
# demand_score is plain arithmetic on named columns, so it can be evaluated on
# the whole frame in one vectorized pass instead of once per row.
df['DemandScore'] = demand_score(df)
# Base price of 10, raised by 50% of the base for every unit of demand score.
df['DemandPrice'] = 10 * (1 + 0.5 * df['DemandScore'])


###  Visualization
We use Bokeh to visualize how baseline and demand-based prices change over time for a selected parking lot.


In [22]:
# Use the first lot code that appears in the data and order its rows by time.
lot_id = df['SystemCodeNumber'].unique()[0]
lot_df = df.loc[df['SystemCodeNumber'] == lot_id].sort_values(by='Timestamp')

# One figure with both price series on a shared datetime axis.
p1 = figure(
    title=f"Baseline vs Demand Price for Lot {lot_id}",
    x_axis_type='datetime',
    width=800,
    height=300,
    x_axis_label="Time",
    y_axis_label="Price ($)",
)
p1.line(
    x=lot_df['Timestamp'],
    y=lot_df['BaselinePrice'],
    color='blue',
    legend_label='Baseline Price',
)
p1.line(
    x=lot_df['Timestamp'],
    y=lot_df['DemandPrice'],
    color='green',
    legend_label='Demand Price',
)
p1.legend.location = "top_left"
show(column(p1))


In [23]:
# Export only the three columns listed in ParkingSchema, skipping rows where any
# of them is missing (this includes timestamps that failed to parse).
# NOTE(review): ParkingSchema declares Occupancy and Capacity as int; if the
# earlier fillna calls left these columns as floats the CSV will hold values
# such as "12.0" — confirm the stream reader accepts them.
df_stream = df.loc[:, ['Timestamp', 'Occupancy', 'Capacity']].dropna(how='any')
df_stream.to_csv(path_or_buf="parking_stream.csv", index=False)

###  Real-Time Simulation with Pathway
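We simulate real-time data streaming using Pathway’s `replay_csv()` and compute one price per daily tumbling window. Note that this streaming price uses its own formula, `10 + (max occupancy − min occupancy) / capacity`, rather than the baseline or demand-based models above.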


In [24]:
!pip install pathway
import pathway as pw
class ParkingSchema(pw.Schema):
    # Column layout of parking_stream.csv, passed as `schema` to pw.demo.replay_csv.
    # Timestamp is read as text and parsed afterwards with strptime.
    Timestamp: str
    # Occupancy and Capacity are the values aggregated in the daily window.
    Occupancy: int
    Capacity: int

# Layout of the Timestamp text written to parking_stream.csv.
fmt = "%Y-%m-%d %H:%M:%S"

# Replay the exported CSV as a stream of ParkingSchema rows.
data = pw.demo.replay_csv("parking_stream.csv", schema=ParkingSchema, input_rate=1000)

# Add the parsed timestamp `t` and a `day` string (the date at midnight),
# which is used below as the window instance.
data_with_time = data.with_columns(
    t=pw.this.Timestamp.dt.strptime(fmt),
    day=pw.this.Timestamp.dt.strptime(fmt).dt.strftime("%Y-%m-%dT00:00:00"),
)

import datetime
# Group rows into one-day tumbling windows over the parsed timestamp `t`,
# using the `day` string as the window instance.
daily_windows = data_with_time.windowby(
    pw.this.t,
    instance=pw.this.day,
    window=pw.temporal.tumbling(datetime.timedelta(days=1)),
    behavior=pw.temporal.exactly_once_behavior(),
)

# Per window: the window end as `t`, the occupancy extremes, and the largest capacity.
daily_stats = daily_windows.reduce(
    t=pw.this._pw_window_end,
    occ_max=pw.reducers.max(pw.this.Occupancy),
    occ_min=pw.reducers.min(pw.this.Occupancy),
    cap=pw.reducers.max(pw.this.Capacity),
)

# Price = 10 plus the window's occupancy swing (max - min) relative to capacity.
delta_window = daily_stats.with_columns(
    price=10 + (pw.this.occ_max - pw.this.occ_min) / pw.this.cap
)

Collecting pathway
  Downloading pathway-0.25.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/60.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Collecting h3>=4 (from pathway)
  Downloading h3-4.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting python-sat>=0.1.8.dev0 (from pathway)
  Downloading python_sat-1.8.dev18-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (1.5 kB)
Collecting beartype<0.16.0,>=0.14.0 (from pathway)
  Downloading beartype-0.15.0-py3-none-any.whl.metadata (28 kB)
Collecting diskcache>=5.2.1 (from pathway)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting boto3<1.36.0,>=1.26.76 (from pathway)
  Downloading boto3-1.35.99-py3-none-any.whl.metadata (6.7

In [25]:
import bokeh.plotting
import panel as pn
pn.extension()

def price_plotter(source):
    """Build the Bokeh figure that shows the daily price over time.

    Args:
        source: Data source handed in by ``delta_window.plot``; it must expose
            the ``t`` and ``price`` columns referenced by the glyphs below.

    Returns:
        The configured Bokeh figure: a navy line with a red marker per point.
    """
    fig = bokeh.plotting.figure(
        height=400,
        width=800,
        title="Pathway: Daily Parking Price",
        x_axis_type="datetime",
    )
    fig.line("t", "price", source=source, line_width=2, color="navy")
    # scatter() draws circle markers by default; calling circle() with a
    # screen-unit `size` is deprecated since Bokeh 3.4.
    fig.scatter("t", "price", source=source, size=6, color="red")
    return fig

# Hand price_plotter to the Pathway table's plot(), using column "t" for sorting.
viz = delta_window.plot(price_plotter, sorting_col="t")

# Wrap the plot in a Panel column; the final expression is what the notebook displays.
dashboard = pn.Column(viz)
dashboard.servable()




In [26]:
%%capture --no-display
# Execute the Pathway pipeline defined in the cells above (the replayed CSV,
# the daily window and the plot attached to delta_window).
pw.run()

Output()



###  Summary
- Cleaned and processed real-world parking lot data
- Built baseline and demand-based pricing models
- Simulated real-time pricing using Pathway
- Visualized pricing trends using Bokeh and Panel
