In [1]:
# Model 3 - Proximity-Aware Competitive Pricing
# This notebook implements a smart pricing model for urban parking lots.
# It uses:
# - Lot-specific demand: occupancy, queue length, etc.
# - Nearby competitor prices and distance.
# - Business-driven adjustments to stay competitive and attractive.
# - This model is built using pandas, numpy and Pathway.

In [2]:
# Installing necessary libraries: pathway(data streaming pipeline), Bokeh(visualization), Geopy(distance calculation).
!pip install pathway bokeh geopy



In [3]:
# Upload the raw dataset from the local machine through the Colab file picker.
# Later cells read the uploaded file as dataset.csv.
from google.colab import files
uploaded = files.upload()

Saving dataset.csv to dataset.csv


In [4]:
# Import core Python libraries for data manipulation, geo-calculations and the Pathway pipeline
import pandas as pd
import numpy as np
import json
from geopy.distance import geodesic
import pathway as pw

In [6]:
# Load the uploaded dataset and preview its first rows.
# A relative path is used so this cell reads the same file as the preprocessing
# cell, regardless of the notebook's working directory.
df = pd.read_csv("dataset.csv")
df.head()

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00


In [8]:
# Preprocess the dataset: combine date and time into one timestamp, encode the
# categorical columns, sort chronologically and save the cleaned CSV.

df = pd.read_csv("dataset.csv")

# The source columns look like "04-10-2016" and "07:59:00" (day first).
# An explicit format avoids per-row guessing and fails loudly on a malformed
# row instead of silently swapping day and month.
df["Timestamp"] = pd.to_datetime(
    df["LastUpdatedDate"] + " " + df["LastUpdatedTime"],
    format="%d-%m-%Y %H:%M:%S",
)

vehicle_map = {"bike": 1, "car": 2, "truck": 3}
traffic_map = {"low": 1, "medium": 2, "high": 3}

# Labels that are missing from the maps (and missing values) are encoded as 0.
df["VehicleTypeEncoded"] = df["VehicleType"].map(vehicle_map).fillna(0).astype(int)
df["TrafficEncoded"] = df["TrafficConditionNearby"].map(traffic_map).fillna(0).astype(int)

# A stable sort keeps rows that share a timestamp in their original file order,
# so the saved CSV is identical from run to run.
df = df.sort_values("Timestamp", kind="stable")

# compute_price parses Timestamp with "%Y-%m-%d %H:%M:%S", so write exactly that
# format instead of relying on the default datetime rendering.
df.to_csv("preprocessed_dataset.csv", index=False, date_format="%Y-%m-%d %H:%M:%S")

In [9]:
# Define the input schema for pathway's csv reader based on the cleaned dataset

# Column names and types of preprocessed_dataset.csv as consumed by the Pathway CSV reader.
class InputSchema(pw.Schema):
    ID: int
    SystemCodeNumber: str  # parking-lot identifier; passed to compute_price as lot_id
    Capacity: int
    Latitude: float
    Longitude: float
    Occupancy: int
    VehicleType: str
    TrafficConditionNearby: str
    QueueLength: int
    IsSpecialDay: int
    LastUpdatedDate: str
    LastUpdatedTime: str
    Timestamp: str  # combined date and time written by the preprocessing cell; kept as text here
    VehicleTypeEncoded: int  # bike=1, car=2, truck=3, anything else=0
    TrafficEncoded: int  # low=1, medium=2, high=3, anything else=0

In [10]:
# Setup global dictionaries for price memory, location caching, and proximity map (used in pricing logic)

from collections import defaultdict, deque
import pandas as _pd
import math

# lot id -> most recent damped (unrounded) price stored by compute_price
last_price = {}

# lot id -> (latitude, longitude) recorded by compute_price
lot_locations = {}

# lot id -> ids of other lots found within PROXIMITY_RADIUS_KM of it
distance_map = defaultdict(list)

# Two lots are treated as competitors when they are at most this many kilometres apart
PROXIMITY_RADIUS_KM = 0.5

In [11]:
###  Model 3: Proximity-Aware Competitive Pricing Logic

# This model extends Model 2 by incorporating **competitive pricing based on proximity to nearby parking lots**.

# Key business and technical logic applied:

# - **Base Price**: Starts at $10 and adjusts based on local demand factors like occupancy, queue length, traffic, and special days.
# - **Proximity Awareness**: Uses latitude and longitude to identify lots within a 500-meter radius.
# - **Competitive Adjustment**:
#   - If nearby lots are cheaper → slight price decrease to stay competitive.
#   - If nearby lots are more expensive → opportunity to raise price.
# - **Damping Applied**: Smoothens fluctuations using exponential smoothing (to avoid user shock).
# - **Constraints**: Final price is clipped between $5 and $20.

# This approach reflects real-world dynamic pricing strategies used in urban parking systems to balance demand, competition, and profitability.

In [12]:
# Define the UDF that calculates the final price using:
# - Demand factors (occupancy, queue, traffic, etc.)
# - Nearby lot prices within 500 meters
# - Business-aware adjustments and damping for stability.

def _link_nearby_lots(lot_id: str, location: tuple) -> None:
    """Record a lot's coordinates and link it with every known lot in range.

    Links are stored in both directions in ``distance_map``, so a lot that
    shows up late is still added to the neighbour lists of lots seen earlier.
    The distance scan only runs when the lot is new or reports different
    coordinates; otherwise the cached links are reused.
    """
    if lot_locations.get(lot_id) == location:
        return

    lot_locations[lot_id] = location
    nearby = [
        other_id
        for other_id, other_location in lot_locations.items()
        if other_id != lot_id
        and geodesic(location, other_location).km <= PROXIMITY_RADIUS_KM
    ]

    # Only matters when a known lot reports new coordinates: remove the
    # back-links held by lots that are no longer within range.
    for other_id in distance_map[lot_id]:
        if other_id not in nearby and lot_id in distance_map[other_id]:
            distance_map[other_id].remove(lot_id)

    distance_map[lot_id] = nearby
    for other_id in nearby:
        if lot_id not in distance_map[other_id]:
            distance_map[other_id].append(lot_id)


@pw.udf
def compute_price(
    lot_id: str,
    lat: float,
    lon: float,
    occupancy: int,
    capacity: int,
    queue: int,
    traffic: int,
    special: int,
    vehicle: int,
    timestamp_str: str
) -> float:
    """Price one observation of a parking lot.

    Steps:
      1. Demand price: a weighted demand score is squashed into (-1, 1) and
         moves the base price of 10 by at most 30%.
      2. Competition: the price moves 10% of the way towards the average last
         price of the lots within ``PROXIMITY_RADIUS_KM`` that already have one.
      3. Damping: 70% of this lot's previous price plus 30% of the new price,
         clipped to the range [5, 20].

    The function reads and updates the module-level ``last_price``,
    ``lot_locations`` and ``distance_map`` dictionaries.
    NOTE(review): the result therefore depends on the order in which rows
    reach this function; confirm the pipeline delivers them chronologically.

    Args:
        lot_id: Identifier of the parking lot.
        lat, lon: Coordinates of the lot, used for the proximity search.
        occupancy: Occupied spaces.
        capacity: Total spaces; a value of 0 yields an occupancy rate of 0.
        queue: Queue length.
        traffic: Encoded traffic level (divided by 3 in the score).
        special: Special-day indicator.
        vehicle: Encoded vehicle type (divided by 3 in the score).
        timestamp_str: Timestamp of the observation. Accepted so the call
            signature stays unchanged; it does not enter the calculation.

    Returns:
        The damped, clipped price rounded to two decimals. The unrounded value
        is what gets stored in ``last_price``.
    """
    base_price = 10

    _link_nearby_lots(lot_id, (lat, lon))

    # Demand component.
    occ_rate = occupancy / capacity if capacity else 0
    demand_score = (
        2.0 * occ_rate +
        0.5 * (queue / 10) +
        0.5 * (traffic / 3) +
        0.3 * special +
        0.3 * (vehicle / 3)
    )
    # 2 / (1 + e^-x) - 1 is a sigmoid rescaled to the open interval (-1, 1).
    demand_norm = 2 / (1 + math.exp(-demand_score)) - 1
    price_demand = base_price * (1 + 0.3 * demand_norm)

    # Competition component: only neighbours that have been priced already count.
    competitor_gaps = [
        last_price[other_id] - price_demand
        for other_id in distance_map[lot_id]
        if other_id in last_price
    ]
    if competitor_gaps:
        comp_adjustment = 0.1 * (sum(competitor_gaps) / len(competitor_gaps))
    else:
        comp_adjustment = 0

    final_price = price_demand + comp_adjustment

    # Exponential smoothing against this lot's previous price, then clipping.
    previous = last_price.get(lot_id, base_price)
    damped_price = 0.7 * previous + 0.3 * final_price
    damped_price = max(5, min(20, damped_price))

    last_price[lot_id] = damped_price
    return round(damped_price, 2)

In [13]:
# Construct the Pathway dataflow: read from CSV, apply the pricing logic, and write the results as JSON Lines

# Read the cleaned CSV once (static mode), typed through InputSchema.
input_table = pw.io.csv.read(
    "preprocessed_dataset.csv",
    schema=InputSchema,
    mode="static",
    autocommit_duration_ms=100,
)

# Keep the lot id and timestamp as they are and attach the price from the UDF.
output_table = input_table.select(
    input_table.SystemCodeNumber,
    input_table.Timestamp,
    price=compute_price(
        input_table.SystemCodeNumber,
        input_table.Latitude,
        input_table.Longitude,
        input_table.Occupancy,
        input_table.Capacity,
        input_table.QueueLength,
        input_table.TrafficEncoded,
        input_table.IsSpecialDay,
        input_table.VehicleTypeEncoded,
        input_table.Timestamp,
    ),
)

# Register the JSON Lines sink, then execute the dataflow.
pw.io.jsonlines.write(output_table, "output_model3.jsonlines")
pw.run()

Output()



In [14]:
# Load the pricing outputs (from pathway) into a pandas DataFrame for analysis

# Decode the file one JSON document per line.
with open('output_model3.jsonlines', "r") as jsonl_file:
    output_data = [json.loads(record) for record in jsonl_file]

# Build the frame and turn the text timestamps into datetime values.
output_df = pd.DataFrame(output_data).assign(
    Timestamp=lambda frame: pd.to_datetime(frame["Timestamp"])
)
output_df.head()

Unnamed: 0,SystemCodeNumber,Timestamp,price,diff,time
0,BHMEURBRD01,2016-10-07 13:30:00,10.76,1,1751777927548
1,Shopping,2016-12-10 07:59:00,10.28,1,1751777927548
2,Others-CCCPS202,2016-12-06 14:29:00,10.52,1,1751777927548
3,Others-CCCPS202,2016-11-05 08:00:00,10.59,1,1751777927548
4,Others-CCCPS135a,2016-12-08 11:59:00,10.7,1,1751777927548


In [15]:
# plot price over time for a few parking lots using Bokeh to visualize dynamic price behaviour

from bokeh.plotting import figure, output_notebook, show
from bokeh.palettes import Category10
from bokeh.models import HoverTool
output_notebook()

# Plot only the first three lots to keep the chart readable.
lots_to_plot = output_df["SystemCodeNumber"].unique()[:3]

p = figure(title="Price Over Time – Proximity-Aware Model (Model 3)",
           x_axis_type="datetime", width=900, height=400)

for i, lot in enumerate(lots_to_plot):
    # Rows in the output file are not in chronological order, and a line glyph
    # connects points in row order. Sort by time so each line runs left to
    # right instead of zig-zagging across the chart.
    lot_df = output_df[output_df["SystemCodeNumber"] == lot].sort_values("Timestamp")
    p.line(
        x=lot_df["Timestamp"],
        y=lot_df["price"],
        line_width=2,
        color=Category10[10][i],  # the palette has 10 colours, enough for the 3 lots plotted
        legend_label=lot
    )

# Vertical-line hover: show the time and price under the cursor.
p.add_tools(HoverTool(
    tooltips=[("Time", "@x{%F %T}"), ("Price", "@y")],
    formatters={"@x": "datetime"},
    mode="vline"
))

p.xaxis.axis_label = "Time"
p.yaxis.axis_label = "Price ($)"
p.legend.location = "top_left"
p.legend.click_policy = "hide"  # clicking a legend entry toggles that lot's line

show(p)


In [16]:
# Reload the pipeline's input and output files for inspection.
# The paths are relative so they match where the earlier cells wrote the files.
preprocessed_dataset = pd.read_csv("preprocessed_dataset.csv")
# The pricing output is JSON Lines (one JSON object per line), not CSV, so it
# has to be read with read_json(lines=True); read_csv would mis-parse it.
model_3_output_jsonlines = pd.read_json("output_model3.jsonlines", lines=True)