In [None]:
# Imports
import pandas as pd
import numpy as np

# Load data
df = pd.read_csv("dataset.csv")

# Combine the separate date and time strings into one datetime column.
# dayfirst=True makes pandas read the date part day-first.
df['Timestamp'] = pd.to_datetime(
    df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'],
    dayfirst=True,
)

# Sort by lot, then by time, so each lot's rows form a chronological series
df = df.sort_values(by=['SystemCodeNumber', 'Timestamp'])

# Pricing parameters (single source of truth for the column and the recurrence)
BASE_PRICE = 10.0              # starting price of every lot
MIN_PRICE, MAX_PRICE = 5, 20   # clamp bounds for the running price
alpha = 5.0                    # sensitivity of the price to occupancy

# Keep the base price as a column so it is written to the output file
df['BasePrice'] = BASE_PRICE

# Calculate occupancy ratio
df['OccupancyRatio'] = df['Occupancy'] / df['Capacity']


def _running_baseline_price(ratios):
    """Return the clamped running price for one lot's occupancy ratios.

    Starting from BASE_PRICE, each step adds ``alpha * ratio`` to the previous
    price and clamps the result to [MIN_PRICE, MAX_PRICE]. The returned Series
    carries the same index as ``ratios`` so the caller can align it by index.
    """
    price = BASE_PRICE
    running = []
    for ratio in ratios:
        price = max(MIN_PRICE, min(MAX_PRICE, price + alpha * ratio))
        running.append(price)
    return pd.Series(running, index=ratios.index)


# Apply baseline pricing per lot. transform() aligns the result on the row
# index, so correctness does not depend on group order matching row order.
prices = df.groupby('SystemCodeNumber')['OccupancyRatio'].transform(_running_baseline_price)
df['BaselinePrice'] = prices

# Save baseline output
df.to_csv("baseline_model_output.csv", index=False)

# Show result
df[['SystemCodeNumber', 'Timestamp', 'Occupancy', 'Capacity', 'OccupancyRatio', 'BaselinePrice']].head(10)


Unnamed: 0,SystemCodeNumber,Timestamp,Occupancy,Capacity,OccupancyRatio,BaselinePrice
0,BHMBCCMKT01,2016-10-04 07:59:00,61,577,0.105719,10.528596
1,BHMBCCMKT01,2016-10-04 08:25:00,64,577,0.110919,11.083189
2,BHMBCCMKT01,2016-10-04 08:59:00,80,577,0.138648,11.77643
3,BHMBCCMKT01,2016-10-04 09:32:00,107,577,0.185442,12.70364
4,BHMBCCMKT01,2016-10-04 09:59:00,150,577,0.259965,14.003466
5,BHMBCCMKT01,2016-10-04 10:26:00,177,577,0.306759,15.537262
6,BHMBCCMKT01,2016-10-04 10:59:00,219,577,0.379549,17.435009
7,BHMBCCMKT01,2016-10-04 11:25:00,247,577,0.428076,19.57539
8,BHMBCCMKT01,2016-10-04 11:59:00,259,577,0.448873,20.0
9,BHMBCCMKT01,2016-10-04 12:29:00,266,577,0.461005,20.0


In [None]:
# Imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Read the raw parking observations
df = pd.read_csv("dataset.csv")

# Build the datetime column from the day-first date/time strings, then order
# the rows by lot and time
df = df.assign(
    Timestamp=pd.to_datetime(df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'], dayfirst=True)
).sort_values(by=['SystemCodeNumber', 'Timestamp'])

# Numeric encodings for the categorical columns
traffic_map = {'low': 1, 'average': 2, 'high': 3}
vehicle_map = {'cycle': 1, 'bike': 2, 'car': 3, 'truck': 4}

# Engineered features: occupancy share plus the two encoded categories
df = df.assign(
    OccupancyRatio=df['Occupancy'] / df['Capacity'],
    TrafficWeight=df['TrafficConditionNearby'].map(traffic_map),
    VehicleWeight=df['VehicleType'].map(vehicle_map),
)

# Weights of the linear demand function
α, β, γ, δ, ε = 5, 2, 3, 4, 1

# Raw demand: occupancy, queue, special day and vehicle type add to it,
# nearby traffic subtracts from it
df['DemandRaw'] = (
    df['OccupancyRatio'].mul(α)
    .add(df['QueueLength'].mul(β))
    .sub(df['TrafficWeight'].mul(γ))
    .add(df['IsSpecialDay'].mul(δ))
    .add(df['VehicleWeight'].mul(ε))
)

# Min-max scale the raw demand over the whole dataset
scaler = MinMaxScaler()
df['NormalizedDemand'] = scaler.fit_transform(df[['DemandRaw']]).ravel()

# Price sensitivity and starting price
λ = 0.8
base_price = 10.0

# Demand-driven price, bounded between $5 and $20
df['DemandPrice'] = (base_price * (1 + λ * df['NormalizedDemand'])).clip(lower=5, upper=20)

# Persist the result
df.to_csv("demand_model_output.csv", index=False)

# Preview the first rows
preview_columns = [
    'SystemCodeNumber', 'Timestamp', 'OccupancyRatio', 'QueueLength', 'TrafficWeight',
    'IsSpecialDay', 'VehicleWeight', 'DemandRaw', 'NormalizedDemand', 'DemandPrice',
]
df[preview_columns].head(10)


Unnamed: 0,SystemCodeNumber,Timestamp,OccupancyRatio,QueueLength,TrafficWeight,IsSpecialDay,VehicleWeight,DemandRaw,NormalizedDemand,DemandPrice
0,BHMBCCMKT01,2016-10-04 07:59:00,0.105719,1,1,0,3,2.528596,0.128403,11.027227
1,BHMBCCMKT01,2016-10-04 08:25:00,0.110919,1,1,0,3,2.554593,0.129161,11.033286
2,BHMBCCMKT01,2016-10-04 08:59:00,0.138648,2,1,0,3,4.693241,0.191469,11.531752
3,BHMBCCMKT01,2016-10-04 09:32:00,0.185442,2,1,0,3,4.92721,0.198286,11.586284
4,BHMBCCMKT01,2016-10-04 09:59:00,0.259965,2,1,0,2,4.299827,0.180007,11.440057
5,BHMBCCMKT01,2016-10-04 10:26:00,0.306759,3,1,0,3,7.533795,0.274227,12.193814
6,BHMBCCMKT01,2016-10-04 10:59:00,0.379549,6,3,0,4,8.897747,0.313965,12.511718
7,BHMBCCMKT01,2016-10-04 11:25:00,0.428076,5,2,0,3,9.140381,0.321034,12.56827
8,BHMBCCMKT01,2016-10-04 11:59:00,0.448873,5,2,0,1,7.244367,0.265794,12.126356
9,BHMBCCMKT01,2016-10-04 12:29:00,0.461005,8,3,0,2,11.305026,0.384099,13.072794


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from math import radians, sin, cos, sqrt, atan2

# Load the data, parse the day-first timestamps, sort per lot by time and
# attach the engineered features in one pipeline
df = (
    pd.read_csv("dataset.csv")
    .assign(
        Timestamp=lambda frame: pd.to_datetime(
            frame['LastUpdatedDate'] + ' ' + frame['LastUpdatedTime'], dayfirst=True
        )
    )
    .sort_values(by=['SystemCodeNumber', 'Timestamp'])
    .assign(
        OccupancyRatio=lambda frame: frame['Occupancy'] / frame['Capacity'],
        TrafficWeight=lambda frame: frame['TrafficConditionNearby'].map(
            {'low': 1, 'average': 2, 'high': 3}
        ),
        VehicleWeight=lambda frame: frame['VehicleType'].map(
            {'cycle': 1, 'bike': 2, 'car': 3, 'truck': 4}
        ),
    )
)

# Linear demand model: traffic is the only term that lowers demand
α, β, γ, δ, ε = 5, 2, 3, 4, 1
df['DemandRaw'] = (
    df['OccupancyRatio'].mul(α)
    .add(df['QueueLength'].mul(β))
    .sub(df['TrafficWeight'].mul(γ))
    .add(df['IsSpecialDay'].mul(δ))
    .add(df['VehicleWeight'].mul(ε))
)

# Min-max scale demand over the dataset, then turn it into a bounded price
scaler = MinMaxScaler()
df['NormalizedDemand'] = scaler.fit_transform(df[['DemandRaw']]).ravel()
λ = 0.8
base_price = 10.0
df['DemandPrice'] = (base_price * (1 + λ * df['NormalizedDemand'])).clip(lower=5, upper=20)

# One (Latitude, Longitude) pair per lot: the first value seen for each lot
lot_coords = df.groupby('SystemCodeNumber')[['Latitude', 'Longitude']].first()

# Haversine function
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The distance along a sphere of radius 6371 km, as a float.
    """
    R = 6371  # sphere radius in km
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2)**2
    # Floating-point rounding can leave `a` a hair outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    return R * 2 * atan2(sqrt(a), sqrt(1 - a))

# Map each lot to the other lots that lie within 1 km of it.
# Lots with no neighbour in range get no entry.
from collections import defaultdict
nearby_lots = defaultdict(list)

for lot1, lat1, lon1 in lot_coords.itertuples(name=None):
    for lot2, lat2, lon2 in lot_coords.itertuples(name=None):
        if lot1 == lot2:
            continue  # a lot is never its own competitor
        if haversine(lat1, lon1, lat2, lon2) <= 1.0:
            nearby_lots[lot1].append(lot2)

# Apply competitive pricing
def adjust_price(row):
    """Adjust one row's demand price against nearby lots at the same timestamp.

    Reads the module-level ``df`` (all observations) and ``nearby_lots``
    (lot id -> list of neighbouring lot ids).

    Args:
        row: A row of ``df``; uses 'SystemCodeNumber', 'Timestamp',
            'DemandPrice' and 'OccupancyRatio'.

    Returns:
        A ``(price, flag)`` tuple. ``flag`` is "Reroute Suggested" when the lot
        is over 95% occupied and dearer than the neighbours' average, in which
        case the price drops by 2 (not below 5). If the lot is cheaper than the
        neighbours' average the price rises by 1.5 (not above 20). Otherwise,
        or when there are no neighbours or no neighbour rows at this timestamp,
        the demand price is returned unchanged with an empty flag.
    """
    lot = row['SystemCodeNumber']
    demand_price = row['DemandPrice']

    # .get() avoids creating an entry when nearby_lots is a defaultdict
    neighbours = nearby_lots.get(lot)
    if not neighbours:
        return demand_price, ""

    # Competitor rows are the neighbours' observations with this exact timestamp
    competitors = df[
        (df['SystemCodeNumber'].isin(neighbours)) &
        (df['Timestamp'] == row['Timestamp'])
    ]
    if competitors.empty:
        return demand_price, ""

    nearby_avg = competitors['DemandPrice'].mean()

    if row['OccupancyRatio'] > 0.95 and demand_price > nearby_avg:
        return max(5, demand_price - 2), "Reroute Suggested"
    if demand_price < nearby_avg:
        return min(20, demand_price + 1.5), ""
    return demand_price, ""

# Run the competitive adjustment on every row; each result is a (price, flag) pair
adjusted = df.apply(adjust_price, axis=1)
df['CompetitivePrice'] = [price for price, _ in adjusted]
df['RerouteFlag'] = [flag for _, flag in adjusted]

# Persist the final table
df.to_csv("competitive_model_output.csv", index=False)

# Preview the first rows
result_columns = ['SystemCodeNumber', 'Timestamp', 'DemandPrice', 'CompetitivePrice', 'RerouteFlag']
df[result_columns].head(10)


Unnamed: 0,SystemCodeNumber,Timestamp,DemandPrice,CompetitivePrice,RerouteFlag
0,BHMBCCMKT01,2016-10-04 07:59:00,11.027227,12.527227,
1,BHMBCCMKT01,2016-10-04 08:25:00,11.033286,12.533286,
2,BHMBCCMKT01,2016-10-04 08:59:00,11.531752,13.031752,
3,BHMBCCMKT01,2016-10-04 09:32:00,11.586284,13.086284,
4,BHMBCCMKT01,2016-10-04 09:59:00,11.440057,12.940057,
5,BHMBCCMKT01,2016-10-04 10:26:00,12.193814,13.693814,
6,BHMBCCMKT01,2016-10-04 10:59:00,12.511718,14.011718,
7,BHMBCCMKT01,2016-10-04 11:25:00,12.56827,14.06827,
8,BHMBCCMKT01,2016-10-04 11:59:00,12.126356,13.626356,
9,BHMBCCMKT01,2016-10-04 12:29:00,13.072794,14.572794,


In [None]:
!pip install -U pathway


Collecting pathway
  Downloading pathway-0.24.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/60.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting h3>=4 (from pathway)
  Downloading h3-4.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting python-sat>=0.1.8.dev0 (from pathway)
  Downloading python_sat-1.8.dev17-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (1.5 kB)
Collecting beartype<0.16.0,>=0.14.0 (from pathway)
  Downloading beartype-0.15.0-py3-none-any.whl.metadata (28 kB)
Collecting diskcache>=5.2.1 (from pathway)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting boto3<1.36.0,>=1.26.76 (from pathway)
  Downloading boto3-1.35.99-py3-none-any.whl.metadata (6.7

In [None]:
import pathway as pw
import pandas as pd
from math import radians, sin, cos, sqrt, atan2
from typing import Dict, List, Tuple


In [None]:
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometres between two (lat, lon) points.

    All four arguments are in degrees. The result is measured on a sphere of
    radius 6371 km.
    """
    R = 6371  # Earth radius in km
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2)**2
    # Guard against rounding that nudges `a` above 1 (or below 0) for points
    # on opposite sides of the sphere; sqrt(1 - a) would otherwise raise.
    a = max(0.0, min(1.0, a))
    return R * 2 * atan2(sqrt(a), sqrt(1 - a))

def get_nearby_lots(df: pd.DataFrame, radius_km: float = 1.0) -> Dict[str, List[str]]:
    """Map every lot to the other lots within ``radius_km`` of it.

    Each lot's position is the first 'Latitude'/'Longitude' pair found for its
    'SystemCodeNumber'. Every lot gets a key, with an empty list when no other
    lot is in range. Distances come from ``haversine``.
    """
    coords = df.groupby('SystemCodeNumber')[['Latitude', 'Longitude']].first()
    points = [(lot, pos['Latitude'], pos['Longitude']) for lot, pos in coords.iterrows()]
    return {
        lot_a: [
            lot_b
            for lot_b, lat_b, lon_b in points
            if lot_b != lot_a and haversine(lat_a, lon_a, lat_b, lon_b) <= radius_km
        ]
        for lot_a, lat_a, lon_a in points
    }

# Read the dataset once as a plain DataFrame (with a parsed day-first
# Timestamp column) and precompute each lot's neighbours from it
static_df = pd.read_csv("dataset.csv").assign(
    Timestamp=lambda frame: pd.to_datetime(
        frame['LastUpdatedDate'] + ' ' + frame['LastUpdatedTime'], dayfirst=True
    )
)
nearby_lot_map = get_nearby_lots(static_df)


In [None]:
@pw.udf
def compute_competitive_price(
    lot_id: str,
    occupancy: int,
    capacity: int,
    queue: int,
    traffic: str,
    special: int,
    vehicle: str,
    timestamp: str
) -> Tuple[float, str]:
    """Price one parking observation and decide whether to suggest a reroute.

    Args:
        lot_id: Lot identifier, looked up in the module-level ``nearby_lot_map``.
        occupancy: Number of occupied spaces.
        capacity: Total spaces; a falsy value (0) yields an occupancy ratio of 0.
        queue: Queue length.
        traffic: 'low', 'average' or 'high'; anything else is weighted as 2.
        special: Special-day indicator used as a numeric term.
        vehicle: 'cycle', 'bike', 'car' or 'truck'; anything else is weighted as 2.
        timestamp: Accepted for interface compatibility; not used in the calculation.

    Returns:
        ``(price, reroute)``: the price rounded to 2 decimals and bounded to
        [5, 20], and "Reroute Suggested" when the lot is over 95% occupied and
        has at least one nearby lot, otherwise an empty string.
    """
    occupancy_ratio = occupancy / capacity if capacity else 0
    traffic_weight = {'low': 1, 'average': 2, 'high': 3}.get(traffic, 2)
    vehicle_weight = {'cycle': 1, 'bike': 2, 'car': 3, 'truck': 4}.get(vehicle, 2)

    # Demand function
    demand_score = (
        5 * occupancy_ratio +
        2 * queue -
        3 * traffic_weight +
        4 * special +
        1 * vehicle_weight
    )

    # Normalize demand to [0, 1] with a fixed divisor: a per-row UDF has no
    # dataset-wide min/max to scale against
    norm_demand = min(1.0, max(0.0, demand_score / 100))

    # Base pricing
    base_price = 10
    demand_price = min(20, max(5, base_price * (1 + 0.8 * norm_demand)))

    # Simple competition logic (can be extended with streamed competitor table).
    # get_nearby_lots() creates a key for every lot, even with an empty list,
    # so test for a non-empty neighbour list rather than key membership.
    reroute = ""
    if occupancy_ratio > 0.95 and nearby_lot_map.get(lot_id):
        reroute = "Reroute Suggested"

    return round(demand_price, 2), reroute


In [None]:
# Create Pathway streaming table from dataset.
# pw.io.csv.read() requires `schema` to be a pw.Schema subclass; a list of
# (name, type) pairs raises TypeError. Build the schema from the column types.
table = pw.io.csv.read(
    "dataset.csv",
    schema=pw.schema_from_types(
        SystemCodeNumber=str,
        Capacity=int,
        Latitude=float,
        Longitude=float,
        Occupancy=int,
        VehicleType=str,
        TrafficConditionNearby=str,
        QueueLength=int,
        IsSpecialDay=int,
        LastUpdatedDate=str,
        LastUpdatedTime=str,
    ),
    mode="streaming",  # same mode the later, corrected cell uses
)

# Merge the date and time strings into one normalised timestamp string
@pw.udf
def combine_timestamp(date: str, time: str) -> str:
    """Parse "<date> <time>" day-first and return it as "YYYY-MM-DD HH:MM:SS"."""
    parsed = pd.to_datetime(" ".join((date, time)), dayfirst=True)
    return parsed.strftime("%Y-%m-%d %H:%M:%S")

# A keyword argument cannot be a tuple target (`price, reroute = ...` inside a
# call is a SyntaxError), so first keep the UDF's (price, reroute) tuple in a
# single column ...
with_logic = table.select(
    lot_id=table.SystemCodeNumber,
    timestamp=combine_timestamp(table.LastUpdatedDate, table.LastUpdatedTime),
    price_and_reroute=compute_competitive_price(
        table.SystemCodeNumber,
        table.Occupancy,
        table.Capacity,
        table.QueueLength,
        table.TrafficConditionNearby,
        table.IsSpecialDay,
        table.VehicleType,
        combine_timestamp(table.LastUpdatedDate, table.LastUpdatedTime),
    ),
)

# ... then split that tuple into the two output columns
enriched = with_logic.select(
    lot_id=with_logic.lot_id,
    timestamp=with_logic.timestamp,
    price=with_logic.price_and_reroute[0],
    reroute=with_logic.price_and_reroute[1],
)

# Output to JSONL for visualization or download
pw.io.jsonlines.write(enriched, "pricing_output.jsonl")

# Run pipeline
pw.run()


TypeError: Function pathway.io.csv.read() parameter schema=[('SystemCodeNumber', <pathway.engine.PathwayType object at 0x787330fd2190>), ('Capacity', <p... violates type hint type[pathway.internals.schema.Schema] | None, as list [('SystemCodeNumber', <pathway.engine.PathwayType object at 0x787330fd2190>), ('Capacity', <p... not type or <class "builtins.NoneType">.

In [None]:
# Read the CSV in streaming mode.
# `schema` has to be a pw.Schema subclass (passing a list of tuples raises
# TypeError), so derive one from the column names and Python types.
table = pw.io.csv.read(
    "dataset.csv",
    schema=pw.schema_from_types(
        SystemCodeNumber=str,
        Capacity=int,
        Latitude=float,
        Longitude=float,
        Occupancy=int,
        VehicleType=str,
        TrafficConditionNearby=str,
        QueueLength=int,
        IsSpecialDay=int,
        LastUpdatedDate=str,
        LastUpdatedTime=str,
    ),
    mode="streaming",  # matches the mode used by the final working cell
)

# UDF that joins the date and time columns into a single timestamp string
@pw.udf
def combine_timestamp(date: str, time: str) -> str:
    """Return "<date> <time>", parsed day-first, formatted as "%Y-%m-%d %H:%M:%S"."""
    moment = pd.to_datetime(" ".join([date, time]), dayfirst=True)
    return moment.strftime("%Y-%m-%d %H:%M:%S")

# Run the pricing UDF per row; its (price, reroute) tuple stays in one column
# for now
with_logic = table.select(
    lot_id=table.SystemCodeNumber,
    timestamp=combine_timestamp(table.LastUpdatedDate, table.LastUpdatedTime),
    price_and_reroute=compute_competitive_price(
        table.SystemCodeNumber,
        table.Occupancy,
        table.Capacity,
        table.QueueLength,
        table.TrafficConditionNearby,
        table.IsSpecialDay,
        table.VehicleType,
        combine_timestamp(table.LastUpdatedDate, table.LastUpdatedTime),
    ),
)

# Split the tuple column into separate price and reroute columns
enriched = with_logic.select(
    with_logic.lot_id,
    with_logic.timestamp,
    price=with_logic.price_and_reroute[0],
    reroute=with_logic.price_and_reroute[1],
)

# Write every output row to a JSON Lines file
pw.io.jsonlines.write(enriched, "pricing_output.jsonl")

# Start the Pathway computation
pw.run()


TypeError: Function pathway.io.csv.read() parameter schema=[('SystemCodeNumber', <pathway.engine.PathwayType object at 0x787330fd2190>), ('Capacity', <p... violates type hint type[pathway.internals.schema.Schema] | None, as list [('SystemCodeNumber', <pathway.engine.PathwayType object at 0x787330fd2190>), ('Capacity', <p... not type or <class "builtins.NoneType">.

In [None]:
# Step 1: Declare the input schema. pw.io.csv.read() takes a pw.Schema
# subclass; each annotation below names one CSV column and its type.
class ParkingSchema(pw.Schema):
    # Lot identifier and its fixed attributes
    SystemCodeNumber: str
    Capacity: int
    Latitude: float
    Longitude: float
    # Per-observation measurements
    Occupancy: int
    VehicleType: str
    TrafficConditionNearby: str
    QueueLength: int
    IsSpecialDay: int
    # Date and time arrive as separate strings and are combined downstream
    LastUpdatedDate: str
    LastUpdatedTime: str

# Step 2: Read the CSV as a Pathway table using the schema above
table = pw.io.csv.read(
    "dataset.csv",
    schema=ParkingSchema,
    mode="streaming"  # replaces the "streaming_with_deletions" value used in the earlier attempts
)

# Step 3: Combine date and time into a timestamp string
@pw.udf
def combine_timestamp(date: str, time: str) -> str:
    """Join the date and time strings, parse them day-first, and format the
    result as "YYYY-MM-DD HH:MM:SS"."""
    combined = " ".join((date, time))
    return pd.to_datetime(combined, dayfirst=True).strftime("%Y-%m-%d %H:%M:%S")

# Step 4: Apply the pricing logic. The UDF returns a (price, reroute) tuple,
# which is kept in a single column at this stage.
with_logic = table.select(
    lot_id=table.SystemCodeNumber,
    timestamp=combine_timestamp(table.LastUpdatedDate, table.LastUpdatedTime),
    price_and_reroute=compute_competitive_price(
        table.SystemCodeNumber,
        table.Occupancy,
        table.Capacity,
        table.QueueLength,
        table.TrafficConditionNearby,
        table.IsSpecialDay,
        table.VehicleType,
        combine_timestamp(table.LastUpdatedDate, table.LastUpdatedTime),
    ),
)

# Step 5: Unpack the tuple: element 0 is the price, element 1 the reroute flag
enriched = with_logic.select(
    with_logic.lot_id,
    with_logic.timestamp,
    price=with_logic.price_and_reroute[0],
    reroute=with_logic.price_and_reroute[1],
)

# Step 6: Write the result rows to a JSONL file for later visualisation
pw.io.jsonlines.write(enriched, "pricing_output.jsonl")

# Step 7: Run the Pathway pipeline
pw.run()
