<a href="https://colab.research.google.com/github/saksham-42/Summer-Analytics-CAC-IITG/blob/Project/Capstone_Proj.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [181]:
from google.colab import files
uploaded = files.upload()

Saving dataset.csv to dataset (2).csv


Data Pre Processing + Feature Engineering

In [187]:
import pandas as pd
import numpy as np
from datetime import datetime
from math import radians, cos, sin, sqrt, atan2
from collections import defaultdict

# --- Load dataset ---
df = pd.read_csv("dataset.csv")

# --- Fix & create timestamp column ---
df["LastUpdatedDate"] = df["LastUpdatedDate"].fillna("01-01-1970").astype(str)
df["LastUpdatedTime"] = df["LastUpdatedTime"].fillna("00:00:00").astype(str)
df["Timestamp"] = pd.to_datetime(
    df["LastUpdatedDate"] + " " + df["LastUpdatedTime"],
    dayfirst=True,
    errors="coerce"
)
df = df.dropna(subset=["Timestamp"])

# --- Ensure numeric columns ---
df["Capacity"] = pd.to_numeric(df["Capacity"], errors="coerce").fillna(1).replace(0, 1)
df["Occupancy"] = pd.to_numeric(df["Occupancy"], errors="coerce").fillna(0)
df["QueueLength"] = pd.to_numeric(df["QueueLength"], errors="coerce").fillna(0)
df["IsSpecialDay"] = pd.to_numeric(df["IsSpecialDay"], errors="coerce").fillna(0)

# --- Map vehicle types & traffic ---
vehicle_map = {"bike": 0.5, "car": 1.0, "truck": 1.5}
traffic_map = {"low": 0, "average": 1, "high": 2}

df = df[df["VehicleType"].isin(vehicle_map)]
df["VehicleWeight"] = df["VehicleType"].map(vehicle_map)

df["TrafficConditionNearby"] = df["TrafficConditionNearby"].where(
    df["TrafficConditionNearby"].isin(traffic_map), "average"
)
df["TrafficScore"] = df["TrafficConditionNearby"].map(traffic_map)

# --- Compute occupancy ratio safely ---
df["OccupancyRatio"] = df["Occupancy"] / df["Capacity"]

# --- Sort and reset index ---
df = df.sort_values(["SystemCodeNumber", "Timestamp"]).reset_index(drop=True)

# --- Data Processing Functions ---
def preprocess_data(filepath: str) -> pd.DataFrame:
    df = pd.read_csv(filepath)
    fill_values = {
        "Capacity": 1, "Occupancy": 0, "QueueLength": 0,
        "IsSpecialDay": 0, "VehicleType": "car", "TrafficConditionNearby": "average"
    }
    df.fillna(fill_values, inplace=True)
    df["Latitude"] = df["Latitude"].fillna(df["Latitude"].median())
    df["Longitude"] = df["Longitude"].fillna(df["Longitude"].median())
    return df

Model - 1

In [188]:
def apply_model1(group, base=10, alpha=5):
    prices = []
    last_price = base
    for occ_ratio in group["OccupancyRatio"]:
        price = np.clip(last_price + alpha * occ_ratio, 5, 20)
        prices.append(price)
        last_price = price
    group["Price_Model1"] = prices
    return group

df = df.groupby("SystemCodeNumber").apply(apply_model1).reset_index(drop=True)


⏳ Waiting for streamed_prices.json...
⏳ Waiting for streamed_prices.json...
Waiting for streamed_prices.json...
Error updating data: Expected object or value
Waiting for stream...
Error updating data: Expected object or value
⏳ Waiting for stream... File streamed_prices.json does not exist
⏳ Waiting for streamed_prices.json...
Error updating data: Expected object or value
⏳ Waiting for streamed_prices.json...
⏳ Waiting for streamed_prices.json...
⏳ Waiting for data... (Expected object or value)


Model - 2

In [189]:
A, B, C, D, E, LAMBDA = 4, 1, 1.5, 2, 1, 0.5

df["RawDemand"] = (
    A * df["OccupancyRatio"] +
    B * df["QueueLength"] -
    C * df["TrafficScore"] +
    D * df["IsSpecialDay"] +
    E * df["VehicleWeight"]
)

def normalize(group):
    min_d, max_d = group["RawDemand"].min(), group["RawDemand"].max()
    group["NormalizedDemand"] = (group["RawDemand"] - min_d) / (max_d - min_d + 1e-6)
    return group

df = df.groupby("SystemCodeNumber").apply(normalize).reset_index(drop=True)
df["Price_Model2"] = np.clip(10 * (1 + LAMBDA * df["NormalizedDemand"]), 5, 20)


Model - 3

In [190]:
def haversine(lat1, lon1, lat2, lon2):
    R = 6371
    dlat, dlon = radians(lat2 - lat1), radians(lon2 - lon1)
    a = sin(dlat/2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2
    return 2 * R * atan2(sqrt(a), sqrt(1 - a))

coords = df.groupby("SystemCodeNumber")[["Latitude", "Longitude"]].first()
nearby_lots = defaultdict(list)

for lot1, loc1 in coords.iterrows():
    for lot2, loc2 in coords.iterrows():
        if lot1 != lot2 and haversine(loc1["Latitude"], loc1["Longitude"], loc2["Latitude"], loc2["Longitude"]) <= 0.5:
            nearby_lots[lot1].append(lot2)

def apply_model3(df):
    price3 = []
    for _, row in df.iterrows():
        lot, ts, price2 = row["SystemCodeNumber"], row["Timestamp"], row["Price_Model2"]
        occ, cap = row["Occupancy"], row["Capacity"]
        comps = nearby_lots.get(lot, [])
        comp_prices = df[(df["SystemCodeNumber"].isin(comps)) & (df["Timestamp"] == ts)]["Price_Model2"].values
        if len(comp_prices):
            avg_comp = np.mean(comp_prices)
            if occ >= cap and avg_comp < price2:
                price2 -= 1
            elif avg_comp > price2:
                price2 += 0.5
        price3.append(np.clip(price2, 5, 20))
    df["Price_Model3"] = price3
    return df

df = apply_model3(df)


Pathway

In [191]:
!pip install pathway --quiet

import pathway as pw
import pandas as pd
import numpy as np
from math import radians, cos, sin, sqrt, atan2
import datetime
import time
import os
from typing import Dict, List

df = pw.demo.replay_csv("dataset.csv", schema=ParkingSchema, input_rate=1000)

# --- Constants ---
BASE_PRICE = 10.0
MIN_PRICE = 5.0
MAX_PRICE = 20.0
NEARBY_RADIUS_KM = 0.5
EARTH_RADIUS_KM = 6371.0
SIMULATED_DELAY_MS = 0.25

# --- Vehicle and Traffic Factors ---
VEHICLE_FACTORS = {"bike": 0.5, "car": 1.0, "truck": 1.5, "cycle": 0.5}
TRAFFIC_FACTORS = {"low": 0, "average": 1, "high": 2}


def build_nearby_map(df: pd.DataFrame) -> Dict[str, List[str]]:
    coords = df.groupby("SystemCodeNumber")[["Latitude", "Longitude"]].first().to_dict("index")
    nearby_map = {}
    for lot1, coord1 in coords.items():
        nearby_map[lot1] = []
        lat1, lon1 = radians(coord1["Latitude"]), radians(coord1["Longitude"])
        for lot2, coord2 in coords.items():
            if lot1 != lot2:
                dlat = radians(coord2["Latitude"]) - lat1
                dlon = radians(coord2["Longitude"]) - lon1
                a = sin(dlat/2)**2 + cos(lat1) * cos(radians(coord2["Latitude"])) * sin(dlon/2)**2
                if 2 * EARTH_RADIUS_KM * atan2(sqrt(a), sqrt(1-a)) <= NEARBY_RADIUS_KM:
                    nearby_map[lot1].append(lot2)
    return nearby_map

# --- Schema ---
class ParkingSchema(pw.Schema):
    # Match your CSV columns exactly
    SystemCodeNumber: str
    Capacity: int
    Latitude: float
    Longitude: float
    Occupancy: int
    VehicleType: str
    TrafficConditionNearby: str
    QueueLength: int
    IsSpecialDay: int
    LastUpdatedDate: str
    LastUpdatedTime: str

# --- UDFs ---
@pw.udf
def compute_dynamic_price(
    lot: str, capacity: int, occupancy: int, vehicle_type: str,
    traffic: str, queue: int, special_day: int, nearby_map: Dict[str, List[str]]
) -> dict:
    try:
        ratio = occupancy / max(capacity, 1)
        vehicle_factor = VEHICLE_FACTORS.get(vehicle_type, 1.0)
        traffic_factor = TRAFFIC_FACTORS.get(traffic, 1)

        base = BASE_PRICE + 5 * ratio
        score = 4*ratio + queue - 1.5*traffic_factor + 2*special_day + vehicle_factor
        enhanced = BASE_PRICE * (1 + 0.5 * np.clip((score + 5)/15, 0, 1))

        competitors = [enhanced] + [enhanced for _ in nearby_map.get(lot, [])]
        avg_comp = np.mean(competitors)

        if occupancy >= capacity and avg_comp < enhanced:
            final = np.clip(enhanced - 1, MIN_PRICE, MAX_PRICE)
            reroute = True
        elif avg_comp > enhanced:
            final = np.clip(enhanced + 0.5, MIN_PRICE, MAX_PRICE)
            reroute = False
        else:
            final = np.clip(enhanced, MIN_PRICE, MAX_PRICE)
            reroute = False

        return {
            "base_price": float(np.clip(base, MIN_PRICE, MAX_PRICE)),
            "enhanced_price": float(enhanced),
            "final_price": float(final),
            "should_reroute": reroute,
            "competitor_count": len(nearby_map.get(lot, [])),
            "occupancy_rate": float(ratio)
        }
    except:
        return {
            "base_price": BASE_PRICE,
            "enhanced_price": BASE_PRICE,
            "final_price": BASE_PRICE,
            "should_reroute": False,
            "competitor_count": 0,
            "occupancy_rate": 0.0
        }

@pw.udf
def parse_timestamp(date_str: str, time_str: str) -> pw.DateTimeNaive:
    try:
        return pw.DateTimeNaive.from_datetime(datetime.datetime.strptime(
            f"{date_str} {time_str}", "%d-%m-%Y %H:%M:%S"))
    except:
        return pw.DateTimeNaive.from_datetime(datetime.datetime(1970, 1, 1))

# --- Pipeline ---
def run_parking_pricing_pipeline(input_file: str):
    # Prepare data
    static_df = preprocess_data(input_file)
    nearby_map = build_nearby_map(static_df)

    # Create simulated stream file path
    input_dir = os.path.dirname(input_file)
    simulated_file = os.path.join(input_dir, "simulated_stream.csv") if input_dir else "simulated_stream.csv"

    # Simulate stream
    df = pd.read_csv(input_file)
    with open(simulated_file, 'w') as f:
        f.write(','.join(df.columns) + '\n')
        for _, row in df.iterrows():
            time.sleep(SIMULATED_DELAY_MS/1000)
            f.write(','.join(map(str, row.values)) + '\n')
            f.flush()

    # Process stream
    stream = pw.io.csv.read(
        simulated_file,
        schema=ParkingEvent,
        mode="streaming",
        autocommit_duration_ms=500
    )

    # Add timestamp and compute prices
    priced = (
        stream
        .select(
            *pw.this,
            timestamp=parse_timestamp(pw.this.LastUpdatedDate, pw.this.LastUpdatedTime),
            price_info=compute_dynamic_price(
                pw.this.SystemCodeNumber,
                pw.this.Capacity,
                pw.this.Occupancy,
                pw.this.VehicleType,
                pw.this.TrafficConditionNearby,
                pw.this.QueueLength,
                pw.this.IsSpecialDay,
                nearby_map
            )
        )
    )

    # Daily aggregation - CORRECTED VERSION
    daily = (
        priced
        .groupby(
            pw.this.SystemCodeNumber,
            window=pw.temporal.tumbling(datetime.timedelta(days=1))
        )
        .reduce(
            pw.this.SystemCodeNumber,
            window=pw.this._pw_window,
            max_occupancy=pw.reducers.max(pw.this.Occupancy),
            avg_price=pw.reducers.avg(pw.this.price_info["final_price"]),
            count=pw.reducers.count()
        )
    )

    # Ensure output directory exists
    output_dir = os.path.dirname(input_file) if os.path.dirname(input_file) else "."
    os.makedirs(output_dir, exist_ok=True)

    # Write outputs
    pw.io.jsonlines.write(priced, os.path.join(output_dir, "streamed_prices.jsonl"))
    pw.io.csv.write(daily, os.path.join(output_dir, "daily_aggregates.csv"))
    pw.run()

Panel Dashboard

In [192]:
!pip install panel bokeh pandas --quiet

import pandas as pd
import panel as pn
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool
import pathway as pw
from datetime import datetime
import time

pn.extension()

# --- Widgets & Plot Setup ---
dropdown = pn.widgets.Select(name='Parking Lot', options=[], width=250)
source = ColumnDataSource(data={
    'Timestamp': [],
    'base_price': [],
    'enhanced_price': [],
    'final_price': []
})

plot = figure(
    title="🚗 Real-Time Dynamic Parking Pricing Models",
    x_axis_type='datetime',
    width=900,
    height=450,
    tools="pan,wheel_zoom,box_zoom,reset"
)

# Add three lines for each pricing model
plot.line(x='Timestamp', y='base_price', source=source,
          legend_label='Base Price', color='blue', line_width=2)
plot.line(x='Timestamp', y='enhanced_price', source=source,
          legend_label='Enhanced Price', color='green', line_width=2)
plot.line(x='Timestamp', y='final_price', source=source,
          legend_label='Final Price', color='red', line_width=2)

# Add hover tool
plot.add_tools(HoverTool(
    tooltips=[
        ("Time", "@Timestamp{%F %T}"),
        ("Base", "@base_price{0.00}"),
        ("Enhanced", "@enhanced_price{0.00}"),
        ("Final", "@final_price{0.00}")
    ],
    formatters={"@Timestamp": "datetime"}
))
plot.legend.location = "top_left"
plot.legend.click_policy = "hide"

# --- Data Update Function ---
def update_data():
    try:
        df = pd.read_json("streamed_prices.jsonl", lines=True)
        print("Raw data loaded. Columns:", df.columns.tolist())

        # Parse timestamp (adjust format based on actual data)
        df['Timestamp'] = pd.to_datetime(df['timestamp'].apply(lambda x: x['value']))

        # Update dropdown
        if not dropdown.options:
            lots = df['SystemCodeNumber'].unique().tolist()
            dropdown.options = lots
            if lots:
                dropdown.value = lots[0]

        # Filter and update plot
        if dropdown.value:
            df_lot = df[df['SystemCodeNumber'] == dropdown.value]
            print(f"Data for {dropdown.value}: {len(df_lot)} records")

            source.data = {
                'Timestamp': df_lot['Timestamp'],
                'base_price': df_lot['price_info'].apply(lambda x: x['base_price']),
                'enhanced_price': df_lot['price_info'].apply(lambda x: x['enhanced_price']),
                'final_price': df_lot['price_info'].apply(lambda x: x['final_price'])
            }
    except Exception as e:
        print(f"Error updating data: {str(e)}")

# Initial update + auto-refresh
update_data()
pn.state.add_periodic_callback(update_data, period=1000)

# --- Dashboard Layout ---
dashboard = pn.Column(
    "# 🅿️ Dynamic Parking Pricing Dashboard",
    pn.Row(
        dropdown,
        pn.widgets.StaticText(value="<i>Updates every second</i>")
    ),
    plot,
    sizing_mode="stretch_width"
)

# --- Run the Dashboard ---
dashboard.servable()  # For `panel serve`
# dashboard.show()    # For Jupyter Notebook

Error updating data: Expected object or value


Error updating data: Expected object or value
Error updating data: Expected object or value
Error updating data: Expected object or value
Error updating data: Expected object or value
⏳ Waiting for data... (Expected object or value)
⏳ Waiting for streamed_prices.json...
⏳ Waiting for streamed_prices.json...
Waiting for streamed_prices.json...
Waiting for stream...
⏳ Waiting for stream... File streamed_prices.json does not exist
⏳ Waiting for streamed_prices.json...
⏳ Waiting for streamed_prices.json...
⏳ Waiting for streamed_prices.json...
Error updating data: Expected object or value
Error updating data: Expected object or value
Error updating data: Expected object or value
Error updating data: Expected object or value
⏳ Waiting for data... (Expected object or value)
⏳ Waiting for streamed_prices.json...
⏳ Waiting for streamed_prices.json...
Waiting for streamed_prices.json...
Waiting for stream...
⏳ Waiting for stream... File streamed_prices.json does not exist
⏳ Waiting for streamed