<a href="https://colab.research.google.com/github/prernakukreja5/Capstone-project/blob/main/Capstone_project_IITG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pathway bokeh panel --quiet




[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.4/149.4 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.6/77.6 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m777.6/777.6 kB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.5/26.5 MB[0m [31m52.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
#IMPORTS ---
import numpy as np
import pandas as pd
import pathway as pw
import datetime
import bokeh.plotting
import panel as pn

In [None]:
# --- 3. DATA PREPARATION ---
# Reads the raw parking dataset, builds a Timestamp column, coerces the numeric
# columns, and writes `parking_stream.csv` for the streaming replay.
# (pandas is already imported as `pd` in the imports cell.)

# Load your dataset (replace 'dataset.csv' with your actual file)
df = pd.read_csv('/content/dataset.csv')

# Combine LastUpdatedDate and LastUpdatedTime into a Timestamp column
df['Timestamp'] = pd.to_datetime(
    df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'],
    format='%d-%m-%Y %H:%M:%S'
)

# Convert TrafficConditionNearby to numerical. Labels are stripped and
# lower-cased first so variants such as 'Low' or ' high ' still match the
# mapping; anything unmapped becomes NaN and is filled with 0.0 below.
traffic_mapping = {'low': 1.0, 'average': 2.0, 'high': 3.0}
df['TrafficConditionNearby'] = (
    df['TrafficConditionNearby']
    .astype(str)
    .str.strip()
    .str.lower()
    .map(traffic_mapping)
)

# --- CLEANING START ---
numeric_cols = ["ID", "SystemCodeNumber", "Capacity", "Latitude", "Longitude",
                "Occupancy", "TrafficConditionNearby", "QueueLength", "IsSpecialDay"]
# Columns that are written to the stream and declared `int` in ParkingSchema.
int_cols = ["ID", "SystemCodeNumber", "Capacity", "Occupancy", "QueueLength", "IsSpecialDay"]

# Unparseable values become NaN and are then replaced by 0.
# NOTE(review): a non-numeric SystemCodeNumber would be coerced to 0 for every
# row here — confirm the column really is numeric in the dataset.
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0.0)

# Coercion + fillna leaves a column as float64 whenever it contained a NaN,
# which would be written as e.g. "12.0". Cast back so the CSV holds integers
# for the columns the schema reads as int.
df = df.astype({col: 'int64' for col in int_cols})
# --- CLEANING END ---

# Sort by time and ID for streaming
df = df.sort_values(['Timestamp', 'ID']).reset_index(drop=True)

# Save relevant columns for streaming. The timestamp format is pinned so the
# file always matches the "%Y-%m-%d %H:%M:%S" pattern used to parse it later,
# rather than relying on the pandas default rendering.
stream_cols = [
    "ID", "SystemCodeNumber", "Capacity", "Latitude", "Longitude",
    "Occupancy", "VehicleType", "TrafficConditionNearby", "QueueLength",
    "IsSpecialDay", "Timestamp"
]
df[stream_cols].to_csv(
    "parking_stream.csv",
    index=False,
    date_format='%Y-%m-%d %H:%M:%S',
)

In [None]:
# --- 4. PATHWAY SCHEMA ---
# Column names and types for the rows read from "parking_stream.csv".
# The field names match the columns written by the data-preparation cell.
class ParkingSchema(pw.Schema):
    """Typed schema passed to the CSV replay source for the parking stream."""
    ID: int
    SystemCodeNumber: int
    Capacity: int
    Latitude: float
    Longitude: float
    Occupancy: int
    VehicleType: str
    TrafficConditionNearby: float  # numeric encoding produced in data preparation
    QueueLength: int
    IsSpecialDay: int
    Timestamp: str  # kept as text here; parsed with strptime in a later cell

In [None]:

# --- 5. STREAMING DATA SOURCE ---
# Replay the prepared CSV as a Pathway table typed by ParkingSchema.
# NOTE(review): input_rate is presumably rows per second — confirm in the Pathway docs.
data = pw.demo.replay_csv(
    "parking_stream.csv",
    schema=ParkingSchema,
    input_rate=1000,
)

In [None]:
# --- 6. DATETIME PROCESSING ---
# Parse the textual Timestamp once, then derive two columns from it:
#   t   - the parsed datetime
#   day - the same datetime rendered as a string with the time zeroed out
fmt = "%Y-%m-%d %H:%M:%S"
_parsed_timestamp = data.Timestamp.dt.strptime(fmt)
data = data.with_columns(
    t=_parsed_timestamp,
    day=_parsed_timestamp.dt.strftime("%Y-%m-%dT00:00:00"),
)

In [None]:
# --- 7. MODEL PARAMETERS ---
# Price that both pricing models start from.
BASE_PRICE = 10.0

# Coefficients of the demand function used by the Model 2 UDF.
alpha = 1.0    # multiplies occ_max / cap
beta = 0.5     # multiplies queue_max
gamma = 0.3    # multiplies traffic_max (this term is subtracted)
delta = 0.7    # multiplies special_max
epsilon = 0.4  # multiplies the vehicle-type weight
lambd = 0.8    # scales the normalised demand in the final price

# Weight per vehicle type; keys are looked up in lower case by the UDF.
vehicle_type_weights = dict(car=1.0, bike=0.5, truck=1.5)


In [None]:
# --- AGGREGATION ---
# Produces one row per parking lot (ID) per one-day tumbling window with the
# daily extremes needed by the pricing models.
#
# The instance key is the lot ID alone: the one-day tumbling window already
# separates days, so multiplying the integer ID by the string `day` column
# (as was done before) is unnecessary and mixes an int with a str.
# NOTE(review): assumes the default tumbling-window origin aligns windows to
# midnight — confirm against the Pathway documentation.
delta_window = (
    data.windowby(
        data.t,
        instance=data.ID,  # Per lot; the daily window supplies the "per day" part
        window=pw.temporal.tumbling(datetime.timedelta(days=1)),
        behavior=pw.temporal.exactly_once_behavior()
    )
    .reduce(
        t=pw.this._pw_window_end,
        ID=pw.reducers.max(pw.this.ID),
        occ_max=pw.reducers.max(pw.this.Occupancy),
        occ_min=pw.reducers.min(pw.this.Occupancy),
        cap=pw.reducers.max(pw.this.Capacity),
        queue_max=pw.reducers.max(pw.this.QueueLength),
        traffic_max=pw.reducers.max(pw.this.TrafficConditionNearby),
        special_max=pw.reducers.max(pw.this.IsSpecialDay),
        # max over strings just picks one representative type per window (demo only)
        vehicle_type=pw.reducers.max(pw.this.VehicleType),
    )
)




In [None]:
# --- MODEL 1 ---
# Baseline price: BASE_PRICE plus the window's occupancy swing
# (occ_max - occ_min) expressed as a fraction of capacity.
delta_window = delta_window.with_columns(
    Price_Model1=BASE_PRICE + (pw.this.occ_max - pw.this.occ_min) / pw.this.cap,
)

In [None]:
# --- MODEL 2: Demand-Based (UDF) ---
@pw.udf
def calc_windowed_price(occ_max, cap, queue_max, traffic_max, special_max, vehicle_type) -> float:
    """Compute the demand-based price for one aggregated window row.

    Args:
        occ_max: Maximum occupancy in the window.
        cap: Capacity of the lot; a non-positive value is treated as zero
            utilisation instead of raising ZeroDivisionError.
        queue_max: Maximum queue length in the window.
        traffic_max: Maximum numeric traffic level in the window.
        special_max: Maximum special-day flag in the window.
        vehicle_type: Vehicle type label; looked up case-insensitively in
            ``vehicle_type_weights`` with a default weight of 1.0.

    Returns:
        The price as a float, clamped to [0.5 * BASE_PRICE, 2 * BASE_PRICE].
    """
    vehicle_weight = vehicle_type_weights.get(str(vehicle_type).lower(), 1.0)
    # Capacity can be 0 after the cleaning step fills missing values with 0.
    utilisation = occ_max / cap if cap > 0 else 0.0
    demand = (
        alpha * utilisation
        + beta * queue_max
        - gamma * traffic_max
        + delta * special_max
        + epsilon * vehicle_weight
    )
    # Scale demand by 3 and clamp to [0, 1] before applying it to the price.
    norm_demand = np.clip((demand - 0) / 3, 0, 1)
    price = BASE_PRICE * (1 + lambd * norm_demand)
    return float(max(min(price, 2 * BASE_PRICE), 0.5 * BASE_PRICE))

delta_window = delta_window.with_columns(
    Price_Model2 = calc_windowed_price(
        delta_window.occ_max,
        delta_window.cap,
        delta_window.queue_max,
        delta_window.traffic_max,
        delta_window.special_max,
        delta_window.vehicle_type,
    )
)




In [None]:
# Initialize Panel extension for Bokeh (must be at the top)
pn.extension('bokeh')


def test_plot():
    """Return a small Bokeh figure with two random series over nine days.

    Used only to check that Bokeh figures render through Panel; the values
    come from ``np.random.rand`` and carry no meaning.
    """
    days = np.arange('2024-01-01', '2024-01-10', dtype='datetime64[D]')
    fig = bokeh.plotting.figure(height=400, width=800, title="Test Plot", x_axis_type="datetime")
    # One random series per model, drawn in the same order: Model 1, then Model 2.
    for colour, label in (('navy', 'Model 1'), ('orange', 'Model 2')):
        values = np.random.rand(len(days))
        fig.line(days, values, color=colour, legend_label=label)
    fig.legend.location = "top_left"
    return fig


# Display the plot in Colab (last expression of the cell is rendered)
pn.pane.Bokeh(test_plot())




In [None]:
from bokeh.plotting import figure, output_notebook, show


output_notebook()

# Create sample data for a static demo plot.
# The names `sample_records` / `sample_prices` are deliberately distinct from
# `data` (the Pathway table) and `df` (the parking DataFrame) so running this
# cell does not overwrite the objects the streaming pipeline was built from.
lot_ids = ['A', 'B', 'C']
timestamps = pd.date_range('2024-01-01', periods=24, freq='h')  # use 'h' not 'H'
np.random.seed(0)  # fixed seed so the demo figure is reproducible
sample_records = [
    {
        'Timestamp': ts,
        'LotID': lot,
        'Price_Model1': np.random.uniform(10, 20),
        'Price_Model2': np.random.uniform(10, 20),
    }
    for lot in lot_ids
    for ts in timestamps
]
sample_prices = pd.DataFrame(sample_records)

# Plot: one colour per lot, solid line for Model 1 and dashed for Model 2.
p = figure(x_axis_type="datetime", title="Real-Time Pricing Line Plot for Each Parking Space", width=900, height=400)
colors = ['navy', 'firebrick', 'green', 'orange', 'purple']

# sort=False keeps lots in order of first appearance.
for i, (lot, lot_prices) in enumerate(sample_prices.groupby('LotID', sort=False)):
    lot_color = colors[i % len(colors)]
    p.line(lot_prices['Timestamp'], lot_prices['Price_Model1'], color=lot_color, legend_label=f"Lot {lot} - Model1", line_width=2)
    p.line(lot_prices['Timestamp'], lot_prices['Price_Model2'], color=lot_color, line_dash='dashed', legend_label=f"Lot {lot} - Model2", line_width=2)

p.legend.location = "top_left"
p.xaxis.axis_label = "Time"
p.yaxis.axis_label = "Price"
show(p)

In [None]:
# Run the Pathway computation defined in the cells above.
# NOTE(review): no output connector or subscriber for `delta_window` is visible
# in this notebook — confirm the computed prices are actually consumed somewhere.
pw.run()

Output()

