In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be read from this runtime
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import re

# Read the raw parking dataset from the mounted Drive folder
df = pd.read_csv('/content/drive/MyDrive/dataset.csv')

# Missing values per column in the raw data, largest count first
nan_counts = df.isna().sum().sort_values(ascending=False)

print("NaN counts per column:")
print(nan_counts.loc[nan_counts > 0])


# Build a single Timestamp column: join the separate date/time strings when both
# are present, otherwise parse the Timestamp column the file already has
if {'LastUpdatedDate', 'LastUpdatedTime'}.issubset(df.columns):
    date_time_text = df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime']
    df['Timestamp'] = pd.to_datetime(date_time_text, format='%d-%m-%Y %H:%M:%S')
else:
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# Derive a lot identifier from the coordinates when the dataset has no LotID column
if 'LotID' not in df.columns:
    lat_text = df['Latitude'].astype(str)
    lon_text = df['Longitude'].astype(str)
    df['LotID'] = lat_text + '_' + lon_text

# --- Re-check missing values now that Timestamp / LotID have been added ---
nan_counts = df.isna().sum()
print("NaN counts per column:")
columns_with_nans = nan_counts[nan_counts > 0]
print(columns_with_nans.sort_values(ascending=False))

# Preview the first rows whose TrafficConditionNearby is missing (if the column exists)
if 'TrafficConditionNearby' in df.columns:
    print("\nRows with NaN in TrafficConditionNearby:")
    missing_traffic = df['TrafficConditionNearby'].isna()
    preview_cols = ['Timestamp', 'LotID', 'TrafficConditionNearby']
    print(df.loc[missing_traffic, preview_cols].head(10))


NaN counts per column:
Series([], dtype: int64)
NaN counts per column:
Series([], dtype: int64)

Rows with NaN in TrafficConditionNearby:
Empty DataFrame
Columns: [Timestamp, LotID, TrafficConditionNearby]
Index: []


In [None]:
# Clean and map TrafficConditionNearby
def extract_traffic(val):
    """Return the traffic level ('low', 'average' or 'high') named in ``val``.

    Args:
        val: Raw cell value. A string is searched for one of the three level
            names, ignoring letter case; any other type is passed through.

    Returns:
        The matched level in lowercase, ``None`` when a string names no level,
        or ``val`` unchanged when it is not a string (e.g. NaN or a number).
    """
    if not isinstance(val, str):
        return val
    # IGNORECASE so labels such as 'High' or 'LOW' are recognised instead of
    # falling through to None; the result is lowercased so it always matches
    # the lowercase level names.
    match = re.search(r'(low|average|high)', val, flags=re.IGNORECASE)
    return match.group(1).lower() if match else None

# Ordinal encoding of the traffic labels returned by extract_traffic
traffic_map = {'low': 0, 'average': 1, 'high': 2}
traffic_labels = df['TrafficConditionNearby'].apply(extract_traffic)
# Values that are already numeric (this cell has been run before on the same df)
# are kept rather than being reset to 0 by the label lookup, so re-running the
# cell does not wipe the traffic signal.
df['TrafficConditionNearby'] = (
    traffic_labels.map(traffic_map)
    .fillna(pd.to_numeric(traffic_labels, errors='coerce'))
    .fillna(0)
)

# Coerce the numeric inputs; unparseable or missing entries get a neutral default
numeric_defaults = {'Capacity': 1, 'Occupancy': 0, 'QueueLength': 0, 'IsSpecialDay': 0}
for column, default in numeric_defaults.items():
    df[column] = pd.to_numeric(df[column], errors='coerce').fillna(default)

# Vehicle types missing from the table fall back to a weight of 1.0
vehicle_type_weights = {'car': 1.0, 'bike': 0.5, 'truck': 1.5}
df['VehicleTypeWeight'] = df['VehicleType'].map(vehicle_type_weights).fillna(1.0)

# A non-positive capacity would make the occupancy ratio inf/negative, and a single
# inf collapses every Norm_Demand to 0 through the min-max scaling below, so such
# rows use the same fallback denominator (1) as a missing capacity.
safe_capacity = df['Capacity'].where(df['Capacity'] > 0, 1)
df['Norm_Occupancy'] = df['Occupancy'] / safe_capacity

# Scale queue length and traffic by their maxima (divide by 1 when the maximum is 0)
max_queue = df['QueueLength'].max()
df['Norm_Queue'] = df['QueueLength'] / (max_queue if max_queue != 0 else 1)
max_traffic = df['TrafficConditionNearby'].max()
df['Norm_Traffic'] = df['TrafficConditionNearby'] / (max_traffic if max_traffic != 0 else 1)

# Weights of the linear demand function
ALPHA = 1.0    # occupancy ratio
BETA = 0.5     # queue length
GAMMA = 0.3    # nearby traffic (subtracted)
DELTA = 0.8    # special-day flag
EPSILON = 0.7  # vehicle type weight

df['Raw_Demand'] = (
    ALPHA * df['Norm_Occupancy'] +
    BETA * df['Norm_Queue'] -
    GAMMA * df['Norm_Traffic'] +
    DELTA * df['IsSpecialDay'] +
    EPSILON * df['VehicleTypeWeight']
)

# Min-max scale demand to [0, 1]; a constant demand maps to 0
demand_min = df['Raw_Demand'].min()
demand_max = df['Raw_Demand'].max()
if demand_max != demand_min:
    df['Norm_Demand'] = (df['Raw_Demand'] - demand_min) / (demand_max - demand_min)
else:
    df['Norm_Demand'] = 0.0

# Price grows linearly with normalised demand and is bounded to [0.5x, 2x] of the base
BASE_PRICE = 10.0
LAMBDA = 1.0
df['Price_Model2'] = BASE_PRICE * (1 + LAMBDA * df['Norm_Demand'])
df['Price_Model2'] = df['Price_Model2'].clip(lower=BASE_PRICE*0.5, upper=BASE_PRICE*2)

# Columns exported for the Pathway stream; fill whatever could still be missing
required_cols = ['Timestamp', 'Occupancy', 'Capacity', 'LotID', 'Price_Model2']
df['Price_Model2'] = df['Price_Model2'].fillna(BASE_PRICE)
df['LotID'] = df['LotID'].fillna('unknown')
df['Timestamp'] = df['Timestamp'].fillna('1970-01-01 00:00:00')

# Back the message printed below with an actual check
assert not df[required_cols].isna().any().any(), "required columns still contain missing values"

export_df = df[required_cols].copy()
# The stream schema declares Occupancy and Capacity as int; a column that became
# float during coercion would otherwise be written as e.g. "12.0".
for column in ('Occupancy', 'Capacity'):
    export_df[column] = export_df[column].round().astype('int64')

# Pin the timestamp text format so it always matches the "%Y-%m-%d %H:%M:%S" pattern
# the stream is parsed with, instead of relying on pandas' default datetime rendering.
export_df.to_csv("parking_stream_model2.csv", index=False, date_format='%Y-%m-%d %H:%M:%S')
print("Cleaned CSV saved for Pathway. No NaN or None in required columns.")


Cleaned CSV saved for Pathway. No NaN or None in required columns.


In [None]:
import pathway as pw
from bokeh.plotting import figure
from bokeh.io import output_notebook

# Configure Bokeh to render its plots inline in the notebook
output_notebook()

# Column layout of parking_stream_model2.csv as read by pw.demo.replay_csv.
class ParkingSchema(pw.Schema):
    Timestamp: str  # kept as text here; parsed into the datetime column `t` after loading
    Occupancy: int
    Capacity: int
    LotID: str
    Price_Model2: float

# Replay the exported CSV as a simulated stream
data = pw.demo.replay_csv("parking_stream_model2.csv", schema=ParkingSchema, input_rate=100)

# Parse the textual Timestamp into a datetime column `t`
fmt = "%Y-%m-%d %H:%M:%S"
data_with_time = data.with_columns(t=pw.this.Timestamp.dt.strptime(fmt))

# Keep only the columns the plot consumes
result = data_with_time.select(
    pw.this.t,
    pw.this.LotID,
    pw.this.Price_Model2,
)

def price_plotter(source, lot_ids=None):
    """Build the Bokeh figure showing the streamed Model 2 price of every lot.

    The glyphs are bound to ``source`` itself (``source=source``) so that rows
    added to the ColumnDataSource after the figure has been built show up in
    the plot. Values copied out of ``source.data`` while the figure is being
    built would be a one-off snapshot.
    NOTE(review): Pathway appears to call the plotting function once, before
    any rows have streamed in - confirm against the ``Table.plot`` docs.

    Points are drawn as markers coloured by ``LotID``. One line per lot is not
    possible here: a line bound to ``source`` connects the rows of all lots.

    Args:
        source: ColumnDataSource with the columns ``t``, ``LotID`` and
            ``Price_Model2``.
        lot_ids: Optional iterable of the lot identifiers expected in the
            stream, used to fix the colour of each lot up front. When omitted,
            the identifiers currently present in ``source`` are used; if there
            are none, all points share the default colour.

    Returns:
        The configured Bokeh figure.
    """
    from bokeh.palettes import Category10, Category20
    from bokeh.transform import factor_cmap

    fig = figure(
        height=400,
        width=800,
        title="Model 2: Demand-Based Price per Lot (Pathway Streamed)",
        x_axis_type="datetime",
    )

    if lot_ids is None:
        lot_ids = source.data['LotID']
    # Sorted (not raw set order) so every lot keeps the same colour between runs
    unique_lots = sorted({str(lot) for lot in lot_ids})

    if unique_lots:
        base_colors = list(Category10[10]) + list(Category20[20])
        # Cycle through the colours when there are more lots than colours
        palette = [base_colors[i % len(base_colors)] for i in range(len(unique_lots))]
        fig.scatter(
            't',
            'Price_Model2',
            source=source,
            size=4,
            color=factor_cmap('LotID', palette=palette, factors=unique_lots),
            legend_field='LotID',
        )
        fig.legend.location = "top_left"
    else:
        fig.scatter('t', 'Price_Model2', source=source, size=4)

    fig.xaxis.axis_label = 'Time'
    fig.yaxis.axis_label = 'Price ($)'
    return fig

# The lot identifiers are read from the exported CSV because they have to be known
# when the figure is built; dtype=str mirrors the `str` LotID field of the schema.
known_lots = pd.read_csv("parking_stream_model2.csv", usecols=["LotID"], dtype=str)["LotID"].unique()

# This will display the plot interactively as Pathway streams data
viz = result.plot(lambda source: price_plotter(source, lot_ids=known_lots), sorting_col="t")

# Show the plot before starting the stream; without this `viz` is only assigned and
# never rendered. `display` is the built-in provided by the notebook kernel.
# NOTE(review): Panel objects may need the Panel notebook extension loaded to render - confirm.
display(viz)

pw.run()


Output()

