<a href="https://colab.research.google.com/github/tanishcode-12/assignment-1/blob/main/pathway.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pathway
!pip install pathway bokeh geopy pandas
!pip install pathway bokeh pandas
!pip install -U pathway
!pip install geopy pathway --quiet


Collecting pathway
  Downloading pathway-0.24.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/60.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Collecting h3>=4 (from pathway)
  Downloading h3-4.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting python-sat>=0.1.8.dev0 (from pathway)
  Downloading python_sat-1.8.dev17-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (1.5 kB)
Collecting beartype<0.16.0,>=0.14.0 (from pathway)
  Downloading beartype-0.15.0-py3-none-any.whl.metadata (28 kB)
Collecting diskcache>=5.2.1 (from pathway)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting boto3<1.36.0,>=1.26.76 (from pathway)
  Downloading boto3-1.35.99-py3-none-any.whl.metadata (6.7



In [52]:
import pathway
print(pathway.__version__)

0.24.1


# ***Model 1: Baseline Linear Model***




🧩 1. Import Required Libraries

In [42]:
import pathway as pw
import pandas as pd
from datetime import datetime
import time
import numpy as np
from pathway.internals.dtype import DATE_TIME_NAIVE
from geopy.distance import geodesic

from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
output_notebook()

📥 2. Load the Dataset

In [51]:
df = pd.read_csv('/content/dataset.csv')
df.head()

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00


🧪 3. Combine Date and Time

In [45]:
# Combine date and time columns
df['Datetime'] = pd.to_datetime(df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'], format="%d-%m-%Y %H:%M:%S")
df = df.sort_values(by='Datetime')

📉 4. Keep Only Required Columns

In [46]:
df = df[['Datetime', 'Occupancy', 'Capacity']]  # Clean DataFrame to retain relevant features

🏗️ 5. Define Pathway Schema

In [43]:
class ParkingSchema(pw.Schema):
    # Column layout of the rows produced by generate_csv_rows().
    # NOTE(review): Pathway tables also expose a built-in `id` attribute; confirm that a
    # user-defined column called `id` is accepted by the installed Pathway version.
    id: int
    # Timezone-naive timestamp, via the public alias rather than pathway.internals.
    # Use pw.DateTimeUtc instead if the stream carries UTC-aware timestamps.
    datetime: pw.DateTimeNaive
    occupancy: int
    capacity: int

🔄 6. Simulate Real-Time Row Generation

In [49]:
def generate_csv_rows():
    """Yield the rows of the global ``df`` one at a time as schema-shaped dicts.

    Each dict has the keys ``id`` (the row's index label), ``datetime``,
    ``occupancy`` and ``capacity``. After every yielded row the generator sleeps
    for 0.5 s to imitate a live feed.
    """
    for i, row in df.iterrows():
        yield {
            "id": i,
            "datetime": row['Datetime'],
            "occupancy": row['Occupancy'],
            "capacity": row['Capacity']
        }
        time.sleep(0.5)  # Simulate real-time stream (500ms delay per row)


class ParkingRowSubject(pw.io.python.ConnectorSubject):
    """Connector subject that pushes every generated row into Pathway."""

    def run(self):
        # pw.io.python.read() consumes a ConnectorSubject, not a bare generator,
        # so each yielded dict is forwarded as one row.
        for record in generate_csv_rows():
            self.next(**record)


# Built at module level. Previously this call sat inside the generator's loop, so
# `input_table` was never bound for later cells and the generator tried to register
# itself as a source while it was being iterated.
input_table = pw.io.python.read(ParkingRowSubject(), schema=ParkingSchema)


💰 7. Define Baseline Pricing Function

In [50]:

def baseline_price(i: int, occupancy: int, capacity: int, prev_price: float = 50.0,
                   alpha: float = 10.0) -> float:
    """Return the next price under the baseline linear model.

    The price rises by ``alpha * occupancy / capacity`` on top of ``prev_price``.

    Args:
        i: Row identifier; accepted for call compatibility and not used in the formula.
        occupancy: Number of occupied spaces.
        capacity: Total number of spaces; a capacity of 0 is treated as a ratio of 0.
        prev_price: Price at the previous step.
        alpha: Weight applied to the occupancy ratio (defaults to the former constant 10).

    Returns:
        The updated price.
    """
    ratio = occupancy / capacity if capacity != 0 else 0
    return prev_price + alpha * ratio


🔁 8. Simulate Pricing Over Time

In [37]:
def simulate_prices(df, alpha=10, initial_price=50):
    """Run the baseline linear price model over the rows of ``df`` in order.

    The first row is assigned ``initial_price``; its own occupancy is not used.
    Every later row gets the previous price plus ``alpha * Occupancy / Capacity``,
    where a capacity of 0 contributes nothing.

    Args:
        df: Frame with ``Occupancy`` and ``Capacity`` columns, already ordered in time.
        alpha: Weight applied to the occupancy ratio.
        initial_price: Price assigned to the first row.

    Returns:
        A new frame equal to ``df`` plus a ``PredictedPrice`` column. The input frame
        is left untouched, and an empty input yields an empty result instead of
        raising a length-mismatch error.
    """
    occupancy = df['Occupancy'].to_numpy(dtype=float)
    capacity = df['Capacity'].to_numpy(dtype=float)

    # Occupancy ratio per row; rows with zero capacity keep the pre-filled 0.
    ratio = np.divide(occupancy, capacity, out=np.zeros_like(occupancy), where=capacity != 0)

    # Seed the running sum with the initial price so the cumulative sum performs the
    # same left-to-right additions as the original loop.
    steps = alpha * ratio
    if len(steps):
        steps[0] = initial_price

    return df.assign(PredictedPrice=np.cumsum(steps))

df = simulate_prices(df)


📊 9. Create Bokeh Plot

In [38]:
# Bind the priced frame to Bokeh so glyphs and tooltips can refer to columns by name.
source = ColumnDataSource(df)

p = figure(x_axis_type='datetime', title="Baseline Linear Dynamic Pricing", width=800, height=400)
p.line(x='Datetime', y='PredictedPrice', source=source, legend_label='Predicted Price', line_width=2, color='navy')
# scatter() replaces circle(size=...), which is deprecated in Bokeh 3.4+.
p.scatter(x='Datetime', y='PredictedPrice', source=source, size=8, color='orange', legend_label='Data Point')

# Tooltip shows the timestamp, the simulated price and the inputs that produced it.
hover = HoverTool(tooltips=[
    ("Time", "@Datetime{%F %T}"),
    ("Price", "@PredictedPrice{0.00}"),
    ("Occupancy", "@Occupancy"),
    ("Capacity", "@Capacity")
], formatters={'@Datetime': 'datetime'})

p.add_tools(hover)
p.xaxis.axis_label = 'Time'
p.yaxis.axis_label = 'Price'
p.legend.location = 'top_left'

show(p)




# ***Model 2: Demand-Based Pricing Model***

🔧 1. Import Libraries

In [28]:
import pandas as pd                  # For data manipulation
import pathway as pw                 # For streaming data processing (if used in future stages)
from bokeh.plotting import figure, show, output_notebook  # For interactive visualization
from bokeh.models import ColumnDataSource, HoverTool      # For plotting data and hover interactions
from sklearn.preprocessing import MinMaxScaler            # For scaling numeric values
output_notebook()  # Render Bokeh plots inside Jupyter/Colab notebook


📥 2. Load the Dataset

In [29]:
df = pd.read_csv("/content/dataset.csv")  # Load the CSV file containing parking data
df.head()

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00


🧹 3. Data Preprocessing

In [30]:
# Build a single timestamp from the separate date and time strings, then order rows by it
timestamp_text = df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime']
df['Datetime'] = pd.to_datetime(timestamp_text, format="%d-%m-%Y %H:%M:%S")
df = df.sort_values(by='Datetime')

# Numeric scores for the categorical traffic and vehicle columns
traffic_map = {'low': 1, 'medium': 2, 'high': 3}
vehicle_map = {'bike': 0.5, 'car': 1.0, 'truck': 1.5}

df = df.assign(
    TrafficScore=df['TrafficConditionNearby'].map(traffic_map),
    VehicleWeight=df['VehicleType'].map(vehicle_map),
)

# Categories absent from the maps become NaN above and their rows are discarded here.
# NOTE(review): confirm both maps cover every value present in the dataset, otherwise
# those rows disappear silently.
df = df.dropna(subset=['TrafficScore', 'VehicleWeight'])


📈 4. Define the Demand Function

In [31]:
# Weights of the demand terms: occupancy, queue, traffic, special day, vehicle type
α, β, γ, δ, ε = 1.0, 0.5, 0.3, 1.2, 0.7

# Share of the lot that is occupied
df['OccupancyRate'] = df['Occupancy'].div(df['Capacity'])

# Weighted terms of the demand score; traffic is the only one that is subtracted
occupancy_term = α * df['OccupancyRate']
queue_term = β * df['QueueLength']
traffic_term = γ * df['TrafficScore']
special_day_term = δ * df['IsSpecialDay']
vehicle_term = ε * df['VehicleWeight']

df['RawDemand'] = occupancy_term + queue_term - traffic_term + special_day_term + vehicle_term


🔃 5. Normalize the Demand

In [33]:
scaler = MinMaxScaler()
df['NormalizedDemand'] = scaler.fit_transform(df[['RawDemand']])

💰 6. Compute Dynamic Prices

In [34]:
BasePrice = 100    # Reference price for parking
λ = 0.5            # How strongly normalized demand moves the price

# Allowed price band: 50% to 200% of the base price
price_floor = BasePrice * 0.5
price_ceiling = BasePrice * 2.0

# Scale the base price by normalized demand, then keep the result inside the band
unclipped_price = BasePrice * (1 + λ * df['NormalizedDemand'])
df['DynamicPrice'] = unclipped_price.clip(lower=price_floor, upper=price_ceiling)


🧽 7. Final Data Cleaning

In [35]:
# Remove accidental whitespace from column names
df.columns = df.columns.str.strip()

# Rebuild the timestamp from the stripped date/time strings.
# The explicit day-first format matches the one used when 'Datetime' was first parsed.
# Without it pandas guesses the layout, so a value such as 04-10-2016 can be read
# month-first and days above 12 are coerced to NaT and dropped below.
df['datetime'] = pd.to_datetime(
    df['LastUpdatedDate'].astype(str).str.strip() + ' ' + df['LastUpdatedTime'].astype(str).str.strip(),
    format="%d-%m-%Y %H:%M:%S",
    errors='coerce'  # Mark rows that do not match the format as NaT
)

# Drop rows where datetime parsing failed
df = df.dropna(subset=['datetime'])

# Sort data for time-based plotting
df = df.sort_values('datetime')


📊 8. Visualization with Bokeh

In [36]:
source = ColumnDataSource(df)  # Bokeh's way of binding data to visuals

✨ Hover Tool Setup

In [37]:
hover = HoverTool(
    tooltips=[
        ("Datetime", "@datetime{%F %T}"),
        ("Dynamic Price", "@DynamicPrice{0.00}"),
        ("Occupancy Rate", "@OccupancyRate{0.00}"),
        ("Queue Length", "@QueueLength"),
        ("Traffic", "@TrafficConditionNearby"),
        ("Vehicle", "@VehicleType"),
        ("Is Special Day", "@IsSpecialDay"),
    ],
    formatters={'@datetime': 'datetime'},  # Format datetime in tooltip
    mode='vline'  # Show tooltip when hovering vertically over the chart
)


 📈 Plot Lines for Price & Occupancy

In [38]:
from bokeh.models import LinearAxis, Range1d

p = figure(
    x_axis_type='datetime',
    title="Demand-Based Dynamic Pricing",
    width=800,
    height=400,
    tools=[hover, "pan", "wheel_zoom", "box_zoom", "reset"]
)

# Price is drawn against the default (left) axis.
p.line(x='datetime', y='DynamicPrice', source=source, line_width=2, legend_label="Dynamic Price")
# Label the left axis before the second axis exists: `p.yaxis` addresses every y-axis.
p.yaxis.axis_label = "Price"

# Occupancy rate is a ratio while prices are of the order of the base price, so it gets
# its own right-hand axis; on a shared axis it would collapse into a flat line.
rate_axis_top = max(1.0, float(df['OccupancyRate'].max()))
p.extra_y_ranges = {"rate": Range1d(start=0, end=rate_axis_top)}
p.add_layout(LinearAxis(y_range_name="rate", axis_label="Occupancy Rate"), 'right')
p.line(x='datetime', y='OccupancyRate', source=source, y_range_name="rate",
       line_color='green', line_dash='dashed', legend_label="Occupancy Rate (right axis)")

p.legend.location = "top_left"

show(p)  # Display the interactive plot

In [7]:
import pandas as pd
import numpy as np
import pathway as pw
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from math import radians, cos, sin, asin, sqrt

output_notebook()


In [8]:
df = pd.read_csv("/content/dataset.csv")


In [9]:
def haversine(lat1, lon1, lat2, lon2, radius_km=6371.0):
    """Return the great-circle distance between two points given in degrees.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
        radius_km: Sphere radius in kilometres (defaults to the former constant 6371).

    Returns:
        Distance along the sphere's surface, in the same unit as ``radius_km``.
    """
    # convert degrees to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat/2)**2 + cos(lat1)*cos(lat2)*sin(dlon/2)**2
    # Rounding can push sqrt(a) marginally above 1 for near-antipodal points, which
    # would make asin() raise a domain error; clamp it to the valid range.
    return radius_km * 2 * asin(min(1.0, sqrt(a)))


In [10]:
def compute_proximity(df, threshold_km=1.0):
    """Map every row of ``df`` to the other rows located within ``threshold_km``.

    Args:
        df: Frame with ``Latitude`` and ``Longitude`` columns, in degrees.
        threshold_km: Largest haversine distance, in km, that still counts as nearby.

    Returns:
        Dict keyed by index label. Each value lists the index labels of the other
        rows within the threshold, in row order. For a default 0..n-1 index this
        matches the previous positional keys.
    """
    labels = list(df.index)
    coords = list(zip(df['Latitude'].tolist(), df['Longitude'].tolist()))
    nearby = {label: [] for label in labels}

    # Many rows share the same coordinates, so the verdict for a coordinate pair is
    # cached instead of recomputing the trigonometry each time.
    within_threshold = {}

    # Distance is symmetric: visit each unordered pair once and record both directions.
    for a in range(len(labels)):
        for b in range(a + 1, len(labels)):
            pair = (coords[a], coords[b])
            close = within_threshold.get(pair)
            if close is None:
                close = haversine(*coords[a], *coords[b]) <= threshold_km
                within_threshold[pair] = close
            if close:
                nearby[labels[a]].append(labels[b])
                nearby[labels[b]].append(labels[a])
    return nearby


In [11]:
def adjust_prices(df, price_col='Price'):
    """Adjust each row's price against the prices of nearby rows.

    Rules applied per row, where "full" means ``Occupancy >= Capacity``:
      * full and priced above the cheapest neighbour -> cheapest neighbour price - 1
        ('Reroute / Lower')
      * not full and priced below the most expensive neighbour -> own price + 1 ('Raise')
      * otherwise the price is kept ('Keep')
    A row without neighbours is compared with its own price and is therefore kept.

    Args:
        df: Frame with ``Latitude``, ``Longitude``, ``Occupancy``, ``Capacity`` and the
            price column.
        price_col: Name of the column holding the current price.

    Returns:
        A copy of ``df`` with ``AdjustedPrice`` and ``Action`` columns added.

    Raises:
        KeyError: If a required column is missing. This is checked before the costly
            proximity computation rather than failing afterwards.
    """
    required = ['Latitude', 'Longitude', 'Occupancy', 'Capacity', price_col]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"adjust_prices() requires column(s) {missing} that are not in the DataFrame")

    df = df.copy()
    proximity_map = compute_proximity(df)
    prices = df[price_col]

    new_prices = []
    actions = []
    for idx, row in df.iterrows():
        own_price = row[price_col]
        is_full = row['Occupancy'] >= row['Capacity']

        competitor_prices = [prices.loc[i] for i in proximity_map[idx]]
        # Without competitors the row is compared with itself, so neither rule fires.
        min_competitor = min(competitor_prices, default=own_price)
        max_competitor = max(competitor_prices, default=own_price)

        # Pricing Logic
        if is_full and own_price > min_competitor:
            new_price = min_competitor - 1
            action = 'Reroute / Lower'
        elif not is_full and own_price < max_competitor:
            new_price = own_price + 1
            action = 'Raise'
        else:
            new_price = own_price
            action = 'Keep'

        new_prices.append(new_price)
        actions.append(action)

    df['AdjustedPrice'] = new_prices
    df['Action'] = actions
    return df


In [None]:
# NOTE(review): adjust_prices() reads a 'Price' column, but `df` was just reloaded from
# dataset.csv, whose header shown earlier in this notebook has no such column. Confirm a
# price column is added before this cell runs.
df = adjust_prices(df)

In [13]:
# Bind the adjusted frame to Bokeh so glyphs and tooltips can refer to columns by name.
source = ColumnDataSource(df)

p = figure(title="Competitive Pricing Adjustment",
           x_axis_label='Latitude', y_axis_label='Longitude',
           width=800, height=500, tools="pan,wheel_zoom,reset")

# scatter() replaces circle(size=...), which is deprecated in Bokeh 3.4+.
p.scatter(x='Latitude', y='Longitude', size=10, source=source,
          color="navy", alpha=0.5, legend_label="Parking Lots")

hover = HoverTool()
hover.tooltips = [
    # The frame has no 'id' column; 'SystemCodeNumber' is the per-lot identifier,
    # whereas 'ID' only numbers the rows.
    ("Lot ID", "@SystemCodeNumber"),
    ("Occupancy", "@Occupancy"),
    ("Capacity", "@Capacity"),
    ("Original Price", "@Price"),
    ("Adjusted Price", "@AdjustedPrice"),
    ("Action", "@Action"),
]
p.add_tools(hover)

p.legend.location = "top_left"
show(p)


