<a href="https://colab.research.google.com/github/susmita99-collab/Summer-Analytics-2025-Assignment/blob/main/Dynamic_Pricing_Final_Fixed_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install pathway bokeh panel --quiet

In [5]:
import numpy as np
import pandas as pd
import pathway as pw
import datetime
import bokeh.plotting
import panel as pn

In [36]:

# 'dataset.csv' must already be uploaded to the Colab session's working directory.
df = pd.read_csv('dataset.csv')

# Sanity report: a sample of rows, the column names, and per-column null counts.
print("✅ First 5 rows of dataset:", df.head(), sep="\n")
print("\n✅ Column names:", list(df.columns), sep="\n")
print("\n⚠️ Null values in each column:", df.isna().sum(), sep="\n")

✅ First 5 rows of dataset:
   ID SystemCodeNumber  Capacity   Latitude  Longitude  Occupancy VehicleType  \
0   0      BHMBCCMKT01       577  26.144536  91.736172         61         car   
1   1      BHMBCCMKT01       577  26.144536  91.736172         64         car   
2   2      BHMBCCMKT01       577  26.144536  91.736172         80         car   
3   3      BHMBCCMKT01       577  26.144536  91.736172        107         car   
4   4      BHMBCCMKT01       577  26.144536  91.736172        150        bike   

  TrafficConditionNearby  QueueLength  IsSpecialDay LastUpdatedDate  \
0                    low            1             0      04-10-2016   
1                    low            1             0      04-10-2016   
2                    low            2             0      04-10-2016   
3                    low            2             0      04-10-2016   
4                    low            2             0      04-10-2016   

  LastUpdatedTime  
0        07:59:00  
1        08:25:00  

In [7]:
# Join the date and time text columns with a space, then parse the result
# (day-month-year followed by a 24-hour time) into a single datetime column.
date_time_text = df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime']
df['Timestamp'] = pd.to_datetime(date_time_text, format='%d-%m-%Y %H:%M:%S')

In [8]:
# Sort chronologically and reset index
df = df.sort_values('Timestamp').reset_index(drop=True)

# The streaming schema declares TrafficConditionNearby as an integer, but the raw
# dataset stores it as text (the preview shows 'low'), so encode it before export.
# NOTE(review): only 'low' is visible in the preview; 'average' and 'high' are the
# assumed remaining labels. Any other label raises instead of being silently dropped.
TRAFFIC_LEVELS = {'low': 0, 'average': 1, 'high': 2}

stream_columns = ['Timestamp', 'Occupancy', 'Capacity', 'QueueLength',
                  'TrafficConditionNearby', 'IsSpecialDay', 'VehicleType']
stream_df = df[stream_columns].copy()  # copy so the exported encoding never mutates df

if not pd.api.types.is_numeric_dtype(stream_df['TrafficConditionNearby']):
    traffic_labels = stream_df['TrafficConditionNearby'].astype(str).str.strip().str.lower()
    traffic_codes = traffic_labels.map(TRAFFIC_LEVELS)
    if traffic_codes.isna().any():
        unknown_labels = sorted(traffic_labels[traffic_codes.isna()].unique())
        raise ValueError(f"Unmapped TrafficConditionNearby labels: {unknown_labels}")
    stream_df['TrafficConditionNearby'] = traffic_codes.astype(int)

# Save selected features to CSV for Pathway streaming
stream_df.to_csv("parking_stream.csv", index=False)

In [9]:
# Define the expected structure of streaming data.
# Field names match the columns written to "parking_stream.csv".
class ParkingSchema(pw.Schema):
    Timestamp: str  # kept as text here; parsed into a datetime after ingestion
    Occupancy: int
    Capacity: int
    QueueLength: int
    # NOTE(review): declared as int, but the raw dataset preview shows text labels
    # such as 'low' in this column — the CSV fed to the stream must contain a
    # numeric encoding for this declaration to hold.
    TrafficConditionNearby: int
    IsSpecialDay: int
    VehicleType: str

In [10]:
# Replay the exported CSV as a simulated stream typed by ParkingSchema;
# input_rate sets how quickly rows are emitted.
data = pw.demo.replay_csv(
    "parking_stream.csv",
    schema=ParkingSchema,
    input_rate=1000,
)

In [11]:
# Format of the 'Timestamp' strings in the stream (year-month-day, 24-hour time)
fmt = "%Y-%m-%d %H:%M:%S"

# Build the parse expression once and reuse it for both derived columns.
parsed_timestamp = data.Timestamp.dt.strptime(fmt)

data_with_time = data.with_columns(
    # full parsed timestamp
    t=parsed_timestamp,
    # same timestamp rendered with the time part fixed to midnight (one value per day)
    day=parsed_timestamp.dt.strftime("%Y-%m-%dT00:00:00"),
)

In [31]:
# Step 5.0 — Define the vehicle weight helper and attach the weight column.
# The helper is defined in this cell so the notebook runs from a fresh kernel.
# NOTE(review): these weights are assumed defaults, not derived from the data;
# tune them to the pricing policy you want.
VEHICLE_TYPE_WEIGHTS = {
    "cycle": 0.25,
    "bike": 0.5,
    "car": 1.0,
    "truck": 1.5,
}
DEFAULT_VEHICLE_WEIGHT = 1.0  # used for any vehicle type missing from the table


def vehicle_type_weight(vehicle_type: str) -> float:
    """Return the demand weight for a vehicle type label.

    Args:
        vehicle_type: Vehicle type label, e.g. 'car' or 'bike'. Matching is
            case-insensitive and ignores surrounding whitespace.

    Returns:
        The weight from VEHICLE_TYPE_WEIGHTS, or DEFAULT_VEHICLE_WEIGHT when
        the label is not listed.
    """
    label = str(vehicle_type).strip().lower()
    return VEHICLE_TYPE_WEIGHTS.get(label, DEFAULT_VEHICLE_WEIGHT)


# The float return annotation lets pw.apply give the new column a float type.
with_vehicle_weight = data_with_time.with_columns(
    vehicle_weight=pw.apply(vehicle_type_weight, pw.this.VehicleType)
)

In [None]:
# Convert every demand input to float, then combine them into one demand score.
# pw.cast takes the target type first and the column second.
with_demand = with_vehicle_weight.with_columns(
    OccupancyF=pw.cast(float, pw.this.Occupancy),
    CapacityF=pw.cast(float, pw.this.Capacity),
    QueueF=pw.cast(float, pw.this.QueueLength),
    TrafficF=pw.cast(float, pw.this.TrafficConditionNearby),
    SpecialDayF=pw.cast(float, pw.this.IsSpecialDay),
    VehicleWeightF=pw.cast(float, pw.this.vehicle_weight),
).with_columns(
    # Weighted linear score: occupancy ratio, queue, special day and vehicle
    # weight raise demand; nearby traffic lowers it.
    # Only occupancy is normalised (by capacity); the other terms use raw values.
    demand=(
        0.4 * (pw.this.OccupancyF / pw.this.CapacityF)
        + 0.2 * pw.this.QueueF
        - 0.2 * pw.this.TrafficF
        + 0.1 * pw.this.SpecialDayF
        + 0.1 * pw.this.VehicleWeightF
    )
)

In [None]:
BASE_PRICE = 10        # price when the multiplier is exactly 1
MAX_MULTIPLIER = 2.0   # upper bound on the price multiplier
MIN_MULTIPLIER = 0.5   # lower bound on the price multiplier

# Average the demand score over one-day tumbling windows and turn it into a price.
windowed = (
    with_demand.windowby(
        pw.this.t,
        instance=pw.this.day,
        window=pw.temporal.tumbling(datetime.timedelta(days=1)),
        behavior=pw.temporal.exactly_once_behavior(),
    )
    .reduce(
        t=pw.this._pw_window_end,
        demand_mean=pw.reducers.avg(pw.this.demand),
    )
    .with_columns(
        # multiplier = 1 + demand_mean, clamped to [MIN_MULTIPLIER, MAX_MULTIPLIER],
        # written as nested conditionals.
        price_multiplier=pw.if_else(
            pw.this.demand_mean > (MAX_MULTIPLIER - 1),
            MAX_MULTIPLIER,
            pw.if_else(
                pw.this.demand_mean < (MIN_MULTIPLIER - 1),
                MIN_MULTIPLIER,
                1 + pw.this.demand_mean,
            ),
        )
    )
    # Separate step: price_multiplier only exists on the table after the call above.
    .with_columns(price=BASE_PRICE * pw.this.price_multiplier)
)

In [None]:
pn.extension()

def price_plotter(source):
    """Build the price-over-time figure.

    Args:
        source: Data source passed to the Bokeh glyphs; it must provide the
            "t" and "price" columns.

    Returns:
        A Bokeh figure with a green price line and red point markers.
    """
    fig = bokeh.plotting.figure(
        height=400,
        width=800,
        title="Dynamic Parking Price (Daily)",
        x_axis_type="datetime",
        x_axis_label="Date",
        y_axis_label="Price ($)",
        background_fill_color="#f5f5f5"
    )
    fig.line("t", "price", source=source, line_width=2, color="green", legend_label="Price")
    # scatter(marker="circle") replaces circle(size=...), which recent Bokeh deprecates.
    fig.scatter("t", "price", source=source, marker="circle", size=6, color="red")
    fig.legend.location = "top_left"
    return fig

# Plot the windowed table with the plotter above, ordering points by "t".
viz = windowed.plot(price_plotter, sorting_col="t")
pn.Column(viz).servable()

In [None]:
%%capture --no-display
# Run the Pathway computation defined in the cells above. The cell magic on the
# first line captures the cell's text output while leaving rich display output visible.
pw.run()