<a href="https://colab.research.google.com/github/snehapathak9/Capstone-Project/blob/main/Dynamic_Pricing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Install required packages
!pip install bokeh pandas numpy --quiet


In [6]:
# Import libraries
import pandas as pd
import numpy as np
import time
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource
from IPython.display import clear_output

output_notebook()


In [7]:
#Load dataset
df = pd.read_csv("/content/dataset.csv")
df.head()

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00


In [8]:
# Step 3: Preprocess Data with Verified Categories
def preprocess(df):
    df['Timestamp'] = pd.to_datetime(df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'], format="%d-%m-%Y %H:%M:%S")
    df.sort_values(by=['SystemCodeNumber', 'Timestamp'], inplace=True)

    # Map traffic levels from actual data
    traffic_map = {
        'low': 0.2,
        'average': 0.5,
        'high': 0.8
    }

    # Map vehicle weights from actual data
    vehicle_map = {
        'bike': 0.6,
        'car': 1.0,
        'truck': 1.5,
        'cycle': 0.3
    }

    df['TrafficLevel'] = df['TrafficConditionNearby'].map(traffic_map).fillna(0.5)
    df['VehicleTypeWeight'] = df['VehicleType'].map(vehicle_map).fillna(1.0)
    df['OccupancyRate'] = df['Occupancy'] / df['Capacity']
    return df

df = preprocess(df)

In [9]:
# Map categorical variables into numerical form
traffic_map = {'low': 0.2, 'average': 0.5, 'high': 0.8}
vehicle_map = {'bike': 0.6, 'car': 1.0, 'truck': 1.5, 'cycle': 0.3}

df['TrafficLevel'] = df['TrafficConditionNearby'].map(traffic_map).fillna(0.5)
df['VehicleWeight'] = df['VehicleType'].map(vehicle_map).fillna(1.0)
df['OccupancyRate'] = df['Occupancy'] / df['Capacity'].replace(0, np.nan)


In [10]:
# Choose one lot for visualization and streaming
lot_id = df['SystemCodeNumber'].unique()[0]
df_lot = df[df['SystemCodeNumber'] == lot_id].copy().reset_index(drop=True)

print(f"Simulating real-time pricing for Lot: {lot_id}")
df_lot.head()


Simulating real-time pricing for Lot: BHMBCCMKT01


Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime,Timestamp,TrafficLevel,VehicleTypeWeight,OccupancyRate,VehicleWeight
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00,2016-10-04 07:59:00,0.2,1.0,0.105719,1.0
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00,2016-10-04 08:25:00,0.2,1.0,0.110919,1.0
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00,2016-10-04 08:59:00,0.2,1.0,0.138648,1.0
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00,2016-10-04 09:32:00,0.2,1.0,0.185442,1.0
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00,2016-10-04 09:59:00,0.2,0.6,0.259965,0.6


In [11]:
# Model 1: Linear Price Based on Occupancy
def compute_model1_price(prev_price, occ_rate, alpha=2):
    price = prev_price + alpha * occ_rate
    return np.clip(price, 5, 20)


In [12]:
# Stream and visualize Model 1 Pricing
prev_price = 10
streamed_prices = []
timestamps = []

source = ColumnDataSource(data=dict(x=[], y=[]))
p = figure(title="Live Model 1 Price", x_axis_type='datetime', width=800, height=300)
p.line(x='x', y='y', source=source, line_width=2)

for idx, row in df_lot.iterrows():
    occ_rate = row['OccupancyRate']
    ts = row['Timestamp']

    new_price = compute_model1_price(prev_price, occ_rate)
    prev_price = new_price

    streamed_prices.append(new_price)
    timestamps.append(ts)

    source.stream({'x': [ts], 'y': [new_price]}, rollover=50)

    clear_output(wait=True)
    show(p)
    time.sleep(0.5)


In [13]:
# Model 2: Demand-Based Dynamic Pricing
def compute_demand(row, alpha=0.6, beta=0.4, gamma=0.3, delta=1.0, epsilon=0.7):
    return (
        alpha * row['OccupancyRate'] +
        beta * row['QueueLength'] -
        gamma * row['TrafficLevel'] +
        delta * row['IsSpecialDay'] +
        epsilon * row['VehicleWeight']
    )

df_lot['Demand'] = df_lot.apply(compute_demand, axis=1)
d_min, d_max = df_lot['Demand'].min(), df_lot['Demand'].max()
df_lot['NormDemand'] = (df_lot['Demand'] - d_min) / (d_max - d_min + 1e-6)

def compute_model2_price(norm_demand, base_price=10, lambd=0.5):
    price = base_price * (1 + lambd * norm_demand)
    return np.clip(price, 5, 20)

df_lot['Price_Model2'] = df_lot['NormDemand'].apply(lambda d: compute_model2_price(d))
df_lot[['Timestamp', 'Price_Model2']].head()


Unnamed: 0,Timestamp,Price_Model2
0,2016-10-04 07:59:00,10.604884
1,2016-10-04 08:25:00,10.607461
2,2016-10-04 08:59:00,10.951751
3,2016-10-04 09:32:00,10.974952
4,2016-10-04 09:59:00,10.780523


In [14]:
from math import radians, cos, sin, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    return 6371 * c  # km

df['Price_Model2'] = df.groupby('SystemCodeNumber')['OccupancyRate'].transform(
    lambda x: compute_model2_price(
        ((x - x.min()) / (x.max() - x.min() + 1e-6)).fillna(0)
    )
)

locations = df.groupby('SystemCodeNumber')[['Latitude', 'Longitude']].mean()

def apply_model3(row):
    lat1, lon1 = row['Latitude'], row['Longitude']
    nearby_prices = []
    for other_lot in df['SystemCodeNumber'].unique():
        if other_lot != row['SystemCodeNumber']:
            lat2, lon2 = locations.loc[other_lot]
            if haversine(lon1, lat1, lon2, lat2) < 0.5:
                comp_price = df[
                    (df['SystemCodeNumber'] == other_lot) &
                    (df['Timestamp'] == row['Timestamp'])
                ]['Price_Model2']
                if not comp_price.empty:
                    nearby_prices.append(comp_price.values[0])
    if not nearby_prices:
        return row['Price_Model2']

    avg_comp_price = np.mean(nearby_prices)
    if row['OccupancyRate'] > 0.95 and avg_comp_price < row['Price_Model2']:
        return avg_comp_price
    elif avg_comp_price > row['Price_Model2']:
        return min(row['Price_Model2'] * 1.1, 20)
    return row['Price_Model2']

df['Price_Model3'] = df.apply(apply_model3, axis=1)
df[['SystemCodeNumber', 'Timestamp', 'Price_Model3']].head()


Unnamed: 0,SystemCodeNumber,Timestamp,Price_Model3
0,BHMBCCMKT01,2016-10-04 07:59:00,11.568301
1,BHMBCCMKT01,2016-10-04 08:25:00,11.597197
2,BHMBCCMKT01,2016-10-04 08:59:00,11.751313
3,BHMBCCMKT01,2016-10-04 09:32:00,12.011383
4,BHMBCCMKT01,2016-10-04 09:59:00,12.425568


In [15]:
from bokeh.palettes import Category10

# Select one parking lot to compare pricing models
lot_id = df_lot['SystemCodeNumber'].iloc[0]
df_lot_sorted = df_lot.sort_values("Timestamp")

# Create the plot
p = figure(title=f"Price Comparison for Lot: {lot_id}", x_axis_type='datetime', width=900, height=300)
p.line(df_lot_sorted["Timestamp"], df_lot_sorted["Price_Model2"], color=Category10[3][0], legend_label="Model 2")
p.line(df_lot_sorted["Timestamp"], streamed_prices, color=Category10[3][1], legend_label="Model 1")
p.line(df[df["SystemCodeNumber"] == lot_id]["Timestamp"],
       df[df["SystemCodeNumber"] == lot_id]["Price_Model3"],
       color=Category10[3][2], legend_label="Model 3")

p.legend.location = "top_left"
p.xaxis.axis_label = "Time"
p.yaxis.axis_label = "Price ($)"
show(p)


In [16]:
# Plot occupancy rate and Model 2 price side-by-side
df_lot_sorted = df_lot.sort_values("Timestamp")

p2 = figure(title=f"Occupancy Rate vs Model 2 Price: Lot {lot_id}", x_axis_type='datetime', width=900, height=300)
p2.line(df_lot_sorted["Timestamp"], df_lot_sorted["OccupancyRate"], color="blue", legend_label="Occupancy Rate", line_dash="dashed")
p2.line(df_lot_sorted["Timestamp"], df_lot_sorted["Price_Model2"], color="green", legend_label="Model 2 Price")

p2.legend.location = "top_left"
p2.xaxis.axis_label = "Time"
p2.yaxis.axis_label = "Value"
show(p2)


In [17]:
# Plot Model 3 price evolution for 4 lots
from bokeh.layouts import gridplot

plots = []
unique_lots = df['SystemCodeNumber'].unique()[:4]

for lot in unique_lots:
    lot_data = df[df['SystemCodeNumber'] == lot].sort_values("Timestamp")

    p = figure(title=f"Model 3 Price for Lot: {lot}", x_axis_type='datetime', width=400, height=250)
    p.line(lot_data["Timestamp"], lot_data["Price_Model3"], line_width=2)
    p.xaxis.axis_label = "Time"
    p.yaxis.axis_label = "Price ($)"

    plots.append(p)

# Arrange 2 plots per row
grid = gridplot([plots[i:i+2] for i in range(0, len(plots), 2)])
show(grid)
