<a href="https://colab.research.google.com/github/richa999513/SummerAnalyticsCapstoneProject/blob/main/Capstone.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pathway bokeh --quiet

In [None]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource
import time

output_notebook()


## Data cleaning

In [None]:
# Read the raw parking records (point this at wherever the CSV lives)
df = pd.read_csv('/content/sample_data/dataset.csv')

# Fuse the separate date and time strings into a single datetime column
date_and_time = df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime']
df['timestamp'] = pd.to_datetime(date_and_time, format='%d-%m-%Y %H:%M:%S')

# Short snake_case aliases for the original column names
column_aliases = {
    'ID': 'lot_id',
    'SystemCodeNumber': 'code',
    'Capacity': 'capacity',
    'Latitude': 'lat',
    'Longitude': 'lon',
    'Occupancy': 'occupancy',
    'VehicleType': 'vehicle_type',
    'TrafficConditionNearby': 'traffic',
    'QueueLength': 'queue',
    'IsSpecialDay': 'special_day',
}
df = df.rename(columns=column_aliases)

# Ordinal encoding of the traffic label; labels absent from the map become NaN
traffic_map = {'low': 0, 'average': 1, 'high': 2}
df['traffic_level'] = df['traffic'].map(traffic_map)

# Show the first rows of the columns the pricing models care about
preview_columns = ['timestamp', 'lot_id', 'occupancy', 'queue', 'vehicle_type', 'traffic_level']
df[preview_columns].head()


Unnamed: 0,timestamp,lot_id,occupancy,queue,vehicle_type,traffic_level
0,2016-10-04 07:59:00,0,61,1,car,0
1,2016-10-04 08:25:00,1,64,1,car,0
2,2016-10-04 08:59:00,2,80,2,car,0
3,2016-10-04 09:32:00,3,107,2,car,0
4,2016-10-04 09:59:00,4,150,2,bike,0


## Model 1 : Pricing Baseline Model

In [None]:
BASE_PRICE = 10  # reference price; not read by linear_price itself
ALPHA = 5.0      # weight applied to the occupancy ratio


def linear_price(prev_price, occupancy, capacity):
    """Return the next price under the linear baseline model.

    The previous price is raised by ``ALPHA * occupancy / capacity`` and the
    result is rounded to two decimals. When ``capacity`` is zero the ratio is
    undefined, so ``prev_price`` is handed back untouched (and unrounded).
    """
    if capacity != 0:
        utilisation = occupancy / capacity
        return round(prev_price + ALPHA * utilisation, 2)
    return prev_price


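## Model 2 : Demand Based Pricing

The first cell below streams the Model 1 (linear) baseline so its price curve can be compared; the second cell implements the demand-based Model 2.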

In [None]:
# Settings for the streamed Model 1 (linear) run in this cell
base_price = 10  # starting price; the loop below clips prices to 0.5x-2x of it
alpha = 5.0      # multiplier on the occupancy ratio added at every step

# Stand-in for a Pathway stream: replays a DataFrame one row at a time
def stream_data(data, delay=0.5):
    """Yield the rows of ``data`` in order, pausing ``delay`` seconds after each.

    The pause runs when the consumer requests the next item, so the first row
    is delivered immediately and one final pause follows the last row.
    """
    for index_and_row in data.iterrows():
        record = index_and_row[1]
        yield record
        time.sleep(delay)

# Series collected for the live plot
time_steps = []
prices = []

# Running price, carried from one streamed row to the next
price_t = base_price

output_notebook()

for i, row in enumerate(stream_data(df.head(30), delay=0.5)):

    # The data-cleaning cell renamed the columns to snake_case, so on a fresh
    # Restart & Run All the frame exposes 'occupancy'/'capacity'; the raw
    # 'Occupancy'/'Capacity' keys would raise KeyError here.
    capacity = row['capacity']
    if capacity != 0:
        occ_ratio = row['occupancy'] / capacity
        price_t = price_t + alpha * occ_ratio
    # A zero-capacity row leaves the price unchanged (same rule as
    # linear_price) instead of feeding inf/NaN into the running price.

    # Keep the price within 0.5x-2x of the base price
    price_t = np.clip(price_t, 0.5 * base_price, 2 * base_price)

    time_steps.append(i)
    prices.append(price_t)

    # re-plot on each step (safer in Colab)
    p = figure(title="Model 1 Linear Pricing", x_axis_label="Time step", y_axis_label="Price ($)", width=700, height=400)
    p.line(time_steps, prices, line_width=2, color="red", legend_label="My Lot Price")
    show(p)

    print(f"Time {i}: price ${price_t:.2f}")


Time 0: price $10.21


Time 1: price $10.43


Time 2: price $10.71


Time 3: price $11.08


Time 4: price $11.60


Time 5: price $12.21


Time 6: price $12.97


Time 7: price $13.83


Time 8: price $14.73


Time 9: price $15.65


Time 10: price $16.58


Time 11: price $17.49


Time 12: price $18.32


Time 13: price $19.06


Time 14: price $19.73


Time 15: price $20.00


Time 16: price $20.00


Time 17: price $20.00


Time 18: price $20.00


Time 19: price $20.00


Time 20: price $20.00


Time 21: price $20.00


Time 22: price $20.00


Time 23: price $20.00


Time 24: price $20.00


Time 25: price $20.00


Time 26: price $20.00


Time 27: price $20.00


Time 28: price $20.00


Time 29: price $20.00


In [None]:
import pandas as pd
import numpy as np
import pathway as pw
from bokeh.plotting import figure, output_notebook, show
import time

# Re-read the raw CSV; the loop in this cell indexes the original column names
df = pd.read_csv('/content/sample_data/dataset.csv')

# Categorical label -> numeric weight used in the demand formula
traffic_weights = {
    'low': 0.2,
    'average': 0.5,
    'high': 0.8,
}
vehicle_weights = {
    'car': 1.0,
    'bike': 0.5,
    'truck': 1.5,
    'cycle': 0.3,
}

# Model 2 parameters
alpha = 1.0          # weight of the occupancy ratio
beta = 0.3           # weight of the queue length
gamma = 0.2          # weight of the traffic term (subtracted from demand)
delta = 1.0          # weight of the special-day flag
epsilon = 0.5        # weight of the vehicle-type term
lambda_coeff = 0.6   # how strongly normalised demand moves the price
base_price = 10      # price at zero demand; also anchors the 0.5x-2x clip

# Stand-in for a Pathway stream: feeds the rows of a DataFrame one by one
def stream_data(data, delay=0.5):
    """Yield the rows of ``data`` in order, pausing ``delay`` seconds after each.

    The pause runs when the consumer requests the next item, so the first row
    is delivered immediately and one final pause follows the last row.
    """
    for index_and_row in data.iterrows():
        record = index_and_row[1]
        yield record
        time.sleep(delay)

# Accumulators for the plotted series
time_steps, prices = [], []

# Route Bokeh output to the notebook
output_notebook()

first_rows = df.head(30)
for i, row in enumerate(stream_data(first_rows, delay=0.5)):

    # Raw inputs for this observation; unknown labels fall back to defaults
    occ_ratio = row['Occupancy'] / row['Capacity']
    traffic_weight = traffic_weights.get(row['TrafficConditionNearby'], 0.5)
    vehicle_weight = vehicle_weights.get(row['VehicleType'], 1.0)

    # Individual contributions to demand (traffic is the only negative one)
    occupancy_term = alpha * occ_ratio
    queue_term = beta * row['QueueLength']
    traffic_term = gamma * traffic_weight
    special_day_term = delta * row['IsSpecialDay']
    vehicle_term = epsilon * vehicle_weight
    demand = occupancy_term + queue_term - traffic_term + special_day_term + vehicle_term

    # Squash demand into [0, 1] before it drives the price
    norm_demand = np.clip(demand / 10, 0, 1)

    # Scale the base price by demand, then bound it to 0.5x-2x of the base
    unbounded_price = base_price * (1 + lambda_coeff * norm_demand)
    price = np.clip(unbounded_price, 0.5 * base_price, 2 * base_price)

    time_steps.append(i)
    prices.append(price)

    # Draw a fresh figure every iteration (robust in Colab)
    p = figure(
        title="Model 2 Dynamic Pricing",
        x_axis_label="Time step",
        y_axis_label="Price ($)",
        width=700,
        height=400,
    )
    p.line(time_steps, prices, line_width=2, color="green", legend_label="My Lot Price")
    show(p)

    print(f"Time {i}: price ${price:.2f}")


Time 0: price $10.52


Time 1: price $10.52


Time 2: price $10.72


Time 3: price $10.75


Time 4: price $10.64


Time 5: price $11.00


Time 6: price $11.66


Time 7: price $11.40


Time 8: price $11.20


Time 9: price $11.77


Time 10: price $11.74


Time 11: price $11.74


Time 12: price $11.35


Time 13: price $11.15


Time 14: price $10.83


Time 15: price $10.81


Time 16: price $10.47


Time 17: price $10.75


Time 18: price $10.51


Time 19: price $10.48


Time 20: price $10.71


Time 21: price $10.69


Time 22: price $10.93


Time 23: price $11.21


Time 24: price $10.96


Time 25: price $11.13


Time 26: price $11.47


Time 27: price $11.65


Time 28: price $11.65


Time 29: price $11.62


## Model 3: Competitive pricing

In [None]:
import pandas as pd
import numpy as np
import pathway as pw
from bokeh.plotting import figure, output_notebook, show
from bokeh.io import output_notebook
import time

# Re-read the raw CSV; the loop in this cell indexes the original column names
df = pd.read_csv('/content/sample_data/dataset.csv')

# Categorical label -> numeric weight used in the demand formula
traffic_weights = {
    'low': 0.2,
    'average': 0.5,
    'high': 0.8,
}
vehicle_weights = {
    'car': 1.0,
    'bike': 0.5,
    'truck': 1.5,
    'cycle': 0.3,
}

# Model parameters
alpha = 1.0          # weight of the occupancy ratio
beta = 0.3           # weight of the queue length
gamma = 0.2          # weight of the traffic term (subtracted from demand)
delta = 1.0          # weight of the special-day flag
epsilon = 0.5        # weight of the vehicle-type term
lambda_coeff = 0.6   # how strongly normalised demand moves the price
base_price = 10      # price at zero demand; also anchors the 0.5x-2x clip

# Hard-coded competitor lots: identifier, position in degrees, starting price
competitor_rows = [
    ('COMP1', 26.145, 91.737, 11.0),
    ('COMP2', 26.143, 91.735, 9.5),
    ('COMP3', 26.146, 91.738, 12.0),
]
competitors = pd.DataFrame(
    competitor_rows,
    columns=['ID', 'Latitude', 'Longitude', 'CurrentPrice'],
)

def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometres between two points given in degrees.

    Uses the haversine formula on a sphere of radius 6371 km. All arguments
    are passed straight to NumPy ufuncs, so scalars and broadcastable arrays
    both work.

    Parameters
    ----------
    lat1, lon1 : latitude and longitude of the first point, in degrees.
    lat2, lon2 : latitude and longitude of the second point, in degrees.

    Returns
    -------
    Distance along the sphere's surface, in kilometres.
    """
    R = 6371  # sphere radius in km
    dlat = np.radians(lat2-lat1)
    dlon = np.radians(lon2-lon1)
    a = np.sin(dlat/2)**2 + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(dlon/2)**2
    # Floating-point rounding can leave `a` a hair outside [0, 1] (e.g. for
    # near-antipodal points); sqrt/arcsin would then return NaN, so clamp first.
    a = np.clip(a, 0.0, 1.0)
    return R * 2 * np.arcsin(np.sqrt(a))

# Stream simulator: feeds the rows of a DataFrame one by one
def stream_data(data, delay=0.5):
    """Yield the rows of ``data`` in order, pausing ``delay`` seconds after each.

    The pause runs when the consumer requests the next item, so the first row
    is delivered immediately and one final pause follows the last row.
    """
    for index_and_row in data.iterrows():
        record = index_and_row[1]
        yield record
        time.sleep(delay)

# Series collected for the live plot: our price plus one history per competitor
time_steps = []
prices = []
competitor_prices_history = {c: [] for c in competitors['ID']}

# Seeded generator so the simulated competitor price walk is identical on
# every Restart & Run All instead of depending on NumPy's global random state.
rng = np.random.default_rng(42)

# One colour per competitor line (cycled if there are more competitors than
# colours); without an explicit colour every competitor line shares the same
# default and the curves cannot be told apart.
competitor_colors = ["orange", "green", "purple", "brown", "gray"]

output_notebook()

for i, row in enumerate(stream_data(df.head(30), delay=0.5)):

    occ_ratio = row['Occupancy'] / row['Capacity']
    traffic_weight = traffic_weights.get(row['TrafficConditionNearby'], 0.5)
    vehicle_weight = vehicle_weights.get(row['VehicleType'], 1.0)

    # Same demand formula as Model 2: traffic is the only negative term
    demand = (
        alpha * occ_ratio
        + beta * row['QueueLength']
        - gamma * traffic_weight
        + delta * row['IsSpecialDay']
        + epsilon * vehicle_weight
    )

    norm_demand = np.clip(demand/10, 0, 1)

    # Demand-driven price, bounded to 0.5x-2x of the base price
    price = base_price * (1 + lambda_coeff * norm_demand)
    price = np.clip(price, 0.5*base_price, 2*base_price)

    # competitive logic: collect (id, price, distance) for competitors
    # closer than 1.0 (haversine's distance unit) to this lot
    lat, lon = row['Latitude'], row['Longitude']
    competitors_nearby = []
    for idx, comp in competitors.iterrows():
        dist = haversine(lat, lon, comp['Latitude'], comp['Longitude'])
        if dist < 1.0:
            competitors_nearby.append((comp['ID'], comp['CurrentPrice'], dist))

    # Only react to competitors when the lot is at least 90% full
    if occ_ratio >= 0.9 and competitors_nearby:
        cheaper_comp = min(competitors_nearby, key=lambda x:x[1])
        if cheaper_comp[1] < price:
            # Undercut the cheapest nearby competitor, but never below the floor
            price = cheaper_comp[1] - 0.5
            price = max(price, 0.5*base_price)
            print(f"Suggest rerouting to {cheaper_comp[0]} at ${cheaper_comp[1]:.2f}")
        else:
            # We are already the cheapest nearby: nudge the price up 5%, capped
            price = price * 1.05
            price = min(price, 2*base_price)

    time_steps.append(i)
    prices.append(price)

    # simulate competitor price change: one independent uniform step in
    # [-0.2, 0.2) per competitor, applied to the whole column at once
    price_steps = rng.uniform(-0.2, 0.2, size=len(competitors))
    competitors['CurrentPrice'] = competitors['CurrentPrice'].to_numpy() + price_steps
    for c, new_comp_price in zip(competitors['ID'], competitors['CurrentPrice']):
        competitor_prices_history[c].append(new_comp_price)

    # re-plot each time step
    p = figure(title="Model 3 Pricing", x_axis_label="Time", y_axis_label="Price ($)", width=700, height=400)
    p.line(time_steps, prices, legend_label="My Lot", line_width=2, color="blue")
    for k, c in enumerate(competitors['ID']):
        p.line(
            time_steps,
            competitor_prices_history[c],
            legend_label=c,
            line_width=2,
            color=competitor_colors[k % len(competitor_colors)],
        )
    show(p)
