In [1]:
import pandas as pd

# Load the dataset
df = pd.read_csv('dataset.csv')

# Display the first few rows
print(df.head())

# Check the columns
print(df.columns)


   ID SystemCodeNumber  Capacity   Latitude  Longitude  Occupancy VehicleType  \
0   0      BHMBCCMKT01       577  26.144536  91.736172         61         car   
1   1      BHMBCCMKT01       577  26.144536  91.736172         64         car   
2   2      BHMBCCMKT01       577  26.144536  91.736172         80         car   
3   3      BHMBCCMKT01       577  26.144536  91.736172        107         car   
4   4      BHMBCCMKT01       577  26.144536  91.736172        150        bike   

  TrafficConditionNearby  QueueLength  IsSpecialDay LastUpdatedDate  \
0                    low            1             0      04-10-2016   
1                    low            1             0      04-10-2016   
2                    low            2             0      04-10-2016   
3                    low            2             0      04-10-2016   
4                    low            2             0      04-10-2016   

  LastUpdatedTime  
0        07:59:00  
1        08:25:00  
2        08:59:00  
3     

In [17]:
import pandas as pd
import numpy as np
from math import radians, sin, cos, sqrt, atan2

# Load dataset
df = pd.read_csv("dataset.csv")

# Preprocess
df['Occupancy'] = pd.to_numeric(df['Occupancy'], errors='coerce')
df['Capacity'] = pd.to_numeric(df['Capacity'], errors='coerce')
df['QueueLength'] = pd.to_numeric(df['QueueLength'], errors='coerce')
df['IsSpecialDay'] = pd.to_numeric(df['IsSpecialDay'], errors='coerce')
df['OccupancyRate'] = df['Occupancy'] / df['Capacity']

# Traffic & Vehicle Mappings
traffic_map = {'low': 1, 'medium': 2, 'high': 3}
vehicle_map = {'car': 1.0, 'bike': 0.5, 'truck': 1.5}
df['TrafficScore'] = df['TrafficConditionNearby'].map(traffic_map)
df['VehicleWeight'] = df['VehicleType'].map(vehicle_map)

# Timestamp column for simulation
# Combine LastUpdatedDate and LastUpdatedTime into one datetime column
df['Timestamp'] = pd.to_datetime(df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'], dayfirst=True)



In [18]:
def model1_linear(df, alpha=5, base_price=10):
    df['Price_Model1'] = base_price + alpha * df['OccupancyRate']
    df['Price_Model1'] = df['Price_Model1'].clip(lower=5, upper=20)
    return df


In [19]:
def model2_demand(df, base_price=10, lambd=1.2):
    α, β, γ, δ, ε = 1.0, 0.5, 0.7, 1.0, 0.3
    df['DemandScore'] = (
        α * df['OccupancyRate'] +
        β * df['QueueLength'] -
        γ * df['TrafficScore'] +
        δ * df['IsSpecialDay'] +
        ε * df['VehicleWeight']
    )
    d_min, d_max = df['DemandScore'].min(), df['DemandScore'].max()
    df['NormalizedDemand'] = (df['DemandScore'] - d_min) / (d_max - d_min)
    df['Price_Model2'] = base_price * (1 + lambd * df['NormalizedDemand'])
    df['Price_Model2'] = df['Price_Model2'].clip(lower=5, upper=20)
    return df


In [20]:
def haversine(lat1, lon1, lat2, lon2):
    R = 6371
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat/2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2
    return R * 2 * atan2(sqrt(a), sqrt(1 - a))

def model3_competitive(df):
    df = model2_demand(df)
    latest_df = df.sort_values("Timestamp").groupby("SystemCodeNumber").tail(1).reset_index(drop=True)

    adjusted_prices = []
    for i, row in latest_df.iterrows():
        lat1, lon1 = row['Latitude'], row['Longitude']
        price2 = row['Price_Model2']
        occupancy = row['Occupancy']
        capacity = row['Capacity']
        
        competitors = []
        for j, other in latest_df.iterrows():
            if i == j: continue
            dist = haversine(lat1, lon1, other['Latitude'], other['Longitude'])
            if dist <= 1.0:
                competitors.append(other['Price_Model2'])

        if competitors:
            avg_comp_price = np.mean(competitors)
            new_price = min(price2, avg_comp_price) if occupancy >= capacity else max(price2, avg_comp_price)
        else:
            new_price = price2
        adjusted_prices.append(new_price)

    latest_df['Price_Model3'] = adjusted_prices
    return latest_df


In [21]:
import time

def simulate_streaming(df, model_func, delay=0.3):
    for i in range(1, len(df)):
        mini_df = df.iloc[:i].copy()
        updated = model_func(mini_df)
        last_price = updated.iloc[-1]['Price_Model3']
        lot = updated.iloc[-1]['SystemCodeNumber']
        print(f"Time {i} → {lot} → Price: ${last_price:.2f}")
        time.sleep(delay)


In [22]:
df = model1_linear(df)
df = model2_demand(df)
final_df = model3_competitive(df)

print(final_df[['SystemCodeNumber', 'Occupancy', 'QueueLength', 'Price_Model1', 'Price_Model2', 'Price_Model3']].head())


   SystemCodeNumber  Occupancy  QueueLength  Price_Model1  Price_Model2  \
0      Broad Street        540            3     13.913043           NaN   
1     Others-CCCPS8        806            3     13.048411           NaN   
2   Others-CCCPS202       1184            2     12.015662     12.267113   
3  Others-CCCPS135a       2533            3     13.261653     13.374311   
4  Others-CCCPS119a       1262            2     12.251159     12.115042   

   Price_Model3  
0           NaN  
1           NaN  
2     12.267113  
3     13.374311  
4     12.115042  


In [23]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource
output_notebook()

# Visualize a single lot
lot_id = final_df['SystemCodeNumber'].iloc[0]
lot_df = df[df['SystemCodeNumber'] == lot_id].sort_values("Timestamp")

source = ColumnDataSource(lot_df)

p = figure(title=f"Dynamic Pricing – {lot_id}", x_axis_type='datetime', width=800, height=300)
p.line(x='Timestamp', y='Price_Model1', source=source, color='blue', legend_label="Model 1")
p.line(x='Timestamp', y='Price_Model2', source=source, color='green', legend_label="Model 2")
p.line(x='Timestamp', y='Price_Model3', source=source, color='red', legend_label="Model 3")
p.legend.location = "top_left"
p.xaxis.axis_label = "Time"
p.yaxis.axis_label = "Price ($)"
show(p)


ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : y='Price_Model3' [closest match: 'Price_Model2'] {renderer: GlyphRenderer(id='p1078', ...)}
