In [17]:
# Use the %pip magic so the packages are installed into the environment of the
# running kernel; `!pip` runs in a subshell that may resolve a different pip.
%pip install pathway bokeh



In [18]:
from google.colab import files
# Run Colab's upload helper and keep whatever it returns in `uploaded`.
# NOTE(review): the recorded output of this cell reads
# "Saving dataset.csv to dataset (2).csv", i.e. this upload was stored under a
# new name, while the later cells read 'dataset.csv'. Confirm that the copy
# being loaded is the intended one.
uploaded = files.upload()

Saving dataset.csv to dataset (2).csv


In [19]:
# List the working directory to see which dataset copies and output files exist.
!ls

'dataset (1).csv'   dataset.csv         priced_dataset.csv
'dataset (2).csv'   output_prices.csv   sample_data


In [25]:
import pandas as pd
import numpy as np

# Load and preprocess the dataset
df = pd.read_csv('dataset.csv')
# The raw columns hold DD-MM-YYYY dates and HH:MM:SS times. Passing the exact
# format is strict, whereas dayfirst=True is only a parsing preference, so a
# row that does not match raises instead of risking a day/month mix-up.
df['Datetime'] = pd.to_datetime(
    df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'],
    format='%d-%m-%Y %H:%M:%S',
)
df = df.sort_values(['SystemCodeNumber', 'Datetime'])

# Handle missing values: anything non-numeric is coerced to NaN
df['Capacity'] = pd.to_numeric(df['Capacity'], errors='coerce')
df['Occupancy'] = pd.to_numeric(df['Occupancy'], errors='coerce')
df['QueueLength'] = pd.to_numeric(df['QueueLength'], errors='coerce')
df = df.dropna(subset=['Capacity', 'Occupancy'])  # Drop rows where Capacity or Occupancy is NaN
# A capacity of zero (or below) would make the rate inf or NaN (0/0); a single
# NaN rate turns every later price of that lot into NaN, so drop those rows.
# .copy() keeps the column assignments below from acting on a filtered view.
df = df[df['Capacity'] > 0].copy()

# Compute OccupancyRate
df['OccupancyRate'] = df['Occupancy'] / df['Capacity']

# Initialize parameters
base_price = 10.0
alpha = 5.0  # Price sensitivity to occupancy
threshold = 0.5  # Occupancy threshold for price stability
max_price_change = 2.0  # Cap on price change per time step
min_price, max_price = 5.0, 50.0  # Price bounds

# Initialize price column
df['Price'] = base_price

# Function to calculate next price
def calculate_next_price(prev_price, occupancy_rate, alpha, threshold, max_change, min_price, max_price):
    """Return the price for the next time step.

    The step is ``alpha * (occupancy_rate - threshold)``, limited to
    ``[-max_change, max_change]``; it is added to ``prev_price`` and the sum
    is limited to ``[min_price, max_price]``.
    """
    raw_step = alpha * (occupancy_rate - threshold)
    # Limit how far the price may move in a single step
    bounded_step = np.minimum(np.maximum(raw_step, -max_change), max_change)
    # Keep the resulting price inside the allowed band
    return np.minimum(np.maximum(prev_price + bounded_step, min_price), max_price)

# Apply pricing model for each parking lot.
# Each lot starts at base_price on its earliest record; every later record's
# price is derived from the previous price and that record's occupancy rate.
for parking_lot in df['SystemCodeNumber'].unique():
    # mergesort is stable, so records sharing a timestamp keep a deterministic order
    lot_data = df[df['SystemCodeNumber'] == parking_lot].sort_values('Datetime', kind='mergesort')
    prices = [base_price]

    # The first record keeps base_price; walk the remaining records in time order
    for occupancy_rate in lot_data['OccupancyRate'].iloc[1:]:
        new_price = calculate_next_price(prices[-1], occupancy_rate, alpha, threshold, max_price_change, min_price, max_price)
        prices.append(new_price)

    # Write back through lot_data's own index so each price lands on the row it
    # was computed for, whatever the row order of df (a boolean mask would
    # assign in df order instead). Relies on df having unique index labels.
    df.loc[lot_data.index, 'Price'] = prices

# Preview the first rows of the priced data
preview_columns = ['SystemCodeNumber', 'Datetime', 'OccupancyRate', 'Price']
print(df[preview_columns].head())

# Write the full frame to CSV; datetimes are serialised as DD-MM-YYYY HH:MM:SS
output_path = 'cleaned_dataset.csv'
df.to_csv(output_path, index=False, date_format='%d-%m-%Y %H:%M:%S')

# Hand the written file to Colab's download helper
from google.colab import files
files.download(output_path)

  SystemCodeNumber            Datetime  OccupancyRate      Price
0      BHMBCCMKT01 2016-10-04 07:59:00       0.105719  10.000000
1      BHMBCCMKT01 2016-10-04 08:25:00       0.110919   8.054593
2      BHMBCCMKT01 2016-10-04 08:59:00       0.138648   6.247834
3      BHMBCCMKT01 2016-10-04 09:32:00       0.185442   5.000000
4      BHMBCCMKT01 2016-10-04 09:59:00       0.259965   5.000000


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [21]:
import pandas as pd
# Reload the raw CSV and print its column names.
# NOTE(review): this rebinds `df`, the same name the pricing cells use, to the
# raw frame, which lacks the derived Datetime / OccupancyRate / Price columns.
# If it is run after a pricing cell, that cell must be re-run before plotting.
df = pd.read_csv('dataset.csv')
print(df.columns)

Index(['ID', 'SystemCodeNumber', 'Capacity', 'Latitude', 'Longitude',
       'Occupancy', 'VehicleType', 'TrafficConditionNearby', 'QueueLength',
       'IsSpecialDay', 'LastUpdatedDate', 'LastUpdatedTime'],
      dtype='object')


In [22]:
# Peek at the raw date and time columns to check their string formats
for column_name in ('LastUpdatedDate', 'LastUpdatedTime'):
    print(df[column_name].head())

0    04-10-2016
1    04-10-2016
2    04-10-2016
3    04-10-2016
4    04-10-2016
Name: LastUpdatedDate, dtype: object
0    07:59:00
1    08:25:00
2    08:59:00
3    09:32:00
4    09:59:00
Name: LastUpdatedTime, dtype: object


In [23]:
from google.colab import files
# Pass 'output_prices.csv' to Colab's download helper.
# NOTE(review): none of the cells shown here writes 'output_prices.csv' (the
# pricing cells save 'cleaned_dataset.csv'); presumably it is left over from an
# earlier run — confirm this is still the file that should be downloaded.
files.download('output_prices.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [26]:
import pandas as pd
import numpy as np

# Load and preprocess the dataset
df = pd.read_csv('dataset.csv')
# The raw columns hold DD-MM-YYYY dates and HH:MM:SS times. Passing the exact
# format is strict, whereas dayfirst=True is only a parsing preference, so a
# row that does not match raises instead of risking a day/month mix-up.
df['Datetime'] = pd.to_datetime(
    df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'],
    format='%d-%m-%Y %H:%M:%S',
)
df = df.sort_values(['SystemCodeNumber', 'Datetime'])

# Handle missing values: anything non-numeric is coerced to NaN
df['Capacity'] = pd.to_numeric(df['Capacity'], errors='coerce')
df['Occupancy'] = pd.to_numeric(df['Occupancy'], errors='coerce')
df['QueueLength'] = pd.to_numeric(df['QueueLength'], errors='coerce')
df = df.dropna(subset=['Capacity', 'Occupancy'])  # Drop rows where Capacity or Occupancy is NaN
# A capacity of zero (or below) would make the rate inf or NaN (0/0); a single
# NaN rate turns every later price of that lot into NaN, so drop those rows.
# .copy() keeps the column assignments below from acting on a filtered view.
df = df[df['Capacity'] > 0].copy()

# Compute OccupancyRate
df['OccupancyRate'] = df['Occupancy'] / df['Capacity']

# Initialize parameters
base_price = 10.0
alpha = 5.0  # Price sensitivity to occupancy
threshold = 0.5  # Occupancy threshold for price stability
max_price_change = 2.0  # Cap on price change per time step
min_price, max_price = 5.0, 50.0  # Price bounds

# Initialize price column
df['Price'] = base_price

# Function to calculate next price
def calculate_next_price(prev_price, occupancy_rate, alpha, threshold, max_change, min_price, max_price):
    """Return the price for the next time step.

    The step is ``alpha * (occupancy_rate - threshold)``, limited to
    ``[-max_change, max_change]``; it is added to ``prev_price`` and the sum
    is limited to ``[min_price, max_price]``.
    """
    raw_step = alpha * (occupancy_rate - threshold)
    # Limit how far the price may move in a single step
    bounded_step = np.minimum(np.maximum(raw_step, -max_change), max_change)
    # Keep the resulting price inside the allowed band
    return np.minimum(np.maximum(prev_price + bounded_step, min_price), max_price)

# Apply pricing model for each parking lot.
# Each lot starts at base_price on its earliest record; every later record's
# price is derived from the previous price and that record's occupancy rate.
for parking_lot in df['SystemCodeNumber'].unique():
    # mergesort is stable, so records sharing a timestamp keep a deterministic order
    lot_data = df[df['SystemCodeNumber'] == parking_lot].sort_values('Datetime', kind='mergesort')
    prices = [base_price]

    # The first record keeps base_price; walk the remaining records in time order
    for occupancy_rate in lot_data['OccupancyRate'].iloc[1:]:
        new_price = calculate_next_price(prices[-1], occupancy_rate, alpha, threshold, max_price_change, min_price, max_price)
        prices.append(new_price)

    # Write back through lot_data's own index so each price lands on the row it
    # was computed for, whatever the row order of df (a boolean mask would
    # assign in df order instead). Relies on df having unique index labels.
    df.loc[lot_data.index, 'Price'] = prices

# Preview the first rows of the priced data
preview_columns = ['SystemCodeNumber', 'Datetime', 'OccupancyRate', 'Price']
print(df[preview_columns].head())

# Write the full frame to CSV; datetimes are serialised as DD-MM-YYYY HH:MM:SS
output_path = 'cleaned_dataset.csv'
df.to_csv(output_path, index=False, date_format='%d-%m-%Y %H:%M:%S')

# Hand the written file to Colab's download helper
from google.colab import files
files.download(output_path)

  SystemCodeNumber            Datetime  OccupancyRate      Price
0      BHMBCCMKT01 2016-10-04 07:59:00       0.105719  10.000000
1      BHMBCCMKT01 2016-10-04 08:25:00       0.110919   8.054593
2      BHMBCCMKT01 2016-10-04 08:59:00       0.138648   6.247834
3      BHMBCCMKT01 2016-10-04 09:32:00       0.185442   5.000000
4      BHMBCCMKT01 2016-10-04 09:59:00       0.259965   5.000000


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [27]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
output_notebook()  # Display plots inline in Colab

# Select every record of the lot that appears in the first row of df
first_lot_code = df['SystemCodeNumber'].iloc[0]
lot_data = df[df['SystemCodeNumber'] == first_lot_code]
timestamps = lot_data['Datetime']

# Build the figure: price and a x10-scaled occupancy rate share one y-axis
p = figure(
    title="Price vs Time for Parking Lot",
    x_axis_type='datetime',
    x_axis_label='Time',
    y_axis_label='Price ($)',
)
p.line(timestamps, lot_data['Price'], legend_label='Price', line_color='blue')
p.line(timestamps, lot_data['OccupancyRate'] * 10, legend_label='Occupancy Rate (scaled)', line_color='red')
show(p)