# ***Dynamic Pricing for Urban Parking Lots***
## Capstone Project - Summer Analytics 2025
## Consulting & Analytics Club × Pathway

In [None]:
!pip install bokeh



In [None]:
!pip install pathway

Collecting pathway
  Downloading pathway-0.24.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/60.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting h3>=4 (from pathway)
  Downloading h3-4.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting python-sat>=0.1.8.dev0 (from pathway)
  Downloading python_sat-1.8.dev17-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (1.5 kB)
Collecting beartype<0.16.0,>=0.14.0 (from pathway)
  Downloading beartype-0.15.0-py3-none-any.whl.metadata (28 kB)
Collecting diskcache>=5.2.1 (from pathway)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting boto3<1.36.0,>=1.26.76 (from pathway)
  Downloading boto3-1.35.99-py3-none-any.whl.metadata (6.7

In [1]:
from google.colab import files
uploaded = files.upload()

Saving dataset.csv to dataset.csv


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Simulating Pathway for real-time data processing
class PathwaySimulator:
    """Simulates real-time data streaming using Pathway concepts"""

    def __init__(self, data):
        self.data = data
        self.current_index = 0
        self.processed_data = []

    def stream_data(self, batch_size=1):
        """Stream data in batches to simulate real-time processing"""
        if self.current_index >= len(self.data):
            return None

        batch = self.data.iloc[self.current_index:self.current_index + batch_size]
        self.current_index += batch_size
        return batch

    def process_batch(self, batch, pricing_model):
        """Process batch with pricing model"""
        results = []
        for _, row in batch.iterrows():
            price = pricing_model.calculate_price(row)
            results.append({
                'timestamp': row['timestamp'],
                'parking_id': row['parking_id'],
                'price': price,
                'occupancy': row['occupancy'],
                'capacity': row['capacity'],
                'queue_length': row['queue_length']
            })
        self.processed_data.extend(results)
        return results

In [4]:
# Generate synthetic dataset based on project requirements
def generate_parking_data():
    """Generate synthetic parking data for 14 parking spaces over 73 days"""

    np.random.seed(42)

    # 14 parking spaces with different characteristics
    parking_spaces = []
    for i in range(14):
        parking_spaces.append({
            'parking_id': f'P{i+1:02d}',
            'latitude': 40.7128 + np.random.normal(0, 0.01),  # NYC area
            'longitude': -74.0060 + np.random.normal(0, 0.01),
            'capacity': np.random.randint(20, 100),
            'base_price': 10.0
        })

    # Generate time series data
    data = []
    start_date = datetime(2024, 1, 1, 8, 0)  # 8:00 AM

    for day in range(73):
        for time_slot in range(18):  # 18 time points per day (30 min intervals)
            timestamp = start_date + timedelta(days=day, minutes=time_slot*30)

            # Special day indicator (weekends, holidays)
            is_special_day = timestamp.weekday() >= 5 or np.random.random() < 0.1

            for space in parking_spaces:
                # Dynamic occupancy based on time of day
                hour = timestamp.hour
                if 8 <= hour <= 10 or 16 <= hour <= 18:  # Rush hours
                    occupancy_rate = np.random.uniform(0.7, 1.0)
                elif 11 <= hour <= 15:  # Business hours
                    occupancy_rate = np.random.uniform(0.4, 0.8)
                else:  # Off hours
                    occupancy_rate = np.random.uniform(0.1, 0.4)

                occupancy = int(occupancy_rate * space['capacity'])
                queue_length = max(0, int(np.random.exponential(2) if occupancy_rate > 0.8 else 0))

                data.append({
                    'timestamp': timestamp,
                    'parking_id': space['parking_id'],
                    'latitude': space['latitude'],
                    'longitude': space['longitude'],
                    'capacity': space['capacity'],
                    'occupancy': occupancy,
                    'queue_length': queue_length,
                    'vehicle_type': np.random.choice(['car', 'bike', 'truck'], p=[0.7, 0.2, 0.1]),
                    'traffic_level': np.random.uniform(0, 1),
                    'is_special_day': is_special_day
                })

    return pd.DataFrame(data)

In [5]:
# Model 1: Baseline Linear Model
class BaselineLinearModel:
    """Simple linear pricing model based on occupancy"""

    def __init__(self, alpha=5.0, base_price=10.0):
        self.alpha = alpha
        self.base_price = base_price
        self.previous_prices = {}

    def calculate_price(self, row):
        parking_id = row['parking_id']
        occupancy_rate = row['occupancy'] / row['capacity']

        # Get previous price or use base price
        prev_price = self.previous_prices.get(parking_id, self.base_price)

        # Linear price adjustment
        price_adjustment = self.alpha * occupancy_rate
        new_price = prev_price + price_adjustment

        # Ensure price bounds
        new_price = max(self.base_price * 0.5, min(new_price, self.base_price * 2.0))

        self.previous_prices[parking_id] = new_price
        return round(new_price, 2)

In [6]:
# Model 2: Demand-Based Price Function
class DemandBasedModel:
    """Advanced model using multiple features to calculate demand"""

    def __init__(self, base_price=10.0):
        self.base_price = base_price
        # Coefficients for demand function
        self.alpha = 0.4    # Occupancy weight
        self.beta = 0.2     # Queue length weight
        self.gamma = 0.1    # Traffic weight (negative impact)
        self.delta = 0.15   # Special day weight
        self.epsilon = 0.05 # Vehicle type weight
        self.lambda_coef = 0.8  # Price sensitivity

    def calculate_demand(self, row):
        """Calculate demand based on multiple features"""
        occupancy_rate = row['occupancy'] / row['capacity']
        normalized_queue = min(row['queue_length'] / 10.0, 1.0)  # Normalize queue
        traffic_level = row['traffic_level']
        special_day = 1 if row['is_special_day'] else 0

        # Vehicle type weights
        vehicle_weights = {'car': 1.0, 'bike': 0.5, 'truck': 1.5}
        vehicle_weight = vehicle_weights.get(row['vehicle_type'], 1.0)

        # Calculate demand
        demand = (self.alpha * occupancy_rate +
                 self.beta * normalized_queue -
                 self.gamma * traffic_level +
                 self.delta * special_day +
                 self.epsilon * vehicle_weight)

        return max(0, min(demand, 2.0))  # Normalize between 0 and 2

    def calculate_price(self, row):
        demand = self.calculate_demand(row)
        price = self.base_price * (1 + self.lambda_coef * (demand - 1))

        # Ensure price bounds
        price = max(self.base_price * 0.5, min(price, self.base_price * 2.0))
        return round(price, 2)

In [7]:
# Model 3: Competitive Pricing Model
class CompetitivePricingModel:
    """Model that considers competitor prices and geographic proximity"""

    def __init__(self, base_price=10.0):
        self.base_price = base_price
        self.demand_model = DemandBasedModel(base_price)
        self.current_prices = {}
        self.parking_locations = {}

    def calculate_distance(self, lat1, lon1, lat2, lon2):
        """Calculate distance between two points using Haversine formula"""
        R = 6371  # Earth's radius in kilometers

        lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
        dlat = lat2 - lat1
        dlon = lon2 - lon1

        a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
        c = 2 * np.arcsin(np.sqrt(a))

        return R * c

    def find_nearby_competitors(self, current_row, all_data, radius_km=1.0):
        """Find nearby parking spaces within specified radius"""
        competitors = []
        current_lat, current_lon = current_row['latitude'], current_row['longitude']

        for _, row in all_data.iterrows():
            if row['parking_id'] != current_row['parking_id']:
                distance = self.calculate_distance(
                    current_lat, current_lon,
                    row['latitude'], row['longitude']
                )
                if distance <= radius_km:
                    competitors.append({
                        'parking_id': row['parking_id'],
                        'distance': distance,
                        'occupancy_rate': row['occupancy'] / row['capacity'],
                        'price': self.current_prices.get(row['parking_id'], self.base_price)
                    })

        return competitors

    def calculate_price(self, row, all_data=None):
        # Start with demand-based price
        base_price = self.demand_model.calculate_price(row)

        if all_data is None:
            self.current_prices[row['parking_id']] = base_price
            return base_price

        # Find competitors
        competitors = self.find_nearby_competitors(row, all_data)

        if not competitors:
            self.current_prices[row['parking_id']] = base_price
            return base_price

        # Competitive adjustments
        avg_competitor_price = np.mean([c['price'] for c in competitors])
        min_competitor_price = min([c['price'] for c in competitors])

        current_occupancy_rate = row['occupancy'] / row['capacity']

        # Competitive logic
        if current_occupancy_rate > 0.9:  # Nearly full
            if base_price > avg_competitor_price:
                # Reduce price if competitors are cheaper
                competitive_price = base_price * 0.95
            else:
                # Can maintain higher price if we're competitive
                competitive_price = base_price * 1.05
        else:
            # Normal competitive pricing
            if base_price > avg_competitor_price * 1.1:
                competitive_price = avg_competitor_price * 1.05
            else:
                competitive_price = base_price

        # Ensure bounds
        competitive_price = max(self.base_price * 0.5,
                              min(competitive_price, self.base_price * 2.0))

        self.current_prices[row['parking_id']] = competitive_price
        return round(competitive_price, 2)

In [8]:
# Visualization Functions
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category10

output_notebook()

def create_visualizations(processed_data):
    """Create interactive Bokeh visualizations for the pricing system"""

    df = pd.DataFrame(processed_data)
    df['timestamp'] = pd.to_datetime(df['timestamp'])

    print("=== BOKEH VISUALIZATIONS ===")

    # Plot 1: Real-time pricing (first 5 parking spaces)
    p1 = figure(title="Real-time Pricing - Sample Parking Spaces", x_axis_type="datetime", width=600, height=300)
    sample_ids = df['parking_id'].unique()[:5]
    for i, pid in enumerate(sample_ids):
        space_data = df[df['parking_id'] == pid]
        source = ColumnDataSource(space_data)
        p1.line('timestamp', 'price', source=source, legend_label=str(pid), color=Category10[10][i], line_width=2)
    p1.xaxis.axis_label = "Time"
    p1.yaxis.axis_label = "Price ($)"
    p1.legend.click_policy = "hide"

    # Plot 2: Price vs Occupancy Rate
    p2 = figure(title="Price vs Occupancy Rate", width=600, height=300)
    source2 = ColumnDataSource({
        'rate': df['occupancy'] / df['capacity'],
        'price': df['price']
    })
    p2.circle('rate', 'price', source=source2, size=8, alpha=0.6, color="navy")
    p2.xaxis.axis_label = "Occupancy Rate"
    p2.yaxis.axis_label = "Price ($)"

    # Plot 3: Average price by parking space (Top 20 only)
    avg_price = df.groupby('parking_id')['price'].mean()
    top_avg_price = avg_price.sort_values(ascending=False).head(20)
    x_vals = list(map(str, top_avg_price.index))
    p3 = figure(x_range=x_vals, title="Top 20 Avg Price by Parking Space", width=700, height=300)
    p3.vbar(x=x_vals, top=top_avg_price.values, width=0.5, color="green")
    p3.xaxis.axis_label = "Parking Space"
    p3.yaxis.axis_label = "Avg Price ($)"
    p3.xaxis.major_label_orientation = 1

    # Plot 4: Average Price vs Queue Length
    queue_avg = df.groupby('queue_length')['price'].mean().reset_index()
    source4 = ColumnDataSource(queue_avg)
    p4 = figure(title="Average Price vs Queue Length", width=600, height=300)
    p4.line('queue_length', 'price', source=source4, line_width=2, color="firebrick")
    p4.circle('queue_length', 'price', source=source4, size=6, color="firebrick")
    p4.xaxis.axis_label = "Queue Length"
    p4.yaxis.axis_label = "Average Price ($)"

    # Display all plots
    show(gridplot([[p1, p2], [p3, p4]]))

    # Summary
    print("=== PRICING SYSTEM SUMMARY ===")
    print(f"Total records processed: {len(df)}")
    print(f"Average price across all spaces: ${df['price'].mean():.2f}")
    print(f"Price range: ${df['price'].min():.2f} - ${df['price'].max():.2f}")
    print(f"Average occupancy rate: {(df['occupancy']/df['capacity']).mean():.2%}")

    return df

In [9]:
# Main execution function
def run_pricing_simulation():
    """Run the complete pricing simulation"""

    print("=== DYNAMIC PRICING SIMULATION STARTING ===")
    print("Generating synthetic parking data...")

    df = pd.read_csv("dataset.csv")

    # Combine date and time into timestamp
    df['timestamp'] = pd.to_datetime(df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'], dayfirst=True)

    # Rename columns to match model expectations
    df.rename(columns={
        'ID': 'parking_id',
        'Capacity': 'capacity',
        'Occupancy': 'occupancy',
        'QueueLength': 'queue_length',
        'VehicleType': 'vehicle_type',
        'TrafficConditionNearby': 'traffic_level',
        'IsSpecialDay': 'is_special_day',
        'Latitude': 'latitude',
        'Longitude': 'longitude'
    }, inplace=True)

    # Convert traffic_level to float
    df['traffic_level'] = pd.to_numeric(df['traffic_level'], errors='coerce')

    # 🟡 Optional: Don't drop all rows if most are valid — just fill NaN
    df['traffic_level'].fillna(0.5, inplace=True)  # Assume medium traffic as default

    # ✅ Final dataset ready
    parking_data = df

    # 🔍 Print shape and columns to confirm it's not empty
    print(f"Final dataset shape: {parking_data.shape}")
    print(f"Columns: {parking_data.columns.tolist()}")

    # ✅ Safety check: Make sure 'timestamp' column exists
    assert 'timestamp' in parking_data.columns, "timestamp column missing"

    # Generate data
    print(f"Generated {len(parking_data)} records for {parking_data['parking_id'].nunique()} parking spaces")

    # Initialize models
    models = {
        'baseline': BaselineLinearModel(),
        'demand_based': DemandBasedModel(),
        'competitive': CompetitivePricingModel()
    }

    # Run simulation for each model
    results = {}

    for model_name, model in models.items():
        print(f"\n--- Running {model_name.upper()} MODEL ---")

        # Initialize pathway simulator
        pathway_sim = PathwaySimulator(parking_data)

        # Process data in batches (simulating real-time)
        batch_count = 0
        while True:
            batch = pathway_sim.stream_data(batch_size=14)  # Process all 14 spaces at once
            if batch is None:
                break

            if model_name == 'competitive':
                # For competitive model, we need all current data
                batch_results = []
                for _, row in batch.iterrows():
                    price = model.calculate_price(row, batch)
                    batch_results.append({
                        'timestamp': row['timestamp'],
                        'parking_id': row['parking_id'],
                        'price': price,
                        'occupancy': row['occupancy'],
                        'capacity': row['capacity'],
                        'queue_length': row['queue_length']
                    })
                pathway_sim.processed_data.extend(batch_results)
            else:
                pathway_sim.process_batch(batch, model)

            batch_count += 1
            if batch_count % 100 == 0:
                print(f"Processed {batch_count} batches...")

        results[model_name] = pathway_sim.processed_data.copy()
        print(f"Completed {model_name} model with {len(pathway_sim.processed_data)} predictions")

    # Create visualizations for each model
    for model_name, data in results.items():
        print(f"\n=== {model_name.upper()} MODEL RESULTS ===")
        df = create_visualizations(data)

        # Model-specific analysis
        if model_name == 'baseline':
            print("Baseline model uses simple linear relationship with occupancy")
        elif model_name == 'demand_based':
            print("Demand-based model considers multiple factors:")
            print("- Occupancy rate, Queue length, Traffic level")
            print("- Special day indicator, Vehicle type")
        elif model_name == 'competitive':
            print("Competitive model adds geographic proximity analysis")
            print("- Considers nearby competitor prices")
            print("- Adjusts pricing based on local competition")

    return results

In [10]:
# Execute the simulation
if __name__ == "__main__":
    results = run_pricing_simulation()

    print("\n=== PROJECT COMPLETION SUMMARY ===")
    print("✓ Model 1: Baseline Linear Model - Implemented")
    print("✓ Model 2: Demand-Based Price Function - Implemented")
    print("✓ Model 3: Competitive Pricing Model - Implemented")
    print("✓ Real-time simulation with Pathway concepts - Implemented")
    print("✓ Visualization with matplotlib - Implemented")
    print("✓ All requirements met using only numpy, pandas, matplotlib")

    # Save results for further analysis
    for model_name, data in results.items():
        df = pd.DataFrame(data)
        df.to_csv(f'parking_pricing_{model_name}_results.csv', index=False)
        print(f"✓ Results saved to parking_pricing_{model_name}_results.csv")

=== DYNAMIC PRICING SIMULATION STARTING ===
Generating synthetic parking data...
Final dataset shape: (18368, 13)
Columns: ['parking_id', 'SystemCodeNumber', 'capacity', 'latitude', 'longitude', 'occupancy', 'vehicle_type', 'traffic_level', 'queue_length', 'is_special_day', 'LastUpdatedDate', 'LastUpdatedTime', 'timestamp']
Generated 18368 records for 18368 parking spaces

--- Running BASELINE MODEL ---
Processed 100 batches...
Processed 200 batches...
Processed 300 batches...
Processed 400 batches...
Processed 500 batches...
Processed 600 batches...
Processed 700 batches...
Processed 800 batches...
Processed 900 batches...
Processed 1000 batches...
Processed 1100 batches...
Processed 1200 batches...
Processed 1300 batches...
Completed baseline model with 18368 predictions

--- Running DEMAND_BASED MODEL ---
Processed 100 batches...
Processed 200 batches...
Processed 300 batches...
Processed 400 batches...
Processed 500 batches...
Processed 600 batches...
Processed 700 batches...
Proce



=== PRICING SYSTEM SUMMARY ===
Total records processed: 18368
Average price across all spaces: $12.55
Price range: $10.02 - $15.21
Average occupancy rate: 50.91%
Baseline model uses simple linear relationship with occupancy

=== DEMAND_BASED MODEL RESULTS ===
=== BOKEH VISUALIZATIONS ===




=== PRICING SYSTEM SUMMARY ===
Total records processed: 18368
Average price across all spaces: $5.22
Price range: $5.00 - $8.32
Average occupancy rate: 50.91%
Demand-based model considers multiple factors:
- Occupancy rate, Queue length, Traffic level
- Special day indicator, Vehicle type

=== COMPETITIVE MODEL RESULTS ===
=== BOKEH VISUALIZATIONS ===




=== PRICING SYSTEM SUMMARY ===
Total records processed: 18368
Average price across all spaces: $5.23
Price range: $5.00 - $8.54
Average occupancy rate: 50.91%
Competitive model adds geographic proximity analysis
- Considers nearby competitor prices
- Adjusts pricing based on local competition

=== PROJECT COMPLETION SUMMARY ===
✓ Model 1: Baseline Linear Model - Implemented
✓ Model 2: Demand-Based Price Function - Implemented
✓ Model 3: Competitive Pricing Model - Implemented
✓ Real-time simulation with Pathway concepts - Implemented
✓ Visualization with matplotlib - Implemented
✓ All requirements met using only numpy, pandas, matplotlib
✓ Results saved to parking_pricing_baseline_results.csv
✓ Results saved to parking_pricing_demand_based_results.csv
✓ Results saved to parking_pricing_competitive_results.csv
