# **Bus Depot Simulation Project: Modelling**

## Step 1: Import Libraries

In [2]:
from google.colab import drive

# Attach Google Drive to this runtime; the data file read later lives under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
from datetime import datetime

# Select seaborn's "deep" colour palette for subsequent plots
sns.set_palette(palette="deep")

In [5]:
pip install simpy

Collecting simpy
  Downloading simpy-4.1.1-py3-none-any.whl.metadata (6.1 kB)
Downloading simpy-4.1.1-py3-none-any.whl (27 kB)
Installing collected packages: simpy
Successfully installed simpy-4.1.1


In [6]:
import pandas as pd
import simpy

## Step 2: Load and Prepare the Data

In [20]:
# NOTE(review): the folder name contains a space just before the slash; it is kept verbatim
# here -- confirm it matches the actual Drive folder name.
CLEANED_DATA_PATH = "/content/drive/MyDrive/Simulation_and_Modelling_Project /Cleaned_Data_after_EDA.xlsx"
df = pd.read_excel(CLEANED_DATA_PATH)

In [21]:
# Print the loaded frame's column names, dtypes and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 330 entries, 0 to 329
Data columns (total 23 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   Bus ID                 330 non-null    int64         
 1   Route                  330 non-null    object        
 2   Day                    330 non-null    int64         
 3   R1 Depart_dt           330 non-null    datetime64[ns]
 4   R1 Arrive_dt           330 non-null    datetime64[ns]
 5   Duration_R1_N          330 non-null    int64         
 6   SOC R1                 330 non-null    int64         
 7   Before Charging WT R1  330 non-null    int64         
 8   Top-Up Start_dt        330 non-null    datetime64[ns]
 9   Top-Up End_dt          330 non-null    datetime64[ns]
 10  Top-Up Dur             330 non-null    int64         
 11  SOC Top-Up             330 non-null    int64         
 12  R2 Depart_dt           330 non-null    datetime64[ns]
 13  R2 Ar

In [22]:
# Midnight used as minute 0 of the simulation clock
base_time = pd.to_datetime('2025-03-29 00:00:00')

# Timestamp columns that get a numeric "<name>_min" companion column
time_cols = [
    'R1 Depart_dt', 'R1 Arrive_dt',
    'Top-Up Start_dt', 'Top-Up End_dt',
    'R2 Depart_dt', 'R2 Arrive_dt',
    'Overnight Start_dt', 'Overnight End_dt',
]

# Express each timestamp as minutes elapsed since base_time
for col in time_cols:
    elapsed = df[col] - base_time
    df[f'{col}_min'] = elapsed.dt.total_seconds() / 60

# Independent copy of the Day 3 rows, used later as the validation reference
is_day3 = df['Day'] == 3
df_day3 = df[is_day3].copy()

In [23]:
# Re-inspect the frame: the "<name>_min" columns created from time_cols should now be listed
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 330 entries, 0 to 329
Data columns (total 31 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   Bus ID                  330 non-null    int64         
 1   Route                   330 non-null    object        
 2   Day                     330 non-null    int64         
 3   R1 Depart_dt            330 non-null    datetime64[ns]
 4   R1 Arrive_dt            330 non-null    datetime64[ns]
 5   Duration_R1_N           330 non-null    int64         
 6   SOC R1                  330 non-null    int64         
 7   Before Charging WT R1   330 non-null    int64         
 8   Top-Up Start_dt         330 non-null    datetime64[ns]
 9   Top-Up End_dt           330 non-null    datetime64[ns]
 10  Top-Up Dur              330 non-null    int64         
 11  SOC Top-Up              330 non-null    int64         
 12  R2 Depart_dt            330 non-null    datetime64



---


## Step 3: Set Up the Simulation Environment
We’ll use SimPy to model the depot with 21 charging stations as a shared resource and prepare a dictionary to store validation results for Day 3.


---



In [24]:
# Simulation setup: one shared pool of chargers plus the containers the bus processes fill in
NUM_CHARGING_STATIONS = 21

env = simpy.Environment()
charging_stations = simpy.Resource(env, capacity=NUM_CHARGING_STATIONS)

# Each Day 3 list collects (bus_id, value) tuples; 'Buses_Charged_by_9AM' maps day -> bus ids
results_validation = {
    series_name: []
    for series_name in (
        'R1_wait_times_day3',
        'R2_wait_times_day3',
        'Top-Up_end_times_day3',
        'Overnight_end_times_day3',
    )
}
results_validation['Buses_Charged_by_9AM'] = {day_number: [] for day_number in (1, 2, 3)}
results_validation['SOC_Top-Up_day3'] = []
results_validation['SOC_Overnight_day3'] = []



---


## Step 4: Define the Bus Process
Each bus follows a sequence: depart for Route 1 (R1), arrive and charge (top-up), depart for Route 2 (R2), arrive and charge (overnight). We define a bus_process function to simulate this behavior.


---



In [25]:
# Minute offsets within one service day (minute 0 = midnight at the start of that day)
MINUTES_PER_DAY = 1440
TOP_UP_WINDOW_START_MIN = 720       # 12:00 PM
TOP_UP_WINDOW_END_MIN = 1080        # 6:00 PM
OVERNIGHT_WINDOW_START_MIN = 1200   # 8:00 PM
OVERNIGHT_WINDOW_END_MIN = 1980     # 9:00 AM next day


def bus_process(env, row, charging_stations, results_validation):
    """Simulate one bus on one day: Route 1, top-up charge, Route 2, overnight charge.

    This is a generator meant to be wrapped in ``env.process(...)``. All times are
    simulation minutes; the day's charging windows are offset by ``(Day - 1) * 1440``.

    Args:
        env: Simulation environment. The function reads ``env.now`` and yields
            ``env.timeout(...)`` events.
        row: Mapping-like record for one bus/day providing 'Bus ID', 'Day',
            'R1 Depart_dt_min', 'Duration_R1_N', 'SOC R1', 'SOC Top-Up', 'Top-Up Dur',
            'R2 Depart_dt_min', 'Duration_R2_N', 'SOC R2' and 'Overnight Dur'.
        charging_stations: Shared resource; ``request()`` is used as a context manager,
            so the slot is held for the charge duration and released on exit.
        results_validation: Dict mutated in place. For ``Day == 3`` the wait times,
            charge end times and simulated SOC values are appended as
            ``(bus_id, value)`` tuples. For every day, the bus id is appended to
            ``results_validation['Buses_Charged_by_9AM'][day]`` when the overnight
            charge ends at or before 9:00 AM the next day. A day that has no list yet
            gets one created instead of raising ``KeyError``.

    Yields:
        The timeout and resource-request events the environment must wait on.
    """
    bus_id = row['Bus ID']
    day = row['Day']
    record_day3 = day == 3  # detailed validation series are only collected for Day 3

    day_offset = (day - 1) * MINUTES_PER_DAY
    top_up_window_start = day_offset + TOP_UP_WINDOW_START_MIN
    top_up_window_end = day_offset + TOP_UP_WINDOW_END_MIN
    overnight_window_start = day_offset + OVERNIGHT_WINDOW_START_MIN
    overnight_window_end = day_offset + OVERNIGHT_WINDOW_END_MIN

    # --- Route 1: Depart and Arrive ---
    if row['R1 Depart_dt_min'] > env.now:
        yield env.timeout(row['R1 Depart_dt_min'] - env.now)
    yield env.timeout(row['Duration_R1_N'])
    r1_arrival_time = env.now

    # --- Top-Up Charging ---
    # Buses arriving before the window opens idle until 12:00 PM
    if r1_arrival_time < top_up_window_start:
        yield env.timeout(top_up_window_start - r1_arrival_time)
    with charging_stations.request() as req:
        yield req  # Queue if every station is busy
        wait_time_r1 = env.now - r1_arrival_time  # Includes window wait + queue
        if record_day3:
            results_validation['R1_wait_times_day3'].append((bus_id, wait_time_r1))
        # Linear charge from 'SOC R1' to 'SOC Top-Up' over 'Top-Up Dur' minutes;
        # a zero/negative duration means no charge is applied.
        soc_rate = (row['SOC Top-Up'] - row['SOC R1']) / row['Top-Up Dur'] if row['Top-Up Dur'] > 0 else 0
        yield env.timeout(row['Top-Up Dur'])
        top_up_end_time = env.now
        soc_top_up_sim = row['SOC R1'] + soc_rate * row['Top-Up Dur']
        if record_day3:
            results_validation['Top-Up_end_times_day3'].append((bus_id, top_up_end_time))
            results_validation['SOC_Top-Up_day3'].append((bus_id, soc_top_up_sim))
        # Overrunning the window is only reported, not prevented
        if top_up_end_time > top_up_window_end:
            print(f"Warning: Bus {bus_id} top-up ended at {top_up_end_time} beyond 6:00 PM")

    # --- Route 2: Depart and Arrive ---
    if row['R2 Depart_dt_min'] > env.now:
        yield env.timeout(row['R2 Depart_dt_min'] - env.now)
    yield env.timeout(row['Duration_R2_N'])
    r2_arrival_time = env.now

    # --- Overnight Charging ---
    # Buses arriving before the window opens idle until 8:00 PM
    if r2_arrival_time < overnight_window_start:
        yield env.timeout(overnight_window_start - r2_arrival_time)
    with charging_stations.request() as req:
        yield req
        wait_time_r2 = env.now - r2_arrival_time
        if record_day3:
            results_validation['R2_wait_times_day3'].append((bus_id, wait_time_r2))
        # Linear charge from 'SOC R2' up to 100% over 'Overnight Dur' minutes
        soc_rate = (100 - row['SOC R2']) / row['Overnight Dur'] if row['Overnight Dur'] > 0 else 0
        yield env.timeout(row['Overnight Dur'])
        overnight_end_time = env.now
        soc_overnight_sim = row['SOC R2'] + soc_rate * row['Overnight Dur']
        if record_day3:
            results_validation['Overnight_end_times_day3'].append((bus_id, overnight_end_time))
            results_validation['SOC_Overnight_day3'].append((bus_id, soc_overnight_sim))
        # Count the bus as ready when charging finished at or before 9:00 AM next day.
        # setdefault keeps days that were not pre-seeded in the dict from raising KeyError.
        if overnight_end_time <= overnight_window_end:
            results_validation['Buses_Charged_by_9AM'].setdefault(day, []).append(bus_id)



In [26]:
# Run simulation for 3 days through 9:00 AM on Day 4 (inclusive)
SIM_END_MIN = 3 * 1440 + 540  # 9:00 AM on Day 4 = 4860 simulated minutes
for _, row in df.iterrows():
    env.process(bus_process(env, row, charging_stations, results_validation))
# env.run(until=t) stops *before* processing events scheduled at exactly t, so a bus whose
# overnight charge ends at 9:00 AM sharp would never be recorded, although bus_process
# counts it as on time (<=). Stopping one minute later includes that boundary.
env.run(until=SIM_END_MIN + 1)





---


## Step 5: Start the Simulation
We initiate a process for each bus in the dataset and run the simulation past the end of Day 3 (minute 4320) until shortly after 9:00 AM on Day 4, so that Day 3's overnight charging can finish.


---



In [16]:
# Start a process for each bus -- but only on an environment whose clock has not advanced.
# Registering the buses again after a run would replay every schedule from the current
# time and can append spurious entries to results_validation.
if env.now == 0:
    for _, row in df.iterrows():
        env.process(bus_process(env, row, charging_stations, results_validation))

# NOTE(review): 4900 min is 9:40 AM on Day 4, i.e. 40 minutes past the 9:00 AM cut-off
# (3 * 1440 + 540 = 4860); presumably a margin so events at the cut-off are processed -- confirm.
SIM_HORIZON_MIN = 4900
# env.run() raises ValueError when `until` is not ahead of the clock, so skip when already there
if env.now < SIM_HORIZON_MIN:
    env.run(until=SIM_HORIZON_MIN)



---


## Step 6: Validate the Simulation
After running the simulation, we compare Day 3’s simulated wait times and end times with the actual data.


---



In [17]:
# Print the frame's column names, dtypes and non-null counts before building the validation tables
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 330 entries, 0 to 329
Data columns (total 31 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   Bus ID                  330 non-null    int64         
 1   Route                   330 non-null    object        
 2   Day                     330 non-null    int64         
 3   R1 Depart_dt            330 non-null    datetime64[ns]
 4   R1 Arrive_dt            330 non-null    datetime64[ns]
 5   Duration_R1_N           330 non-null    int64         
 6   SOC R1                  330 non-null    int64         
 7   Before Charging WT R1   330 non-null    int64         
 8   Top-Up Start_dt         330 non-null    datetime64[ns]
 9   Top-Up End_dt           330 non-null    datetime64[ns]
 10  Top-Up Dur              330 non-null    int64         
 11  SOC Top-Up              330 non-null    int64         
 12  R2 Depart_dt            330 non-null    datetime64

In [27]:
# Validation
def _sim_frame(result_key, value_col):
    """Wrap one list of (bus_id, value) tuples from results_validation as a two-column frame."""
    return pd.DataFrame(results_validation[result_key], columns=['Bus ID', value_col])


def _print_section(title, frame):
    """Print a title, the first 10 rows of ``frame`` and its total row count."""
    print(title)
    print(frame.head(10))
    print(f"... [{len(frame)} rows total]\n")


# Simulated Day 3 series, one frame per recorded quantity
r1_wait_day3 = _sim_frame('R1_wait_times_day3', 'Sim_R1_Wait')
r2_wait_day3 = _sim_frame('R2_wait_times_day3', 'Sim_R2_Wait')
top_up_end_day3 = _sim_frame('Top-Up_end_times_day3', 'Sim_Top-Up_End')
overnight_end_day3 = _sim_frame('Overnight_end_times_day3', 'Sim_Overnight_End')
soc_top_up_day3 = _sim_frame('SOC_Top-Up_day3', 'Sim_SOC_Top-Up')
soc_overnight_day3 = _sim_frame('SOC_Overnight_day3', 'Sim_SOC_Overnight')

# Merge with actuals; every Diff_* column is simulated minus recorded
validation_r1 = (
    r1_wait_day3
    .merge(df_day3[['Bus ID', 'Before Charging WT R1']], on='Bus ID')
    .assign(Diff_R1_Wait=lambda f: f['Sim_R1_Wait'] - f['Before Charging WT R1'])
)

validation_r2 = (
    r2_wait_day3
    .merge(df_day3[['Bus ID', 'Before Charging WT R2']], on='Bus ID')
    .assign(Diff_R2_Wait=lambda f: f['Sim_R2_Wait'] - f['Before Charging WT R2'])
)

validation_top_up = (
    top_up_end_day3
    .merge(df_day3[['Bus ID', 'Top-Up End_dt_min', 'SOC Top-Up']], on='Bus ID')
    .assign(**{'Diff_Top-Up_End': lambda f: f['Sim_Top-Up_End'] - f['Top-Up End_dt_min']})
    .merge(soc_top_up_day3, on='Bus ID')
    .assign(**{'Diff_SOC_Top-Up': lambda f: f['Sim_SOC_Top-Up'] - f['SOC Top-Up']})
)

validation_overnight = (
    overnight_end_day3
    .merge(df_day3[['Bus ID', 'Overnight End_dt_min', 'SOC Overnight']], on='Bus ID')
    .assign(Diff_Overnight_End=lambda f: f['Sim_Overnight_End'] - f['Overnight End_dt_min'])
    .merge(soc_overnight_day3, on='Bus ID')
    .assign(Diff_SOC_Overnight=lambda f: f['Sim_SOC_Overnight'] - f['SOC Overnight'])
)

# Output
print("Validation Results for Day 3:\n")
_print_section("R1 Wait Times:", validation_r1)
_print_section("R2 Wait Times:", validation_r2)

top_up_columns = ['Bus ID', 'Sim_Top-Up_End', 'Top-Up End_dt_min', 'Diff_Top-Up_End',
                  'Sim_SOC_Top-Up', 'SOC Top-Up', 'Diff_SOC_Top-Up']
_print_section("Top-Up End Times and SOC:", validation_top_up[top_up_columns])

overnight_columns = ['Bus ID', 'Sim_Overnight_End', 'Overnight End_dt_min', 'Diff_Overnight_End',
                     'Sim_SOC_Overnight', 'SOC Overnight', 'Diff_SOC_Overnight']
_print_section("Overnight End Times and SOC:", validation_overnight[overnight_columns])

# Mean signed differences (simulated minus recorded) per validated quantity
mean_r1_wait_diff = validation_r1['Diff_R1_Wait'].mean()
mean_r2_wait_diff = validation_r2['Diff_R2_Wait'].mean()
mean_top_up_end_diff = validation_top_up['Diff_Top-Up_End'].mean()
mean_overnight_end_diff = validation_overnight['Diff_Overnight_End'].mean()
mean_soc_top_up_diff = validation_top_up['Diff_SOC_Top-Up'].mean()
mean_soc_overnight_diff = validation_overnight['Diff_SOC_Overnight'].mean()

print("Summary Statistics:")
print(f"Mean R1 Wait Difference: {mean_r1_wait_diff:.2f} minutes")
print(f"Mean R2 Wait Difference: {mean_r2_wait_diff:.2f} minutes")
print(f"Mean Top-Up End Difference: {mean_top_up_end_diff:.2f} minutes")
print(f"Mean Overnight End Difference: {mean_overnight_end_diff:.2f} minutes")
print(f"Mean SOC Top-Up Difference: {mean_soc_top_up_diff:.2f}%")
print(f"Mean SOC Overnight Difference: {mean_soc_overnight_diff:.2f}%")

# Per-day count of buses whose overnight charge finished by the 9:00 AM cut-off,
# compared against the hard-coded figure of 110 buses
for day in (1, 2, 3):
    charged = len(results_validation['Buses_Charged_by_9AM'][day])
    print(f"Day {day}: {charged} buses charged by 9:00 AM")
    missing = 110 - charged
    if missing > 0:
        print(f"Shortfall: {missing} buses")

Validation Results for Day 3:

R1 Wait Times:
   Bus ID  Sim_R1_Wait  Before Charging WT R1  Diff_R1_Wait
0       2          0.0                      0           0.0
1      97          0.0                      0           0.0
2      81          0.0                      0           0.0
3      65          0.0                      0           0.0
4      34          0.0                      0           0.0
5      19          0.0                      0           0.0
6      50          0.0                      0           0.0
7       1         14.0                     14           0.0
8      96         13.0                     13           0.0
9      33         12.0                     12           0.0
... [110 rows total]

R2 Wait Times:
   Bus ID  Sim_R2_Wait  Before Charging WT R2  Diff_R2_Wait
0      51          0.0                      0           0.0
1      36          0.0                      0           0.0
2       1         29.0                     29           0.0
3      33        