In [102]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from datetime import timedelta

SQL file can be run initially from run.sql to populate the values. The following block of code will read all the tables.

In [100]:
engine = create_engine('mysql+pymysql://root:@127.0.0.1:3306/db')
flights = pd.read_sql_table('flights', engine)
arrivals = pd.read_sql_table('arrivals', engine)
drivers = pd.read_sql_table('drivers', engine)
tasks = pd.read_sql_table('tasks', engine)

In [101]:
def read_from_db():
    global flights, arrivals, drivers, tasks
    flights = pd.read_sql_table('flights', engine)
    arrivals = pd.read_sql_table('arrivals', engine)
    drivers = pd.read_sql_table('drivers', engine)
    tasks = pd.read_sql_table('tasks', engine)

In [103]:
arrivals["available_to_pick_up_at"] = arrivals["aibt"] + pd.to_timedelta(
    arrivals["earliest_pickup_minutes"], unit="m"
)
arrivals.sort_values(by="available_to_pick_up_at", inplace=True)

We make an assumption it take "x" minutes to get from thetarmac to the flight's pick up area. In production the following code would probably run in a cron job of every 1 minute to see which driver should be assigned to which flight. For now, we will write it manually.

Here is a utility function to get the next available driver. We simply do a difference of drivers - drivers with tasks (which is all tasks with start time and no end time)



In [129]:
def get_available_driver():
    global drivers, tasks
    # Filter tasks with a start_time but no end_time
    active_tasks = tasks[tasks['start_time'].notnull() & tasks['end_time'].isnull()]
    
    # Get the driver IDs associated with these tasks
    assigned_drivers = active_tasks['driver_id'].unique()
    
    # Get all drivers who are not assigned to active tasks
    available_drivers = drivers[~drivers['driver_id'].isin(assigned_drivers)]
    
    # Check if there are any available drivers
    if available_drivers.empty:
        return None  # Return None if no drivers are available
    
    print(available_drivers)
    # Return the first available driver
    return available_drivers.iloc[0]

In [106]:
get_available_driver()

  driver_id    driver_name
0   Driver1     John Smith
1   Driver2   Emma Johnson
2   Driver3  Michael Brown
3   Driver4    Sarah Davis


driver_id         Driver1
driver_name    John Smith
Name: 0, dtype: object

Driver John Smith will now start his task

This utility function will add a start time to the task and update with a driver_id. Here we'll manually put John Smith and the task number of 1. This function is ACID compliant

In [109]:
def start_task(arrival_id, driver_id, task_number):
    task = {
        "arrival_id": arrival_id,
        "driver_id": driver_id,
        "task_number": task_number,
        "is_completed": 0,
        "start_time": pd.Timestamp.now(),
        "end_time": None
    }
    pd.DataFrame([task]).to_sql("tasks", con=engine, if_exists="append", index=False)
    return task

In [132]:
start_task(arrival_id=arrivals.loc[0, "arrival_id"], driver_id="Driver1", task_number=1)

{'arrival_id': np.int64(1),
 'driver_id': 'Driver1',
 'task_number': 1,
 'is_completed': 0,
 'start_time': Timestamp('2025-03-31 13:24:43.246557'),
 'end_time': None}

Now, that the first task is created, we will proceed to the next available arrival and assign it to the next available driver. We will keep doing this until there is no available driver

Here we just do a refresh of the in-memory values. You can have a peek at what tasks look like

In [111]:
read_from_db()
tasks.head(4)

Unnamed: 0,task_id,arrival_id,driver_id,task_number,start_time,end_time,is_completed
0,1,1,Driver1,1,2025-03-31 13:17:17,NaT,0
1,2,1,Driver1,1,2025-03-31 13:17:34,NaT,0


If you see here, the next available driver for arrivals[1] is Emma Johnson, since John Smith is busy.

In [112]:
get_available_driver()

  driver_id    driver_name
1   Driver2   Emma Johnson
2   Driver3  Michael Brown
3   Driver4    Sarah Davis


driver_id           Driver2
driver_name    Emma Johnson
Name: 1, dtype: object

In [113]:
start_task(arrival_id=arrivals.loc[1, "arrival_id"], driver_id="Driver2", task_number=1)

{'arrival_id': np.int64(2),
 'driver_id': 'Driver2',
 'task_number': 1,
 'is_completed': 0,
 'start_time': Timestamp('2025-03-31 13:18:13.355901'),
 'end_time': None}

In [116]:
read_from_db()
tasks.head(4)

Unnamed: 0,task_id,arrival_id,driver_id,task_number,start_time,end_time,is_completed
0,1,1,Driver1,1,2025-03-31 13:17:17,NaT,0
1,2,1,Driver1,1,2025-03-31 13:17:34,NaT,0
2,3,2,Driver2,1,2025-03-31 13:18:13,NaT,0


In [117]:
get_available_driver()

  driver_id    driver_name
2   Driver3  Michael Brown
3   Driver4    Sarah Davis


driver_id            Driver3
driver_name    Michael Brown
Name: 2, dtype: object

In [118]:
start_task(arrival_id=arrivals.loc[2, "arrival_id"], driver_id="Driver3", task_number=1)

{'arrival_id': np.int64(3),
 'driver_id': 'Driver3',
 'task_number': 1,
 'is_completed': 0,
 'start_time': Timestamp('2025-03-31 13:18:49.247008'),
 'end_time': None}

In [119]:
read_from_db()
tasks.head(4)

Unnamed: 0,task_id,arrival_id,driver_id,task_number,start_time,end_time,is_completed
0,1,1,Driver1,1,2025-03-31 13:17:17,NaT,0
1,2,1,Driver1,1,2025-03-31 13:17:34,NaT,0
2,3,2,Driver2,1,2025-03-31 13:18:13,NaT,0
3,4,3,Driver3,1,2025-03-31 13:18:49,NaT,0


In [120]:
get_available_driver()

  driver_id  driver_name
3   Driver4  Sarah Davis


driver_id          Driver4
driver_name    Sarah Davis
Name: 3, dtype: object

Okay, now we have demonstrated starting of tasks and assigning drivers works well :)

Lets now, assume that the driver John Smith has completed the task. We will update the task to mark it as completed and set the end time. 

If its task_number 1 we will check to see if the first bag check is on time or delayed. If its task number 4, we will check if the last bag check is on time or delayed
NOTE: Not implemented due to lack of time :'(


In [125]:
def end_task(task_id):
    global tasks
    task = tasks[tasks['task_id'] == task_id].iloc[0]
    task['end_time'] = pd.Timestamp.now()
    task['is_completed'] = 1
    tasks.update(task)
    if task['task_number'] == 1:
        # if end_time > first_bag_on_belt_minutes + aibt
        # update as delayed
        pass
    if task['task_number'] == 4:
        # update as delayed or completed
        pass
    tasks.to_sql('tasks', con=engine, if_exists='replace', index=False, method='multi')
    return task

In [None]:
from sqlalchemy import text

def end_task_ai(task_id):
    global tasks
    # Locate the task in the DataFrame
    task_index = tasks[tasks['task_id'] == task_id].index[0]
    task = tasks.loc[task_index]

    # Update the task in memory
    task['end_time'] = pd.Timestamp.now()
    task['is_completed'] = 1

    # # Perform checks based on task_number
    # if task['task_number'] == 1:
    #     # Example: Check if the first bag is delayed
    #     if task['end_time'] > task['aibt'] + pd.Timedelta(minutes=task['first_bag_on_belt_minutes']):
    #         task['status'] = 'Delayed'
    #     else:
    #         task['status'] = 'On Time'
    # elif task['task_number'] == 4:
    #     # Example: Check if the last bag is delayed or completed
    #     if task['end_time'] > task['aibt'] + pd.Timedelta(minutes=task['last_bag_on_belt_minutes']):
    #         task['status'] = 'Delayed'
    #     else:
    #         task['status'] = 'Completed'

    # Update the task in the in-memory DataFrame
    tasks.loc[task_index] = task

    # Update the task in the database
    with engine.connect() as connection:
        update_query = text("""
        UPDATE tasks
        SET end_time = :end_time,
            is_completed = :is_completed,
        WHERE task_id = :task_id
        """)
        connection.execute(update_query, {
            'end_time': task['end_time'],
            'is_completed': task['is_completed'],
            'task_id': int(task_id)  # Convert task_id to Python int
        })

    return task

In [138]:
end_task_ai(task_id=tasks.loc[0, "task_id"])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  task['end_time'] = pd.Timestamp.now()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  task['is_completed'] = 1


OperationalError: (pymysql.err.OperationalError) (1054, "Unknown column 'status' in 'field list'")
[SQL: 
        UPDATE tasks
        SET end_time = %(end_time)s,
            is_completed = %(is_completed)s,
            status = %(status)s
        WHERE task_id = %(task_id)s
        ]
[parameters: {'end_time': Timestamp('2025-03-31 13:27:49.500475'), 'is_completed': 1, 'status': None, 'task_id': 1}]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [131]:
read_from_db()
tasks.head(4)

Unnamed: 0,task_id,arrival_id,driver_id,task_number,start_time,end_time,is_completed
0,1,1,Driver1,1,2025-03-31 13:17:17,NaT,0
1,2,1,Driver1,1,2025-03-31 13:17:34,NaT,0
2,3,2,Driver2,1,2025-03-31 13:18:13,NaT,0
3,4,3,Driver3,1,2025-03-31 13:18:49,NaT,0


In [128]:
get_available_driver()

  driver_id  driver_name
3   Driver4  Sarah Davis


driver_id          Driver4
driver_name    Sarah Davis
Name: 3, dtype: object