# First algorithm Exploration

Let's get this project starting!

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import os

In [2]:
# Import data
df_events = pd.read_csv('../data/input_data/events.csv')
df_halls = pd.read_csv('../data/input_data/halls.csv')
df_parking_lots_capacity = pd.read_csv('../data/input_data/parking_lots_capacity.csv')
df_parking_lots_distances = pd.read_csv('../data/input_data/parking_lots_distances.csv')

In [3]:
# Format data to datetime dd.mm.yyyy
df_events['date'] = pd.to_datetime(df_events['date'], format='%d.%m.%y')
df_parking_lots_capacity['valid_from'] = pd.to_datetime(df_parking_lots_capacity['valid_from'], format='%d.%m.%y')
df_parking_lots_capacity['valid_to'] = pd.to_datetime(df_parking_lots_capacity['valid_to'], format='%d.%m.%y')

In [4]:
# In this version each event has one hall
df_events.head()

Unnamed: 0,event,date,status,demand,hall,entrance
0,inhorgenta,2025-02-17,aufbau,280,A1,west
1,inhorgenta,2025-02-18,aufbau,330,A1,west
2,inhorgenta,2025-02-19,aufbau,420,A1,west
3,inhorgenta,2025-02-20,aufbau,420,A1,west
4,inhorgenta,2025-02-21,laufzeit,2278,A1,west


In [5]:
# Each hall has a distance to each entrance (with aritficial numbers for now)
df_halls.head()

Unnamed: 0,hall,distance_north,distance_north_east,distance_east,distance_west,distance_north_west
0,A1,200,300,160,60,100
1,A2,175,260,140,80,140
2,A3,150,220,120,100,180
3,A4,150,180,100,120,220
4,A5,175,140,80,140,260


In [6]:
# Each parking lot has a capacity to a given time (may change due to construnctions, etc)
df_parking_lots_capacity.head()

Unnamed: 0,parking_lot,capacity,valid_from,valid_to
0,P1 Nord (Tor 17a - Tor 11c),2750,2025-01-01,2025-03-02
1,P1 Nord (westl. Tor 17a),350,2025-01-01,2025-03-02
2,P2 Nord (östl. Tor 11c),500,2025-01-01,2025-03-02
3,P3,200,2025-01-01,2025-03-02
4,P4,100,2025-01-01,2025-03-02


In [7]:
# Each parking lot also has a distance to each entrance (with aritficial numbers for now)
df_parking_lots_distances.head()

Unnamed: 0,parking_lot,distance_north,distance_north_east,distance_east,distance_west,distance_north_west
0,P1 Nord (Tor 17a - Tor 11c),400,430,600,1000,900
1,P1 Nord (westl. Tor 17a),380,410,580,950,850
2,P2 Nord (östl. Tor 11c),420,390,500,1300,1200
3,P3,700,720,750,1300,1200
4,P4,800,830,850,1400,1300


In [8]:
# Merge events with halls to figure out distances of the events to the entrances
df_events_halls = df_events.merge(df_halls, on='hall')

In [9]:
# Adding the information, that gates are predefined
# Note for the later application: As soon as an event gets entered into the system, this logic can be triggered by the backend to gather the entrance distance from the database, attaching it to the event, before storing it into the list and triggering the algorithm
# Evene better. When hall A1 and entrance west are selected, the backend (shows the user the distance and) stores the distance in the database based on the distance list (actually the same as the note abvoe but maybe slightly different. The outcome is the same)
def get_distance(row):
    entrance = row['entrance']
    return row[f'distance_{entrance}']

df_events_halls['distance_entrance'] = df_events_halls.apply(get_distance, axis=1)
df_events_halls.drop(columns=['distance_north', 'distance_north_east', 'distance_east', 'distance_west', 'distance_north_west'], inplace=True)

In [10]:
df_events_halls.head()

Unnamed: 0,event,date,status,demand,hall,entrance,distance_entrance
0,inhorgenta,2025-02-17,aufbau,280,A1,west,60
1,inhorgenta,2025-02-18,aufbau,330,A1,west,60
2,inhorgenta,2025-02-19,aufbau,420,A1,west,60
3,inhorgenta,2025-02-20,aufbau,420,A1,west,60
4,inhorgenta,2025-02-21,laufzeit,2278,A1,west,60


In [11]:
# Save the df_events_closest_parking_lot DataFrame to a CSV file for the allocation algorithm
try:
    os.makedirs('../data/output_data')
except OSError as e:
    print('Error createing directory:', e)
try:
    df_events_halls.to_parquet('../data/output_data/df_events_halls.parquet', index=False)
    print('Data saved to ../data/output_data/df_events_halls.parquet')
except OSError as e:
    print('Error saving data:', e)

Error createing directory: [Errno 17] File exists: '../data/output_data'
Data saved to ../data/output_data/df_events_halls.parquet


In [12]:
# Create a list to store the distances between halls and all parking lots
halls_parking_distances = []

# Iterate over each row in df_events_halls
for _, hall in df_halls.iterrows():
    # Iterate over each row in df_parking_lots_distances
    for _, parking_lot in df_parking_lots_distances.iterrows():
        # Calculate total distances
        distance_north = hall['distance_north'] + parking_lot['distance_north']
        distance_north_east = hall['distance_north_east'] + parking_lot['distance_north_east']
        distance_east = hall['distance_east'] + parking_lot['distance_east']
        distance_west = hall['distance_west'] + parking_lot['distance_west']
        distance_north_west = hall['distance_north_west'] + parking_lot['distance_north_west']
        
        # Append new row to distances list
        halls_parking_distances.append({
            'hall': hall['hall'],
            'parking_lot': parking_lot['parking_lot'],
            'distance_north': distance_north,
            'distance_north_east': distance_north_east,
            'distance_east': distance_east,
            'distance_west': distance_west,
            'distance_north_west': distance_north_west
        })

# Create df_halls_parking_distances DataFrame
df_halls_parking_distances = pd.DataFrame(halls_parking_distances)

In [13]:
# Now each hall has a distance to each parking lot via each entrance
df_halls_parking_distances.head()

Unnamed: 0,hall,parking_lot,distance_north,distance_north_east,distance_east,distance_west,distance_north_west
0,A1,P1 Nord (Tor 17a - Tor 11c),600,730,760,1060,1000
1,A1,P1 Nord (westl. Tor 17a),580,710,740,1010,950
2,A1,P2 Nord (östl. Tor 11c),620,690,660,1360,1300
3,A1,P3,900,1020,910,1360,1300
4,A1,P4,1000,1130,1010,1460,1400


In [14]:
# Merge events with halls and parking lots distances
df_events_halls_parking_distances = df_events_halls.merge(df_halls_parking_distances, on='hall')

In [15]:
# Adding the information, that gates are predefined
def get_parking_distance(row):
    entrance = row['entrance']
    return row[f'distance_{entrance}']

def calculate_total_distance(row):
    return row['distance_entrance'] + row['distance_parking']

df_events_halls_parking_distances['distance_parking'] = df_events_halls_parking_distances.apply(get_parking_distance, axis=1)
df_events_halls_parking_distances['distance'] = df_events_halls_parking_distances.apply(calculate_total_distance, axis=1)
df_events_halls_parking_distances.drop(columns=['distance_north', 'distance_north_east', 'distance_east', 'distance_west', 'distance_north_west', 'distance_entrance', 'distance_parking'], inplace=True)

In [16]:
df_events_halls_parking_distances.head()

Unnamed: 0,event,date,status,demand,hall,entrance,parking_lot,distance
0,inhorgenta,2025-02-17,aufbau,280,A1,west,P1 Nord (Tor 17a - Tor 11c),1120
1,inhorgenta,2025-02-17,aufbau,280,A1,west,P1 Nord (westl. Tor 17a),1070
2,inhorgenta,2025-02-17,aufbau,280,A1,west,P2 Nord (östl. Tor 11c),1420
3,inhorgenta,2025-02-17,aufbau,280,A1,west,P3,1420
4,inhorgenta,2025-02-17,aufbau,280,A1,west,P4,1520


In [17]:
# Save the df_halls_parking_distances DataFrame to a CSV file for the allocation algorithm
try:
    os.makedirs('../data/output_data')
except OSError as e:
    print('Error createing directory:', e)
try:
    df_halls_parking_distances.to_parquet('../data/output_data/halls_parking_distances.parquet', index=False)
    print('Data saved to ../data/output_data/halls_parking_distances.parquet')
except OSError as e:
    print('Error saving data:', e)

Error createing directory: [Errno 17] File exists: '../data/output_data'
Data saved to ../data/output_data/halls_parking_distances.parquet


In [18]:
# Merge capacity of parking lot from parking_lot_capacity to df_events_halls_parking_lots_distances_capacity where df_parking_lots_capacity['valid_from'] <= df_events_halls_parking_distances['date'] <= df_parking_lots_capacity['valid_to']
# Create an empty dataframe to store the results
df_events_halls_parking_lots_distances_capacity = pd.DataFrame()

# Loop over each row in df_events_halls_parking_distances
for idx, row in df_events_halls_parking_distances.iterrows():
    # Find matching rows in df_parking_lots_capacity
    mask = ((df_parking_lots_capacity['valid_from'] <= row['date']) & 
            (df_parking_lots_capacity['valid_to'] >= row['date']))
    matching_rows = df_parking_lots_capacity[mask]
    
    # If there are matching rows, merge them with the current row
    if not matching_rows.empty:
        combined_row = pd.merge(row.to_frame().transpose(), matching_rows, how='left', on='parking_lot')
        df_events_halls_parking_lots_distances_capacity = pd.concat([df_events_halls_parking_lots_distances_capacity, combined_row])

# Reset the index of the resulting dataframe
df_events_halls_parking_lots_distances_capacity.reset_index(drop=True, inplace=True)

# Drop valid_from and valid_to columns in df_events_halls_parking_lots_distances_capacity
df_events_halls_parking_lots_distances_capacity.drop(['valid_from', 'valid_to'], axis=1, inplace=True)

In [19]:
# Now each event has a distance to each parking lot via each entrance
# One row is an event at a specific date, it's hall and a specific parking lot with all distances (not to the entrance but to the parking lot)
df_events_halls_parking_lots_distances_capacity[df_events_halls_parking_lots_distances_capacity['event'] == 'inhorgenta'].head()

Unnamed: 0,event,date,status,demand,hall,entrance,parking_lot,distance,capacity
0,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,west,P1 Nord (Tor 17a - Tor 11c),1120,2750
1,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,west,P1 Nord (westl. Tor 17a),1070,350
2,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,west,P2 Nord (östl. Tor 11c),1420,500
3,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,west,P3,1420,200
4,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,west,P4,1520,100


In [20]:
# Find the parking lot with the smallest distance for each event
df_events_closest_parking_lot = df_events_halls_parking_lots_distances_capacity.copy()

# Filter rows where capacity >= demand
df_events_parking_lot_min_capacity = df_events_halls_parking_lots_distances_capacity[df_events_halls_parking_lots_distances_capacity['capacity'] >= df_events_halls_parking_lots_distances_capacity['demand']].copy()

# Add parking_delta column
df_events_parking_lot_min_capacity['parking_delta'] = df_events_parking_lot_min_capacity['capacity'] - df_events_parking_lot_min_capacity['demand']

# Add max_demand
df_events_parking_lot_min_capacity['max_demand'] = df_events_parking_lot_min_capacity.groupby('event')['demand'].transform('max')

In [21]:
# Verify, that there are no negative parking delta
if (df_events_parking_lot_min_capacity['parking_delta'] < 0).any():
    print(df_events_parking_lot_min_capacity[df_events_parking_lot_min_capacity['parking_delta'] < 0])
else:
    print('No event demand exceeds potentially allocated parking capacity')

No event demand exceeds potentially allocated parking capacity


In [22]:
# Save the df_events_closest_parking_lot DataFrame to a CSV file for the allocation algorithm
try:
    os.makedirs('../data/output_data')
except OSError as e:
    print('Error createing directory:', e)
try:
    df_events_parking_lot_min_capacity.to_parquet('../data/output_data/df_events_parking_lot_min_capacity.parquet', index=False)
    print('Data saved to ../data/output_data/df_events_parking_lot_min_capacity.parquet')
except OSError as e:
    print('Error saving data:', e)

Error createing directory: [Errno 17] File exists: '../data/output_data'
Data saved to ../data/output_data/df_events_parking_lot_min_capacity.parquet
