# First algorithm Exploration

Let's get this project starting!

In [21]:
# Import libraries
import pandas as pd
import numpy as np
import os

In [22]:
# Import data
df_events = pd.read_csv('../data/input_data/events.csv')
df_halls = pd.read_csv('../data/input_data/halls.csv')
df_parking_lots_capacity = pd.read_csv('../data/input_data/parking_lots_capacity.csv')
df_parking_lots_distances = pd.read_csv('../data/input_data/parking_lots_distances.csv')

In [23]:
# Format data to datetime dd.mm.yyyy
df_events['date'] = pd.to_datetime(df_events['date'], format='%d.%m.%y')
df_parking_lots_capacity['valid_from'] = pd.to_datetime(df_parking_lots_capacity['valid_from'], format='%d.%m.%y')
df_parking_lots_capacity['valid_to'] = pd.to_datetime(df_parking_lots_capacity['valid_to'], format='%d.%m.%y')

In [24]:
# In this version each event has one hall
df_events.head()

Unnamed: 0,event,date,status,demand,hall
0,inhorgenta,2025-02-17,aufbau,280,A1
1,inhorgenta,2025-02-18,aufbau,330,A1
2,inhorgenta,2025-02-19,aufbau,420,A1
3,inhorgenta,2025-02-20,aufbau,420,A1
4,inhorgenta,2025-02-21,laufzeit,2278,A1


In [25]:
# Each hall has a distance to each entrance (with aritficial numbers for now)
df_halls.head()

Unnamed: 0,hall,distance_north,distance_north_east,distance_east,distance_west,distance_north_west
0,A1,200,300,160,60,100
1,A2,175,260,140,80,140
2,A3,150,220,120,100,180
3,A4,150,180,100,120,220
4,A5,175,140,80,140,260


In [26]:
# Each parking lot has a capacity to a given time (may change due to construnctions, etc)
df_parking_lots_capacity.head()

Unnamed: 0,parking_lot,capacity,valid_from,valid_to
0,P1 Nord (Tor 17a - Tor 11c),2750,2025-01-01,2025-03-02
1,P1 Nord (westl. Tor 17a),350,2025-01-01,2025-03-02
2,P2 Nord (östl. Tor 11c),500,2025-01-01,2025-03-02
3,P3,200,2025-01-01,2025-03-02
4,P4,100,2025-01-01,2025-03-02


In [27]:
# Each parking lot also has a distance to each entrance (with aritficial numbers for now)
df_parking_lots_distances.head()

Unnamed: 0,parking_lot,distance_north,distance_north_east,distance_east,distance_west,distance_north_west
0,P1 Nord (Tor 17a - Tor 11c),400,430,600,1000,900
1,P1 Nord (westl. Tor 17a),380,410,580,950,850
2,P2 Nord (östl. Tor 11c),420,390,500,1300,1200
3,P3,700,720,750,1300,1200
4,P4,800,830,850,1400,1300


In [28]:
# Merge events with halls to figure out distances of the events to the entrances
df_events_halls = df_events.merge(df_halls, on='hall')

In [29]:
# Create a list to store the distances between halls and all parking lots
halls_parking_distances = []

# Iterate over each row in df_events_halls
for _, hall in df_halls.iterrows():
    # Iterate over each row in df_parking_lots_distances
    for _, parking_lot in df_parking_lots_distances.iterrows():
        # Calculate total distances
        distance_north = hall['distance_north'] + parking_lot['distance_north']
        distance_north_east = hall['distance_north_east'] + parking_lot['distance_north_east']
        distance_east = hall['distance_east'] + parking_lot['distance_east']
        distance_west = hall['distance_west'] + parking_lot['distance_west']
        distance_north_west = hall['distance_north_west'] + parking_lot['distance_north_west']
        
        # Append new row to distances list
        halls_parking_distances.append({
            'hall': hall['hall'],
            'parking_lot': parking_lot['parking_lot'],
            'distance_north': distance_north,
            'distance_north_east': distance_north_east,
            'distance_east': distance_east,
            'distance_west': distance_west,
            'distance_north_west': distance_north_west
        })

# Create df_halls_parking_distances DataFrame
df_halls_parking_distances = pd.DataFrame(halls_parking_distances)

In [30]:
# Now each hall has a distance to each parking lot via each entrance
df_halls_parking_distances.head()

Unnamed: 0,hall,parking_lot,distance_north,distance_north_east,distance_east,distance_west,distance_north_west
0,A1,P1 Nord (Tor 17a - Tor 11c),600,730,760,1060,1000
1,A1,P1 Nord (westl. Tor 17a),580,710,740,1010,950
2,A1,P2 Nord (östl. Tor 11c),620,690,660,1360,1300
3,A1,P3,900,1020,910,1360,1300
4,A1,P4,1000,1130,1010,1460,1400


In [31]:
# Save the df_halls_parking_distances DataFrame to a CSV file for the allocation algorithm
try:
    os.makedirs('../data/output_data')
except OSError as e:
    print('Error createing directory:', e)
try:
    df_halls_parking_distances.to_parquet('../data/output_data/halls_parking_distances.parquet', index=False)
    print('Data saved to ../data/output_data/halls_parking_distances.parquet')
except OSError as e:
    print('Error saving data:', e)

Error createing directory: [Errno 17] File exists: '../data/output_data'
Data saved to ../data/output_data/halls_parking_distances.parquet


In [32]:
# Merge events with halls and parking lots distances
df_events_halls_parking_distances = df_events.merge(df_halls_parking_distances, on='hall')

In [33]:
# Merge capacity of parking lot from parking_lot_capacity to df_events_halls_parking_lots_distances_capacity where df_parking_lots_capacity['valid_from'] <= df_events_halls_parking_distances['date'] <= df_parking_lots_capacity['valid_to']

# Create an empty dataframe to store the results
df_events_halls_parking_lots_distances_capacity = pd.DataFrame()

# Loop over each row in df_events_halls_parking_distances
for idx, row in df_events_halls_parking_distances.iterrows():
    # Find matching rows in df_parking_lots_capacity
    mask = ((df_parking_lots_capacity['valid_from'] <= row['date']) & 
            (df_parking_lots_capacity['valid_to'] >= row['date']))
    matching_rows = df_parking_lots_capacity[mask]
    
    # If there are matching rows, merge them with the current row
    if not matching_rows.empty:
        combined_row = pd.merge(row.to_frame().transpose(), matching_rows, how='left', on='parking_lot')
        df_events_halls_parking_lots_distances_capacity = pd.concat([df_events_halls_parking_lots_distances_capacity, combined_row])

# Reset the index of the resulting dataframe
df_events_halls_parking_lots_distances_capacity.reset_index(drop=True, inplace=True)

# Drop valid_from and valid_to columns in df_events_halls_parking_lots_distances_capacity
df_events_halls_parking_lots_distances_capacity.drop(['valid_from', 'valid_to'], axis=1, inplace=True)

In [34]:
# Now each event has a distance to each parking lot via each entrance
# One row is an event at a specific date, it's hall and a specific parking lot with all distances (not to the entrance but to the parking lot)
df_events_halls_parking_lots_distances_capacity[df_events_halls_parking_lots_distances_capacity['event'] == 'inhorgenta'].head()

Unnamed: 0,event,date,status,demand,hall,parking_lot,distance_north,distance_north_east,distance_east,distance_west,distance_north_west,capacity
0,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,P1 Nord (Tor 17a - Tor 11c),600,730,760,1060,1000,2750
1,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,P1 Nord (westl. Tor 17a),580,710,740,1010,950,350
2,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,P2 Nord (östl. Tor 11c),620,690,660,1360,1300,500
3,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,P3,900,1020,910,1360,1300,200
4,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,P4,1000,1130,1010,1460,1400,100


In [35]:
# Find the parking lot with the smallest distance for each event
df_events_closest_parking_lot = df_events_halls_parking_lots_distances_capacity.copy()

# Filter rows where capacity >= demand
df_events_closest_parking_lot_min_capacity = df_events_closest_parking_lot[df_events_closest_parking_lot['capacity'] >= df_events_closest_parking_lot['demand']].copy()

# Add parking_delta column
df_events_closest_parking_lot_min_capacity['parking_delta'] = df_events_closest_parking_lot_min_capacity['capacity'] - df_events_closest_parking_lot_min_capacity['demand']

# Add a column for the minimum distance and the entrance
df_events_closest_parking_lot_min_capacity['min_distance'] = df_events_closest_parking_lot_min_capacity[['distance_north', 'distance_north_east', 'distance_east', 'distance_west', 'distance_north_west']].min(axis=1)
df_events_closest_parking_lot_min_capacity['closest_entrance'] = df_events_closest_parking_lot_min_capacity[['distance_north', 'distance_north_east', 'distance_east', 'distance_west', 'distance_north_west']].idxmin(axis=1).str.replace('distance_', '') # Remove "distance_" string from 'closest_entrance'

# Add max_demand
df_events_closest_parking_lot_min_capacity['max_demand'] = df_events_closest_parking_lot_min_capacity.groupby('event')['demand'].transform('max')


# Sort by event and minimum distance, then drop duplicates, keeping the first (i.e., the one with the smallest distance)
# df_events_closest_parking_lot_min_capacity = df_events_closest_parking_lot_min_capacity.sort_values(['event', 'min_distance']).drop_duplicates(['event', 'date'], keep='first')

In [36]:
# Now each event has a parking lot with the smallest distance to it
df_events_closest_parking_lot_min_capacity.head(11)

Unnamed: 0,event,date,status,demand,hall,parking_lot,distance_north,distance_north_east,distance_east,distance_west,distance_north_west,capacity,parking_delta,min_distance,closest_entrance,max_demand
0,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,P1 Nord (Tor 17a - Tor 11c),600,730,760,1060,1000,2750,2470,600,north,2576
1,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,P1 Nord (westl. Tor 17a),580,710,740,1010,950,350,70,580,north,2576
2,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,P2 Nord (östl. Tor 11c),620,690,660,1360,1300,500,220,620,north,2576
6,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,P7,850,970,850,1260,1200,400,120,850,north,2576
8,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,P9 - P12,700,830,720,1160,1150,3000,2720,700,north,2576
9,inhorgenta,2025-02-17 00:00:00,aufbau,280,A1,Parkhaus West,1200,1320,1200,1560,1550,4600,4320,1200,north,2576
10,inhorgenta,2025-02-18 00:00:00,aufbau,330,A1,P1 Nord (Tor 17a - Tor 11c),600,730,760,1060,1000,2750,2420,600,north,2576
11,inhorgenta,2025-02-18 00:00:00,aufbau,330,A1,P1 Nord (westl. Tor 17a),580,710,740,1010,950,350,20,580,north,2576
12,inhorgenta,2025-02-18 00:00:00,aufbau,330,A1,P2 Nord (östl. Tor 11c),620,690,660,1360,1300,500,170,620,north,2576
16,inhorgenta,2025-02-18 00:00:00,aufbau,330,A1,P7,850,970,850,1260,1200,400,70,850,north,2576


In [37]:
# Save the df_events_closest_parking_lot DataFrame to a CSV file for the allocation algorithm
try:
    os.makedirs('../data/output_data')
except OSError as e:
    print('Error createing directory:', e)
try:
    df_events_closest_parking_lot_min_capacity.to_parquet('../data/output_data/df_events_closest_parking_lot_min_capacity.parquet', index=False)
    print('Data saved to ../data/output_data/df_events_closest_parking_lot_min_capacity.parquet')
except OSError as e:
    print('Error saving data:', e)

Error createing directory: [Errno 17] File exists: '../data/output_data'
Data saved to ../data/output_data/df_events_closest_parking_lot_min_capacity.parquet


## No allocation algorithm, just exploration of data:

In [38]:
# Verify, that there are no negative parking delta
if (df_events_closest_parking_lot_min_capacity['parking_delta'] < 0).any():
    print(df_events_closest_parking_lot_min_capacity[df_events_closest_parking_lot_min_capacity['parking_delta'] < 0])
else:
    print('No event demand exceeds allocated parking capacity')

No event demand exceeds allocated parking capacity


In [39]:
df_events_closest_parking_lot_min_capacity[['event', 'date', 'status', 'hall', 'demand', 'parking_delta', 'capacity', 'parking_lot', 'min_distance', 'closest_entrance']].head()

Unnamed: 0,event,date,status,hall,demand,parking_delta,capacity,parking_lot,min_distance,closest_entrance
0,inhorgenta,2025-02-17 00:00:00,aufbau,A1,280,2470,2750,P1 Nord (Tor 17a - Tor 11c),600,north
1,inhorgenta,2025-02-17 00:00:00,aufbau,A1,280,70,350,P1 Nord (westl. Tor 17a),580,north
2,inhorgenta,2025-02-17 00:00:00,aufbau,A1,280,220,500,P2 Nord (östl. Tor 11c),620,north
6,inhorgenta,2025-02-17 00:00:00,aufbau,A1,280,120,400,P7,850,north
8,inhorgenta,2025-02-17 00:00:00,aufbau,A1,280,2720,3000,P9 - P12,700,north


In [40]:
# The code just searches the closest parking lot matching the capacity and demand for each event but as we see one parking lot is used for multiple events and adding the demands exceeds the capacity of the parking lot
df_events_closest_parking_lot_min_capacity[df_events_closest_parking_lot_min_capacity['date'] == pd.to_datetime('2025-02-23')][['event', 'date', 'status', 'hall', 'demand', 'parking_delta', 'capacity', 'parking_lot', 'min_distance', 'closest_entrance']].head(20)

Unnamed: 0,event,date,status,hall,demand,parking_delta,capacity,parking_lot,min_distance,closest_entrance
60,inhorgenta,2025-02-23 00:00:00,laufzeit,A1,1795,955,2750,P1 Nord (Tor 17a - Tor 11c),600,north
68,inhorgenta,2025-02-23 00:00:00,laufzeit,A1,1795,1205,3000,P9 - P12,700,north
69,inhorgenta,2025-02-23 00:00:00,laufzeit,A1,1795,2805,4600,Parkhaus West,1200,north
180,Münchner Autotage,2025-02-23 00:00:00,laufzeit,B3,1000,1750,2750,P1 Nord (Tor 17a - Tor 11c),500,north
188,Münchner Autotage,2025-02-23 00:00:00,laufzeit,B3,1000,2000,3000,P9 - P12,600,north
189,Münchner Autotage,2025-02-23 00:00:00,laufzeit,B3,1000,3600,4600,Parkhaus West,1100,north
330,Lopec,2025-02-23 00:00:00,aufbau,C6,140,2610,2750,P1 Nord (Tor 17a - Tor 11c),480,north_east
331,Lopec,2025-02-23 00:00:00,aufbau,C6,140,210,350,P1 Nord (westl. Tor 17a),460,north_east
332,Lopec,2025-02-23 00:00:00,aufbau,C6,140,360,500,P2 Nord (östl. Tor 11c),440,north_east
333,Lopec,2025-02-23 00:00:00,aufbau,C6,140,60,200,P3,770,north_east
