# MAXIMAL COVERAGE ALGORITHM

## Libraries

In [3]:
import pandas as pd
import ast

## Constants

In [106]:
input_folder_path = "/Users/sammcmanagan/Library/Mobile Documents/com~apple~CloudDocs/Documents/M.Sc Statistics & Data Science/Modern Data Analytics/MDA-Project/Data/6_DistanceMatricesTest"

cities = ["Antwerpen", "Brugge", "Brussels", "Charleroi", "Gent", "Leuven", "Liege", "Oostende"]

# Initialize an empty dictionary to hold the DataFrames
city_data = {}

# Loop through each city and read the corresponding CSV file
for city in cities:
    csv_file_path = os.path.join(input_folder_path, f"{city}_cost_matrix.csv")
    if os.path.exists(csv_file_path):
        city_data[city] = pd.read_csv(csv_file_path, index_col=0)
        print(f"Data for {city} loaded successfully.")
    else:
        print(f"CSV file for {city} does not exist in the specified folder.")

# # Check the contents of the dictionary
# for city, df in city_data.items():
#     print(f"{city} DataFrame:\n", df.head(), "\n")


COVERAGE_RADIUS = 150

Data for Antwerpen loaded successfully.
Data for Brugge loaded successfully.
Data for Brussels loaded successfully.
Data for Charleroi loaded successfully.
Data for Gent loaded successfully.
Data for Leuven loaded successfully.
Data for Liege loaded successfully.
Data for Oostende loaded successfully.


## Comparisons

In [107]:
for city, df in city_data.items():
    df = df.apply(pd.to_numeric, errors='coerce')
    
    num_columns = df.shape[1]
    mid_index = num_columns // 2

    old_aed_columns = df.iloc[:, :mid_index]
    new_aed_columns = df.iloc[:, mid_index:]

    df['Old_AED_within_150m'] = old_aed_columns.apply(lambda row: (row < 150).any(), axis=1)
    df['New_AED_within_150m'] = new_aed_columns.apply(lambda row: (row < 150).any(), axis=1)

    percentage_old_aed = df['Old_AED_within_150m'].mean() * 100
    percentage_new_aed = df['New_AED_within_150m'].mean() * 100

    print(f"Results for {city}:")
    #print(df[['Old_AED_within_150m', 'New_AED_within_150m']])
    print(f"Percentage of cardiac arrests with an AED within 150m (old placements): {percentage_old_aed:.2f}%")
    print(f"Percentage of cardiac arrests with an AED within 150m (new placements): {percentage_new_aed:.2f}%\n")


Results for Antwerpen:
Percentage of cardiac arrests with an AED within 150m (old placements): 20.93%
Percentage of cardiac arrests with an AED within 150m (new placements): 38.24%

Results for Brugge:
Percentage of cardiac arrests with an AED within 150m (old placements): 11.11%
Percentage of cardiac arrests with an AED within 150m (new placements): 20.00%

Results for Brussels:
Percentage of cardiac arrests with an AED within 150m (old placements): 32.65%
Percentage of cardiac arrests with an AED within 150m (new placements): 54.65%

Results for Charleroi:
Percentage of cardiac arrests with an AED within 150m (old placements): 16.01%
Percentage of cardiac arrests with an AED within 150m (new placements): 21.91%

Results for Gent:
Percentage of cardiac arrests with an AED within 150m (old placements): 14.83%
Percentage of cardiac arrests with an AED within 150m (new placements): 35.33%

Results for Leuven:
Percentage of cardiac arrests with an AED within 150m (old placements): 16.98%


In [108]:
##fixing an issue where some aed cordinates had .1 at the end for duplciates between datasets

for city, df in city_data.items():
    df.columns = [col.split(')')[0] + ')' if ')' in col else col for col in df.columns]
    
# Assuming city_data is your DataFrame
columns_without_parentheses = [col for col in city_data['Antwerpen'].columns if not col.endswith(")")]
if columns_without_parentheses:
    print("There are column labels that do not end with ')'")
    print("Columns without parentheses:", columns_without_parentheses)
else:
    print("All column labels end with ')'")

All column labels end with ')'


In [115]:
###Same AEDs are being picked by the algorithm more than once
for city, df in city_data.items():
    num_columns = df.shape[1]
    mid_index = num_columns // 2
    num_unique_columns = df.iloc[:, mid_index:].columns.nunique()
    
    print(f"City: {city} Number of unique column labels:", num_unique_columns, mid_index)

City: Antwerpen Number of unique column labels: 638 722
City: Brugge Number of unique column labels: 152 161
City: Brussels Number of unique column labels: 1674 2024
City: Charleroi Number of unique column labels: 323 365
City: Gent Number of unique column labels: 362 389
City: Leuven Number of unique column labels: 153 167
City: Liege Number of unique column labels: 631 686
City: Oostende Number of unique column labels: 62 69


In [116]:
##Creating old aeds, new aeds and nearest aed dataframes

# Define an empty dictionary to store the nearest AED DataFrames for each city
nearest_aed_dfs = {}
old_aeds_dfs = {}
new_aeds_dfs = {}

# Iterate over each city DataFrame
for city in cities:
    df = city_data[city]
    
    # Determine the midpoint to split the columns
    num_columns = df.shape[1]
    mid_index = num_columns // 2
    
    # First half of columns are old aeds, second half are new aeds
    old_aed_columns = df.columns[:mid_index]
    new_aed_columns = df.iloc[:, mid_index:]
    # Find the nearest AED for each cardiac arrest location
    nearest_aed_coords = new_aed_columns.idxmin(axis=1)
    # Create DataFrames containing the column labels
    
    
    old_aeds_df = pd.DataFrame(old_aed_columns, columns=["Old AEDs"])
    new_aeds_df = pd.DataFrame(new_aed_columns, columns=["New AEDs"])
    nearest_aed_df = pd.DataFrame(nearest_aed_coords, columns=['Nearest AED Coordinates']).reset_index(drop=True)
    
    #Convert the strings to tuples using ast.literal_eval
    nearest_aed_df['Nearest AED Coordinates'] = nearest_aed_df['Nearest AED Coordinates'].apply(ast.literal_eval)
    #Split the tuples into separate columns for latitude and longitude
    nearest_aed_df[['aed_lat', 'aed_lon']] = pd.DataFrame(nearest_aed_df['Nearest AED Coordinates'].tolist(), index=nearest_aed_df.index)
    nearest_aed_df.drop(columns=['Nearest AED Coordinates'], inplace=True)
    
    
    ###repreat same steps for old and new aed dfs
    old_aeds_df['Old AEDs'] = old_aeds_df['Old AEDs'].apply(ast.literal_eval)
    new_aeds_df['New AEDs'] = new_aeds_df['New AEDs'].apply(ast.literal_eval)
    
    old_aeds_df[['lat', 'lon']] = pd.DataFrame(old_aeds_df['Old AEDs'].tolist(), index=nearest_aed_df.index)
    old_aeds_df.drop(columns=['Old AEDs'], inplace=True)
    new_aeds_df[['lat', 'lon']] =  pd.DataFrame(new_aeds_df['New AEDs'].tolist(), index=nearest_aed_df.index)
    new_aeds_df.drop(columns=['New AEDs'], inplace=True)
    
    
    # Append the DataFrame to the nearest_aed_dfs dictionary
    nearest_aed_dfs[city] = nearest_aed_df
    old_aeds_dfs[city] = old_aeds_df
    new_aeds_dfs[city] = new_aeds_df


# Access the DataFrame for a specific city, e.g., Antwerpen
nearest_aed_dfs["Brussels"]

  new_aeds_df = pd.DataFrame(new_aed_columns, columns=["New AEDs"])


ValueError: cannot reindex on an axis with duplicate labels

In [85]:
# Assuming city_data is your DataFrame
columns_without_parentheses = [col for col in city_data['Antwerpen'].columns if not col.endswith(")")]
if columns_without_parentheses:
    print("There are column labels that do not end with ')'")
    print("Columns without parentheses:", columns_without_parentheses)
else:
    print("All column labels end with ')'")

All column labels end with ')'


In [101]:
import pandas as pd

# Assuming city names are stored in the cities list
cities = ["Antwerpen", "Brugge", "Brussels", "Charleroi", "Gent", "Leuven", "Liege", "Oostende"]

# Dictionary to store city card DataFrames
city_card_dfs = {}

# Path to the directory containing the CSV files
data_directory = "/Users/sammcmanagan/Library/Mobile Documents/com~apple~CloudDocs/Documents/M.Sc Statistics & Data Science/Modern Data Analytics/MDA-Project/Data/2_DataSegmented"

# Iterate over each city
for city in cities:
    # Construct the file path
    file_path = f"{data_directory}/{city}_cards_test.csv"
    
    # Read the CSV file
    city_card_df = pd.read_csv(file_path)
    
    # Store the DataFrame in the dictionary
    city_card_dfs[city] = city_card_df

city_card_dfs['Brussels']

Unnamed: 0,id,latitude,longitude,city,event,vehicle,geometry
0,20222800084,50.845140,4.463350,Woluwe-Saint-Pierre (Woluwe-Saint-Pierre),39.0,ambulance,POINT (4.46335 50.84514)
1,20223300076,50.831160,4.376750,Ixelles (Ixelles),11.0,ambulance,POINT (4.37675 50.83116)
2,20223080201,50.864160,4.309110,Molenbeek-Saint-Jean (Molenbeek-Saint-Jean),11.0,pit,POINT (4.30911 50.86416)
3,20231010307,50.840090,4.337400,Anderlecht (Anderlecht),11.0,pit,POINT (4.3374 50.84009)
4,20222400233,50.861296,4.338800,1080 molenbeek-saint-jean (molenbeek-saint-jean),39.0,mug,POINT (4.3387999833 50.861296072)
...,...,...,...,...,...,...,...
1318,20221690288,50.827578,4.348657,1060 saint-gilles (saint-gilles),11.0,,POINT (4.3486566973 50.827578345)
1319,20221530325,50.868096,4.366398,1030 schaerbeek (schaerbeek),39.0,,POINT (4.3663977385 50.868095618)
1320,20223270256,50.851850,4.361280,Bruxelles (Bruxelles),39.0,ambulance,POINT (4.36128 50.85185)
1321,20223110051,50.803450,4.356290,Uccle (Uccle),11.0,mug,POINT (4.35629 50.80345)


In [103]:
# Iterate over each city to add aed_lat and aed_lon columns
for city in cities:
    # Get the nearest AED DataFrame
    nearest_aed_df = nearest_aed_dfs[city]
    
    # Get the city card DataFrame
    city_card_df = city_card_dfs[city]
    
    # Ensure the order matches (if necessary, sort/index align here)
    # For this example, we assume the order matches directly.
    
    # Add aed_lat and aed_lon columns to city card DataFrame
    city_card_df['aed_lat'] = nearest_aed_df['aed_lat']
    city_card_df['aed_lon'] = nearest_aed_df['aed_lon']
    
    # Retain only the specified columns
    city_card_df = city_card_df[['latitude', 'longitude', 'aed_lat', 'aed_lon']]
    
    # Update the dictionary with the modified DataFrame
    city_card_dfs[city] = city_card_df