# MAXIMAL COVERAGE ALGORITHM

## Libraries

In [118]:
import numpy as np
import os
import pandas as pd
import re

## Constants

In [149]:
# Root data folder. Set the MDA_DATA_PATH environment variable to point the
# notebook at another machine's copy of the data; the default is the
# original hard-coded location, so existing runs behave exactly as before.
data_path = os.environ.get(
    "MDA_DATA_PATH",
    "/Users/sammcmanagan/Library/Mobile Documents/com~apple~CloudDocs/Documents/M.Sc Statistics & Data Science/Modern Data Analytics/MDA-Project/Data",
)

# Sub-folders of data_path. They keep their leading "/" because the rest of
# the notebook appends them with "+" (data_path + input_path_...).
input_path_segmented = "/2_DataSegmented"
input_path_dist = "/6_DistanceMatricesTest"
# Output folder for the app data; derived from data_path so both always
# point into the same data root.
final_data_path = data_path + "/7_DataForApp"

# Cities for which "<city>_cost_matrix.csv" and "<city>_cards_test.csv" are read.
cities = ["Antwerpen", "Brugge", "Brussels", "Charleroi", "Gent", "Leuven", "Liege", "Oostende"]


# Maximum distance at which an AED counts as covering a cardiac arrest
# (presumably metres, matching the "within 150m" wording of the report).
COVERAGE_RADIUS = 150

## Functions

In [150]:
# Parses the "(a, b)" part of a column label; anything after ")" (such as a
# ".1", ".2", ... suffix) is ignored.

def extract_coordinates(s):
    """Return the numbers inside the first ``(...)`` of ``s`` as a tuple of floats.

    Text before the opening parenthesis and after the closing one is
    ignored, so ``"(51.2, 4.4).1"`` and ``"(51.2, 4.4)"`` give the same
    result. Whitespace around the comma is optional.

    Parameters
    ----------
    s : str
        Label containing a parenthesised, comma-separated list of numbers.

    Returns
    -------
    tuple of float
        The parsed numbers, in the order they appear in the label.

    Raises
    ------
    ValueError
        If ``s`` has no ``(...)`` group or a value is not a valid float.
    """
    start = s.find('(')
    end = s.find(')', start + 1)
    if start == -1 or end == -1:
        raise ValueError(f"No '(...)' coordinate group found in {s!r}")
    # Split on the bare comma: float() already tolerates surrounding
    # whitespace, so "(1,2)", "(1, 2)" and "( 1 , 2 )" all parse.
    return tuple(float(part) for part in s[start + 1:end].split(','))

## Importing data

In [160]:

# Load every city's cost matrix into city_data, keyed by city name.
# Files are read through explicit paths rather than os.chdir, so the
# process working directory is left untouched and the cell can be re-run
# in any order relative to the other cells.
dist_dir = data_path + input_path_dist
city_data = {}

for city in cities:
    # index_col=0: the first CSV column holds the row labels, not data.
    city_data[city] = pd.read_csv(os.path.join(dist_dir, f"{city}_cost_matrix.csv"), index_col=0)
    print(f"Data for {city} loaded successfully.")

Data for Antwerpen loaded successfully.
Data for Brugge loaded successfully.
Data for Brussels loaded successfully.
Data for Charleroi loaded successfully.
Data for Gent loaded successfully.
Data for Leuven loaded successfully.
Data for Liege loaded successfully.
Data for Oostende loaded successfully.


## Comparing coverage of old and new AED placements

In [161]:
# For every city, report the share of rows (cardiac arrests) that have at
# least one AED closer than COVERAGE_RADIUS, separately for the old and the
# new AED placements.
for city, cost_matrix in city_data.items():
    # Cells that cannot be parsed as numbers become NaN; NaN < radius is
    # False, so such cells never count as coverage.
    distances = cost_matrix.apply(pd.to_numeric, errors='coerce')

    # First half of the columns are old AEDs, second half are new AEDs.
    mid_index = distances.shape[1] // 2

    # Vectorised "any AED within the radius" per row; results are kept in
    # local Series instead of being added as extra columns to the matrix.
    old_covered = distances.iloc[:, :mid_index].lt(COVERAGE_RADIUS).any(axis=1)
    new_covered = distances.iloc[:, mid_index:].lt(COVERAGE_RADIUS).any(axis=1)

    percentage_old_aed = old_covered.mean() * 100
    percentage_new_aed = new_covered.mean() * 100

    print(f"Results for {city}:")
    print(f"Percentage of cardiac arrests with an AED within {COVERAGE_RADIUS}m (old placements): {percentage_old_aed:.2f}%")
    print(f"Percentage of cardiac arrests with an AED within {COVERAGE_RADIUS}m (new placements): {percentage_new_aed:.2f}%\n")


Results for Antwerpen:
Percentage of cardiac arrests with an AED within 150m (old placements): 20.93%
Percentage of cardiac arrests with an AED within 150m (new placements): 38.24%

Results for Brugge:
Percentage of cardiac arrests with an AED within 150m (old placements): 11.11%
Percentage of cardiac arrests with an AED within 150m (new placements): 20.00%

Results for Brussels:
Percentage of cardiac arrests with an AED within 150m (old placements): 32.65%
Percentage of cardiac arrests with an AED within 150m (new placements): 54.65%

Results for Charleroi:
Percentage of cardiac arrests with an AED within 150m (old placements): 16.01%
Percentage of cardiac arrests with an AED within 150m (new placements): 21.91%

Results for Gent:
Percentage of cardiac arrests with an AED within 150m (old placements): 14.83%
Percentage of cardiac arrests with an AED within 150m (new placements): 35.33%

Results for Leuven:
Percentage of cardiac arrests with an AED within 150m (old placements): 16.98%


## Creating the old AED, new AED and nearest-AED dataframes

In [162]:
### Creating a single df of all the points we need for the app

# Column layout shared by every row type written to app_data.csv.
APP_COLUMNS = ['lat', 'lon', 'aed_lat', 'aed_lon', 'distance', 'type', 'city']


def _aed_points(labels, point_type, city):
    """Build app rows for AEDs from their cost-matrix column labels.

    Each label is parsed with ``extract_coordinates``; the first parsed
    value is stored as ``lat`` and the second as ``lon``. The nearest-AED
    columns (``aed_lat``, ``aed_lon``, ``distance``) only apply to cardiac
    arrest rows and are left as NaN here.
    """
    points = pd.DataFrame(
        [extract_coordinates(label) for label in labels],
        columns=['lat', 'lon'],
    )
    points = points.assign(
        aed_lat=np.nan,
        aed_lon=np.nan,
        distance=np.nan,
        type=point_type,
        city=city,
    )
    return points[APP_COLUMNS]


def _card_points(city, new_aeds, new_distances):
    """Build app rows for a city's cardiac arrests with their nearest new AED.

    ``new_distances`` is a float array with one row per cardiac arrest and
    one column per row of ``new_aeds``. Rows are assumed to be in the same
    order as the city's "<city>_cards_test.csv" file (the notebook pairs
    them purely by position) -- TODO confirm against the script that builds
    the cost matrices.
    """
    cards_file = os.path.join(data_path + input_path_segmented, f"{city}_cards_test.csv")
    locations = pd.read_csv(cards_file)[['latitude', 'longitude']]

    if len(locations) != new_distances.shape[0]:
        raise ValueError(
            f"{city}: {len(locations)} cardiac arrests in {cards_file!r} but "
            f"{new_distances.shape[0]} rows in the cost matrix"
        )

    # np.argmin returns the position of a NaN when a row contains one, so
    # unparseable cells are replaced by +inf before searching for the
    # nearest AED.
    searchable = np.where(np.isnan(new_distances), np.inf, new_distances)
    nearest = searchable.argmin(axis=1)
    distance = new_distances[np.arange(len(nearest)), nearest]

    # A row made up only of NaN has no usable AED: keep its AED fields empty
    # instead of pointing at an arbitrary first column.
    has_aed = ~np.isnan(distance)

    cards = pd.DataFrame({
        'lat': locations['latitude'].to_numpy(),
        'lon': locations['longitude'].to_numpy(),
        'aed_lat': np.where(has_aed, new_aeds['lat'].to_numpy()[nearest], np.nan),
        'aed_lon': np.where(has_aed, new_aeds['lon'].to_numpy()[nearest], np.nan),
        'distance': distance,
        'type': 'card',
        'city': city,
    })
    return cards[APP_COLUMNS]


all_dataframes = []

for city in cities:
    cost_matrix = city_data[city]

    # First half of columns are old aeds, second half are new aeds
    mid_index = cost_matrix.shape[1] // 2

    old_aeds_df = _aed_points(cost_matrix.columns[:mid_index], 'old_aed', city)
    new_aeds_df = _aed_points(cost_matrix.columns[mid_index:], 'new_aed', city)

    # Distances to the new AEDs only; unparseable cells become NaN.
    new_distances = (
        cost_matrix.iloc[:, mid_index:]
        .apply(pd.to_numeric, errors='coerce')
        .to_numpy(dtype=float)
    )
    cards = _card_points(city, new_aeds_df, new_distances)

    # Concatenate dataframes for the current city
    all_dataframes.append(pd.concat([cards, old_aeds_df, new_aeds_df], ignore_index=True))

# Concatenate all city dataframes into one mega dataframe
mega_df = pd.concat(all_dataframes, ignore_index=True)
# Make sure the output folder exists so the export does not fail on a fresh checkout.
os.makedirs(final_data_path, exist_ok=True)
mega_df.to_csv(os.path.join(final_data_path, "app_data.csv"), index=False)