# URBAN AREAS

## Libraries

In [None]:
import geopandas as gpd
import numpy as np
import os
import osmnx as ox
import pandas as pd
from scipy.spatial import KDTree
from shapely.geometry import LineString
from sklearn.model_selection import train_test_split

## Constants

In [None]:
# Absolute locations of the input polygons, the cleaned input tables and the
# two output folders written by this notebook.
belgium_polygons_path = "/Users/Jovan/Desktop/MDA-Project/Data/BelgiumUrbanPolygons/BE_STATBEL_SH_SU_UA_CITY_2019_v60.gpkg"
input_folder_path = "/Users/Jovan/Desktop/MDA-Project/Data/1_DataClean"
output_folder_path_segmenting = "/Users/Jovan/Desktop/MDA-Project/Data/2_DataSegmented"
output_folder_path_new = "/Users/Jovan/Desktop/MDA-Project/Data/3_DataNewAeds"

# Random state shared by the train/test split and the candidate-point sampling
SEED = 42
# Share of each city's cards held out as the test set
TEST_SIZE = 0.25
# Fraction of the sampled street points kept as candidate AED locations
SAMPLE_SIZE = 0.015
# Radius handed to remove_close_points, compared against raw x/y coordinates.
# NOTE(review): presumably decimal degrees (EPSG:4326) rather than metres - confirm.
MIN_DISTANCE = 0.0012

## Functions

In [None]:
# Fetch the OpenStreetMap street edges that intersect an urban polygon
def get_streets_within_polygon(polygon):
    """Return the street edges that intersect ``polygon``.

    The street network is requested for the polygon's bounding box and the
    resulting edges are then narrowed to those whose geometry intersects the
    polygon itself.
    """
    # bounds is ordered (min x, min y, max x, max y)
    west, south, east, north = polygon.bounds

    # Street graph for the whole bounding box
    graph = ox.graph_from_bbox(north=north, south=south, east=east, west=west, network_type='all')

    # Edges only, as a GeoDataFrame
    edges = ox.graph_to_gdfs(graph, nodes=False, edges=True)

    # Drop the edges that lie in the box but never meet the polygon
    meets_polygon = edges.intersects(polygon)
    return edges[meets_polygon]

# Sample evenly spaced points along every street geometry
def sample_points_on_streets(edges, num_points=4):
    """Return points interpolated at evenly spaced distances along each line.

    For every row whose geometry is a ``LineString``, ``num_points`` distances
    running from 0 to the line's length are interpolated on that line. Rows
    holding any other geometry type are skipped.
    """
    sampled = []
    for _, edge in edges.iterrows():
        geom = edge.geometry
        if not isinstance(geom, LineString):
            continue
        offsets = np.linspace(0, geom.length, num_points)
        sampled += [geom.interpolate(offset) for offset in offsets]
    return sampled

def remove_close_points(gdf, min_distance):
    """Thin a set of points so no two kept points are within ``min_distance``.

    Points are considered in row order. A point is dropped only when an
    earlier point that is itself kept lies within ``min_distance`` of it, so
    a point is never removed merely because a neighbour that was already
    discarded happened to be close.

    Parameters
    ----------
    gdf : frame whose ``geometry`` column holds points exposing ``.x``/``.y``.
    min_distance : radius, in the same units as the point coordinates.

    Returns
    -------
    The kept rows of ``gdf``, in their original row order.
    """
    # An empty frame yields a 1-D empty array, which KDTree cannot index
    if len(gdf) == 0:
        return gdf.iloc[:0]

    # Convert the geometries to an (n, 2) array of coordinates
    coords = np.array([[point.x, point.y] for point in gdf.geometry])

    # Build a KDTree for fast spatial indexing
    tree = KDTree(coords)

    # All index pairs (i, j), i < j, lying within the minimum distance
    pairs = tree.query_pairs(min_distance)

    to_keep = set(range(len(gdf)))

    # Sorted order guarantees every pair that could remove i has been handled
    # before (i, j) is reached, so i's membership is final at this point.
    for i, j in sorted(pairs):
        if i in to_keep:
            to_keep.discard(j)

    # Positional selection in ascending order keeps the original row order
    return gdf.iloc[sorted(to_keep)]

def filter_points_within_polygon(points, polygon, lat_column='latitude', lon_column='longitude'):
    """Return the rows of ``points`` whose coordinates fall within ``polygon``.

    Parameters
    ----------
    points : table with one column of latitudes and one of longitudes.
    polygon : geometry the points are tested against.
    lat_column, lon_column : names of the coordinate columns in ``points``.

    Returns
    -------
    A GeoDataFrame with the matching rows and a ``geometry`` column built
    from the longitude (x) and latitude (y) columns.
    """
    # Work on a copy so the caller's table is not given (or does not have
    # overwritten) a 'geometry' column as a side effect of filtering.
    points = points.copy()

    # Build point geometries from longitude (x) and latitude (y)
    points['geometry'] = gpd.points_from_xy(points[lon_column], points[lat_column])

    # Convert the table to a GeoDataFrame using that column
    gdf_points = gpd.GeoDataFrame(points, geometry='geometry')

    # Keep only the points within the polygon
    inside = gdf_points.geometry.within(polygon)
    return gdf_points[inside]

## Importing data

Importing the dataset with the polygons for the 15 Belgian cities.
Only the cities whose `OBJECTID` is listed below are kept, and the long "(greater city)" names are replaced with short ones.

In [None]:
cities = gpd.read_file(belgium_polygons_path)

# OBJECTIDs of the cities kept for the analysis
objectid_list = [1, 2, 3, 4, 5, 6, 8, 11]
# .copy() makes the selection an independent frame, so the CityName
# assignment below is not a chained assignment on a slice of the original.
cities = cities[cities['OBJECTID'].isin(objectid_list)].copy()

# Long official names -> short names used in file and variable names
city_name_mapping = {
    'Bruxelles / Brussel (greater city)': 'Brussels',
    'Charleroi (greater city)': 'Charleroi',
    'Li√®ge (greater city)': 'Liege',
    # The key above looks like a mis-decoded "Liège"; the correctly encoded
    # spelling is mapped as well so the rename works however the name is read.
    'Liège (greater city)': 'Liege',
}

cities['CityName'] = cities['CityName'].replace(city_name_mapping)

# Reproject the polygons to EPSG:4326
cities = cities.to_crs(epsg=4326)

Importing the clean data (from 1_DataCleaning.ipynb)

In [None]:
os.chdir(input_folder_path)

# Load the three cleaned tables from the current folder
aeds, cards, vehicles = (
    gpd.read_file(file_name)
    for file_name in ("aeds.csv", "cards.csv", "vehicles.csv")
)

# Coordinates are converted to numbers; unparseable values become NaN
for _table in (aeds, cards, vehicles):
    for _coord in ('latitude', 'longitude'):
        _table[_coord] = pd.to_numeric(_table[_coord], errors='coerce')

# One row per distinct coordinate pair, keeping the last occurrence
cards = cards.drop_duplicates(subset=['latitude', 'longitude'], keep='last')

## Segmenting aeds, cards and vehicles datasets by city

In [None]:
os.chdir(output_folder_path_segmenting)

for city_name, city_polygon in zip(cities['CityName'], cities['geometry']):
    # AEDs located inside this city
    city_aeds = filter_points_within_polygon(aeds, city_polygon)
    print(f"Number of AEDs in {city_name}: {len(city_aeds)}")
    city_aeds.to_csv(f'{city_name}_aeds.csv', index=False)

    # Cards located inside this city
    city_cards = filter_points_within_polygon(cards, city_polygon)
    print(f"Number of cards in {city_name}: {len(city_cards)}")
    city_cards.to_csv(f'{city_name}_cards.csv', index=False)

    # Hold out TEST_SIZE of the city's cards; SEED fixes the shuffle
    cards_train, cards_test = train_test_split(
        city_cards,
        test_size=TEST_SIZE,
        random_state=SEED,
    )
    for split_name, split_frame in (('train', cards_train), ('test', cards_test)):
        split_frame.to_csv(f'{city_name}_cards_{split_name}.csv', index=False)

    # Vehicles located inside this city
    city_vehicles = filter_points_within_polygon(vehicles, city_polygon)
    print(f"Number of vehicles in {city_name}: {len(city_vehicles)}")
    city_vehicles.to_csv(f'{city_name}_vehicles.csv', index=False)

    print("\n")

## Calculating possible AED locations (new_aeds)

In [None]:
os.chdir(output_folder_path_new)

for city_name, city_polygon in cities[['CityName', 'geometry']].values:
    print("Calculating possible AEDS for " + city_name + "...")

    # Per-city results are published as notebook-level variables named
    # "<city>_<kind>". They are bound through globals() rather than exec(),
    # so a city name read from the data file is never executed as code.
    city_key = city_name.lower()
    notebook_vars = globals()
    notebook_vars[f"{city_key}_polygon"] = city_polygon

    # Fetch the streets that intersect the city polygon
    streets = get_streets_within_polygon(city_polygon)
    notebook_vars[f"{city_key}_streets"] = streets

    # Sample 3 points along every street
    points = sample_points_on_streets(streets, num_points=3)
    notebook_vars[f"{city_key}_points"] = points

    # Candidate locations as a GeoDataFrame in the streets' CRS
    new_aeds = gpd.GeoDataFrame(geometry=points, crs=streets.crs)

    # Randomly keep a SAMPLE_SIZE fraction of the candidates (seeded)
    new_aeds = new_aeds.sample(frac=SAMPLE_SIZE, random_state=SEED)

    # Drop candidates closer than MIN_DISTANCE to another candidate.
    # NOTE(review): the original comment said "roughly within 50m"; the
    # threshold is applied to raw coordinates, so confirm the unit.
    new_aeds = remove_close_points(new_aeds, min_distance=MIN_DISTANCE)

    # Bind the final, thinned result. The lower-case name follows the other
    # per-city variables; the mixed-case name is the one the original cell
    # created for this step and is kept so existing references still work.
    notebook_vars[f"{city_key}_new_aeds"] = new_aeds
    notebook_vars[f"{city_name}_new_aeds"] = new_aeds

    # Export new possible aed locations
    new_aeds.to_csv(f'{city_name}_new_aeds.csv', index=False)