# Generating minimal distances to facilities

In [1]:
import os
from pathlib import Path
from logzero import logger
import geopandas as gpd
import pandas as pd
#!pip install pygeos

In [2]:
# Directory containing the GeoJSON files read throughout this notebook.
GEOJSONS = "nl_osm_locations"

# Load two sample layers so their schemas can be compared by hand
# (uncomment the `.columns` lines to inspect them).
childcare = gpd.read_file(f'{GEOJSONS}/nl_childcare.geojson')
#childcare.columns

# The college layer gets its own name: rebinding `childcare` to it made the
# variable name misleading and threw away the frame loaded just above.
college = gpd.read_file(f'{GEOJSONS}/nl_college.geojson')
#college.columns

**Note**: The column names are inconsistent across the data files, so I'll create a function that extracts a unified set of columns from each data frame and returns a single combined frame, which is easier to work with.

In [3]:
def read_geojson(
    name,
    path=GEOJSONS,
    ext="geojson",
    columns=['id', 'name', 'addr:postcode', 'geometry'],
    include_name=True,   # include a column containing the file name. Helps with future filtering.
    skip_error=False
):
    """Load ``<path>/<name>.<ext>`` with GeoPandas and keep only ``columns``.

    Parameters
    ----------
    name : str
        File name without extension; also the value stored in the ``type``
        column when ``include_name`` is true.
    path : str
        Directory that contains the file.
    ext : str
        File extension, without the leading dot.
    columns : list of str
        Columns selected from the loaded frame.
    include_name : bool
        When true, add a ``type`` column whose every value is ``name``.
    skip_error : bool
        When true, any exception raised while reading the file or selecting
        the columns is logged as a warning and ``None`` is returned.
        When false, the exception is re-raised.

    Returns
    -------
    The selected frame, or ``None`` if loading failed and ``skip_error`` is true.
    """
    source = os.path.join(path, f"{name}.{ext}")
    try:
        selected = gpd.read_file(source)[columns]
    except Exception as err:
        # Strict mode: surface the failure to the caller unchanged.
        if not skip_error:
            raise err
        logger.warning(f"failed to read: {source}")
        logger.warning(err)
        return None

    if not include_name:
        return selected
    selected['type'] = name
    return selected

In [4]:
# Names (file name without extension) of the layers to load.
# Only entries ending in ".geojson" are kept: anything else in the directory
# (checkpoint folders, OS metadata files, ...) cannot be opened by
# `read_geojson`, which always appends ".geojson" to the name.
# Sorting makes the load order independent of the arbitrary order in which
# `os.listdir` returns entries.
geojsons = sorted(
    Path(entry).stem
    for entry in os.listdir(GEOJSONS)
    if entry.endswith(".geojson")
)
#geojsons

In [5]:
def read_geojson_multi(names, *args, **kwargs):
    """Read several layers with ``read_geojson`` and stack them row-wise.

    Parameters
    ----------
    names : iterable of str
        Layer names, each passed as the first argument to ``read_geojson``.
    *args, **kwargs
        Forwarded unchanged to every ``read_geojson`` call.

    Returns
    -------
    One frame containing the rows of every loaded layer, re-indexed from 0.
    ``None`` results (layers skipped by ``read_geojson``) are handed to
    ``pd.concat`` as-is; per pandas' documented behaviour they are dropped
    unless every entry is ``None``.
    """
    frames = []
    for layer in names:
        frames.append(read_geojson(layer, *args, **kwargs))
    return pd.concat(frames, axis=0, ignore_index=True)

# Load every layer listed in `geojsons` into one frame. With skip_error=True a
# layer that fails to load is logged as a warning and left out instead of
# aborting the whole run.
df = read_geojson_multi(names=geojsons, skip_error=True)
#df

Extract the latitude and longitude from each `POINT` geometry:

In [6]:
# Flatten each point geometry into two plain columns (y -> lat, x -> lon),
# then drop the geometry column itself.
df['lat'], df['lon'] = df['geometry'].y, df['geometry'].x
df = df.drop(columns='geometry')
#df

In [7]:
# Persist the flattened locations. The index is only the row counter produced
# by the concatenation, so it is not written; otherwise it comes back as an
# extra "Unnamed: 0" column when the file is read again.
df.to_csv("nl_osm_locations.csv", index=False)

In [9]:
# Both point sets below are built from longitude/latitude columns, i.e.
# coordinates in degrees on WGS 84, which is EPSG:4326. The previous label,
# EPSG:32733, is WGS 84 / UTM zone 33S (metres) and did not describe the data.
# Distances computed directly on these frames are in degrees; reproject to a
# projected CRS first if metric distances are needed.
CRS = "EPSG:4326"

# Facilities: one point per facility, `type` holds the source layer name.
facilities = pd.read_csv('nl_osm_locations.csv')
facilities = gpd.GeoDataFrame(
    facilities,
    geometry=gpd.points_from_xy(facilities['lon'], facilities['lat']),
    crs=CRS,
).drop(columns=['lat', 'lon'])

facility_types = list(facilities['type'].unique())
#facility_types

# Neighbourhoods: one point per neighbourhood, built the same way as above.
neighborhoods = pd.read_csv('neighbourhood_to_coordinate.csv')
neighborhoods = gpd.GeoDataFrame(
    neighborhoods,
    geometry=gpd.points_from_xy(neighborhoods['longitude'], neighborhoods['latitude']),
    crs=CRS,
).drop(columns=['latitude', 'longitude'])
#neighborhoods

In [14]:
GEOGRAPHIC_CRS = "EPSG:4326"  # WGS 84 longitude/latitude, degrees
METRIC_CRS = "EPSG:28992"     # Amersfoort / RD New: projected CRS for the Netherlands, metres


def _as_metric(frame):
    """Return a copy of ``frame`` reprojected to ``METRIC_CRS``.

    The point geometries in this notebook are built from longitude/latitude
    columns, so the coordinates are treated as WGS 84 degrees regardless of
    the CRS label currently attached to the frame.
    """
    return frame.set_crs(GEOGRAPHIC_CRS, allow_override=True).to_crs(METRIC_CRS)


def join_nearest_type(type_):
    """Attach to every neighbourhood its nearest facility of layer ``type_``.

    The search runs in a projected, metric CRS. Searching in raw degrees
    distorts the result, because a degree of longitude is much shorter than a
    degree of latitude at Dutch latitudes.

    Returns the joined frame with a ``distance`` column in metres and the
    neighbourhood geometry expressed as longitude/latitude.
    """
    nearest = gpd.sjoin_nearest(
        _as_metric(neighborhoods),
        _as_metric(facilities.loc[facilities['type'] == type_]),
        distance_col='distance',
    )
    # Hand the geometry back in degrees so downstream output keeps lon/lat coordinates.
    return nearest.to_crs(GEOGRAPHIC_CRS)


def join_nearest_types(types):
    """Run ``join_nearest_type`` for each entry of ``types`` and stack the results.

    Returns one frame re-indexed from 0.
    """
    dfs = [join_nearest_type(t) for t in types]
    return pd.concat(dfs, axis=0, ignore_index=True)


joined = join_nearest_types(facility_types)
# Metres -> kilometres, rounded to two decimals.
joined['distance'] = (joined['distance'] / 1000).round(2)
#joined

NotImplementedError: Currently, only PyGEOS >= 0.10.0 supports `nearest_all`. To use PyGEOS within GeoPandas, you need to install PyGEOS: 'conda install pygeos' or 'pip install pygeos'

In [15]:
# Categories
category = {
    "playground": ["nl_kindergarten", "nl_childcare"],
    "sport": [
        "nl_sports_centre",
        "nl_sports_hall",
        "nl_fitness_station",
        "nl_fitness_centre",
        "nl_stadium",
        "nl_swimming_pool",
        "nl_golf_course",
        "nl_horse_riding",
        "nl_university",
        "nl_track",
        "nl_college",
        "nl_pitch"
    ],
    "park": ["nl_park"]
}

In [None]:
def categorize(type_):
    """Return the name of the category whose layer list contains ``type_``.

    Categories are checked in the order they appear in the module-level
    ``category`` mapping and the first match wins. Returns ``None`` when no
    category lists ``type_``.
    """
    for label, members in category.items():
        if type_ in members:
            return label
    return None

# Quick check: "nl_pitch" is listed under "sport" in `category`.
categorize("nl_pitch")

In [None]:
# Label every joined row with the category of its facility layer.
# Only the `type` column is needed, so it is mapped directly instead of
# running a row-wise `apply(..., axis=1)`, which builds a row object per
# record and has an ill-defined result on an empty frame.
joined['category'] = joined['type'].map(categorize)
#joined
#joined.to_csv("neighborhood_facilities_joined_all.csv")

In [None]:
# Minimum Distance
# Column names of `joined` without the geometry column (not used further in this cell).
cs = joined.drop(columns='geometry').columns.tolist()

# The geometry column doesn't play well with aggregations, so only `distance`
# is aggregated: smallest distance per neighbourhood and category.
min_cat = (
    joined
    .groupby(
        by=['neighbourhood', 'municipality', 'play_demand', 'a_00_14', 'category'],
        as_index=False,
    )['distance']
    .min()
)
#min_cat

# NOTE(review): the full `joined` frame is exported here, not `min_cat` —
# confirm which of the two this file is meant to contain.
joined.to_csv(path_or_buf="neighbourhood_distances.csv")