# Gaborone Health Facility Accessibility Simulation
This notebook runs a focused simulation for Gaborone, analyzing spatial access to health facilities using OSRM routing and population data.

In [1]:
# Imports and setup
import pandas as pd
import numpy as np
from scipy.spatial import cKDTree
import requests
from tqdm import tqdm

In [2]:
# Load the census population table and keep only rows in the Gaborone district.
pop = pd.read_csv('../census_datacleaning/census_population_with_coords.csv')

# Normalise headers to snake_case: trim, lower-case, then turn spaces and slashes into underscores.
normalised_cols = pop.columns.str.strip().str.lower()
pop.columns = normalised_cols.str.replace(' ', '_').str.replace('/', '_')

# Restore the column names the rest of the notebook refers to.
pop = pop.rename(columns={'city_town_village': 'city/town/village', 'census_district': 'district'})

# Case-insensitive district match, then drop rows that lack either coordinate.
in_gaborone = pop['district'].str.lower().eq('gaborone')
pop = pop.loc[in_gaborone].dropna(subset=['latitude', 'longitude'])

print(f'Gaborone population centers: {len(pop)}')

Gaborone population centers: 133


In [3]:
# Load the facility list, keep rows with coordinates, and tidy the type labels.
fac = pd.read_csv('../facilities_with_warehouses.csv')
fac.columns = fac.columns.str.strip()

# Rows without coordinates cannot be matched spatially; lower-case the coordinate
# headers so they match the population table.
fac = (
    fac
    .dropna(subset=['Latitude', 'Longitude'])
    .rename(columns={'Latitude': 'latitude', 'Longitude': 'longitude'})
)

# Cast to str, trim, and collapse internal runs of whitespace to a single space.
delivery_type = fac['Service Delivery Type'].astype(str).str.strip()
fac['Service Delivery Type'] = delivery_type.str.replace(r'\s+', ' ', regex=True)

# Exclude facilities whose name mentions a prison or a school (case-insensitive).
is_excluded = fac['Facility Name'].str.contains('prison|school', case=False, na=False)
fac = fac[~is_excluded]

In [4]:
# OSRM endpoint
OSRM_URL = 'http://localhost:5001/route/v1/driving'


def osrm_distance(lat1, lon1, lat2, lon2, timeout=8):
    """Query the OSRM route service for the first route between two points.

    Parameters
    ----------
    lat1, lon1 : origin latitude and longitude.
    lat2, lon2 : destination latitude and longitude.
    timeout : seconds passed to ``requests.get`` (default 8).

    Returns
    -------
    tuple
        ``(distance / 1000, duration / 60)`` taken from the first route in the
        response, or ``(np.nan, np.nan)`` when a coordinate is missing, the
        request fails, or the response contains no route.
        NOTE(review): presumably OSRM reports metres and seconds, making this
        km and minutes -- confirm against the OSRM API docs.
    """
    # A missing coordinate can never produce a route; skip the HTTP round trip.
    if pd.isna([lat1, lon1, lat2, lon2]).any():
        return np.nan, np.nan

    # The URL is built longitude-first, then latitude.
    url = f'{OSRM_URL}/{lon1},{lat1};{lon2},{lat2}?overview=false'
    try:
        r = requests.get(url, timeout=timeout)
        data = r.json()
        routes = data.get('routes') if isinstance(data, dict) else None
        if routes:
            route = routes[0]
            return route['distance'] / 1000, route['duration'] / 60
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Best-effort lookup: network errors, non-JSON bodies and malformed
        # payloads all degrade to NaN. Anything else is a genuine bug and is
        # allowed to surface instead of being silently swallowed.
        pass
    return np.nan, np.nan

In [5]:
# Nearest facility logic, scoped to Gaborone.
# NOTE(review): originally described as "same as main workflow"; distances here are
# great-circle kilometres -- confirm the main workflow agrees.
_EARTH_RADIUS_KM = 6371.0088  # mean Earth radius


def _latlon_to_unit_xyz(lat_deg, lon_deg):
    """Convert latitude/longitude in degrees to (N, 3) points on the unit sphere."""
    lat = np.radians(np.asarray(lat_deg, dtype=float))
    lon = np.radians(np.asarray(lon_deg, dtype=float))
    cos_lat = np.cos(lat)
    return np.column_stack((cos_lat * np.cos(lon), cos_lat * np.sin(lon), np.sin(lat)))


def nearest_facilities(pop_df, fac_df, subtype, n=1):
    """Attach the ``n`` nearest facilities of a given type to every population row.

    Parameters
    ----------
    pop_df : DataFrame with ``latitude`` and ``longitude`` columns (degrees).
    fac_df : DataFrame with ``latitude``, ``longitude``, ``Service Delivery Type``
        and ``Facility Name`` columns.
    subtype : facility category. ``'Clinic'`` matches clinic / clinic with
        maternity; ``'Hospital'`` matches primary and district hospitals plus
        referral hospitals named Princess Marina or Nyangabgwe; any other value
        is compared case-insensitively against ``Service Delivery Type``.
    n : number of nearest facilities to attach per row (default 1).

    Returns
    -------
    DataFrame
        ``pop_df`` with its index reset, followed by the columns of each matched
        facility suffixed ``_1`` .. ``_n`` and a ``crow_dist_km_<i>`` column with
        the great-circle distance in kilometres. If no facility matches
        ``subtype``, a message is printed and ``pop_df`` is returned unchanged.
    """
    delivery_type = fac_df['Service Delivery Type'].str.lower()
    if subtype == 'Clinic':
        sub = fac_df[delivery_type.isin(['clinic', 'clinic with maternity'])]
    elif subtype == 'Hospital':
        named_referral = (
            (delivery_type == 'referral hospital') &
            fac_df['Facility Name'].str.contains('Princess Marina|Nyangabgwe', case=False, na=False)
        )
        sub = fac_df[delivery_type.isin(['primary hospital', 'district hospital']) | named_referral]
    else:
        sub = fac_df[delivery_type == subtype.lower()]
    if sub.empty:
        print(f'no facilities for {subtype}')
        return pop_df

    # Query in 3-D unit-sphere space rather than raw degrees: a degree of longitude
    # is shorter than a degree of latitude away from the equator, so Euclidean
    # distance on (lat, lon) can pick the wrong neighbour and is not in kilometres.
    tree = cKDTree(_latlon_to_unit_xyz(sub['latitude'], sub['longitude']))
    chord, idx = tree.query(_latlon_to_unit_xyz(pop_df['latitude'], pop_df['longitude']), k=n)
    # With k=1 the query returns 1-D arrays; normalise to (rows, n).
    chord = chord[:, None] if n == 1 else chord
    idx = idx[:, None] if n == 1 else idx

    # Chord length on the unit sphere -> central angle -> great-circle kilometres.
    dist_km = 2.0 * _EARTH_RADIUS_KM * np.arcsin(np.clip(chord / 2.0, 0.0, 1.0))

    nearest = [sub.iloc[idx[:, i]].reset_index(drop=True).add_suffix(f'_{i+1}') for i in range(n)]
    merged = pd.concat(nearest, axis=1)
    for i in range(n):
        merged[f'crow_dist_km_{i+1}'] = dist_km[:, i]
    return pd.concat([pop_df.reset_index(drop=True), merged], axis=1)

In [6]:
# Build one nearest-facility table per facility category, keyed by category name.
results = {}
for facility_type in ('Health Post', 'Clinic', 'Hospital'):
    label = facility_type.lower()
    print(f'finding nearest {label}s...')
    results[facility_type] = nearest_facilities(pop, fac, facility_type)

finding nearest health posts...
finding nearest clinics...
finding nearest hospitals...


In [7]:
# Route every population centre to its nearest facility with OSRM, per category.
# The result columns are written onto the frames held in `results`.
for subtype, df in results.items():
    print(f'routing for {subtype.lower()}...')
    # (origin lat, origin lon, facility lat, facility lon) for each row, in row order.
    coord_rows = zip(df['latitude'], df['longitude'], df['latitude_1'], df['longitude_1'])
    routed = [osrm_distance(*coords) for coords in tqdm(coord_rows, total=len(df))]
    df[f'osrm_dist_km_{subtype}'] = [d_km for d_km, _ in routed]
    df[f'osrm_time_min_{subtype}'] = [t_min for _, t_min in routed]

routing for health post...


100%|██████████| 133/133 [00:01<00:00, 86.84it/s] 


routing for clinic...


100%|██████████| 133/133 [00:01<00:00, 126.23it/s]


routing for hospital...


100%|██████████| 133/133 [00:01<00:00, 108.02it/s]


In [8]:
# Merge all results for Gaborone into one wide table and export it.
#
# Each frame in `results` comes from nearest_facilities() on the same `pop`, so the
# frames share row order and a 0..N-1 index. They are combined by row position
# rather than by 'city/town/village': that name is not guaranteed unique, and
# de-duplicating on it would give every repeated name the first row's facility.
merged = results['Health Post'].copy()
merged = merged.rename(columns={
    'Facility Name_1': 'nearest_HealthPost_name',
    'latitude_1': 'nearest_HealthPost_lat',
    'longitude_1': 'nearest_HealthPost_lon'
})
for subtype in ['Clinic', 'Hospital']:
    keep = [
        'Facility Name_1', 'latitude_1', 'longitude_1',
        f'osrm_dist_km_{subtype}', f'osrm_time_min_{subtype}'
    ]
    subdf = results[subtype][keep].rename(columns={
        'Facility Name_1': f'nearest_{subtype}_name',
        'latitude_1': f'nearest_{subtype}_lat',
        'longitude_1': f'nearest_{subtype}_lon'
    })
    # Guard the positional-alignment assumption before joining on the index.
    if len(subdf) != len(merged):
        raise ValueError(
            f'{subtype} results have {len(subdf)} rows, expected {len(merged)}'
        )
    merged = merged.join(subdf)
merged.to_csv('gaborone_population_nearest_facilities_osrm.csv', index=False)
print('Gaborone simulation complete!')

Gaborone simulation complete!
