Temporary notebook to run and manually test the OSM feature generation functions.

Cluster-level data is taken from the GCS bucket.

In [None]:
%reload_ext autoreload
%autoreload 2

In [None]:
import sys
sys.path.append("../../../")

import uuid

import geopandas as gpd
import pandas as pd
from shapely import wkt

from povertymapping import settings, osm
from povertymapping.osm import OsmDataManager

In [None]:
# Create an OsmDataManager instance once.
# This auto-caches requested data in RAM, so subsequent fetches of the data are faster.
# The same instance is passed to every osm.add_osm_*_features call further down;
# cache_dir is set to data/data_cache under settings.ROOT_DIR.
osm_data_manager = OsmDataManager(cache_dir=settings.ROOT_DIR/"data/data_cache")

# Load in Cluster Data

In [None]:
# This is just a temporary function, so not all of the proper pre-processing is done
#   (e.g. filtering out 0s, creating a box instead of a circular buffer).
def load_cluster_gdf(path, change_geom_to_buffered=True, buffer_radius_m=2000):
    """Load a cluster-level CSV into a GeoDataFrame, optionally buffering each geometry.

    Args:
        path: Location of the CSV file; passed straight to ``pd.read_csv``.
            The file must contain a ``geometry`` column of WKT strings, which
            are interpreted as EPSG:4326 coordinates.
        change_geom_to_buffered: When True, replace every geometry with a
            buffer drawn around it.
        buffer_radius_m: Buffer distance, expressed in EPSG:3857 units
            (nominally metres). Defaults to 2000.

    Returns:
        A GeoDataFrame in EPSG:4326 holding all CSV columns, with the
        ``geometry`` column parsed (and buffered, if requested).

    Side effects:
        Prints the number of clusters loaded.
    """
    df = pd.read_csv(path)

    # Parse the WKT strings of the "geometry" column into shapely geometries.
    gdf = gpd.GeoDataFrame(df, geometry=df["geometry"].apply(wkt.loads), crs="epsg:4326")

    if change_geom_to_buffered:
        # Buffer in a projected CRS so the distance is in metres rather than degrees,
        # then convert back. NOTE: EPSG:3857 lengths are stretched away from the
        # equator, so the on-the-ground radius is somewhat smaller than
        # buffer_radius_m at higher latitudes.
        projected = gdf.to_crs("EPSG:3857").geometry
        gdf["geometry"] = projected.buffer(buffer_radius_m).to_crs("epsg:4326")

    print(f"There are {len(gdf):,} clusters.")

    return gdf

In [None]:
# Try Timor Leste
# `country` is the identifier handed to the OSM feature functions together with
# the shared osm_data_manager.
country = "east-timor"
# Cluster-level CSV for this country, located under settings.DATA_DIR.
ground_truth_path = settings.DATA_DIR/"outputs_dhs_tl_TLHR71DT_TLGE71FL_by_cluster.csv"
# Load the clusters (geometries are buffered by default), then add the OSM
# POI features followed by the OSM road features.
gdf = load_cluster_gdf(ground_truth_path)
gdf = osm.add_osm_poi_features(gdf, country, osm_data_manager)
gdf = osm.add_osm_road_features(gdf, country, osm_data_manager)
# Last expression of the cell, so its result is displayed for manual inspection
# (presumably GeoDataFrame.explore(), i.e. an interactive map -- confirm the
# return type of the osm functions).
gdf.explore()

In [None]:
# Try Myanmar
# `country` is the identifier handed to the OSM feature functions together with
# the shared osm_data_manager.
country = "myanmar"
# Cluster-level CSV for this country, located under settings.DATA_DIR.
ground_truth_path = settings.DATA_DIR/"outputs_dhs_mm_MMHR71DT_MMGE71FL_by_cluster.csv"
# Load the clusters (geometries are buffered by default), then add the OSM
# POI features followed by the OSM road features.
gdf = load_cluster_gdf(ground_truth_path)
gdf = osm.add_osm_poi_features(gdf, country, osm_data_manager)
gdf = osm.add_osm_road_features(gdf, country, osm_data_manager)
# Last expression of the cell, so its result is displayed for manual inspection
# (presumably GeoDataFrame.explore(), i.e. an interactive map -- confirm the
# return type of the osm functions).
gdf.explore()

In [None]:
# Try Cambodia
# `country` is the identifier handed to the OSM feature functions together with
# the shared osm_data_manager.
country = "cambodia"
# Cluster-level CSV for this country, located under settings.DATA_DIR.
ground_truth_path = settings.DATA_DIR/"outputs_dhs_kh_KHHR73DT_KHGE71FL_by_cluster.csv"
# Load the clusters (geometries are buffered by default), then add the OSM
# POI features followed by the OSM road features.
gdf = load_cluster_gdf(ground_truth_path)
gdf = osm.add_osm_poi_features(gdf, country, osm_data_manager)
gdf = osm.add_osm_road_features(gdf, country, osm_data_manager)
# Last expression of the cell, so its result is displayed for manual inspection
# (presumably GeoDataFrame.explore(), i.e. an interactive map -- confirm the
# return type of the osm functions).
gdf.explore()

In [None]:
# Try Philippines
# `country` is the identifier handed to the OSM feature functions together with
# the shared osm_data_manager.
country = "philippines"
# Cluster-level CSV for this country, located under settings.DATA_DIR.
ground_truth_path = settings.DATA_DIR/"outputs_dhs_ph_PHHR71DT_PHGE71FL_by_cluster.csv"
# Load the clusters (geometries are buffered by default), then add the OSM
# POI features followed by the OSM road features.
gdf = load_cluster_gdf(ground_truth_path)
gdf = osm.add_osm_poi_features(gdf, country, osm_data_manager)
gdf = osm.add_osm_road_features(gdf, country, osm_data_manager)
# Last expression of the cell, so its result is displayed for manual inspection
# (presumably GeoDataFrame.explore(), i.e. an interactive map -- confirm the
# return type of the osm functions).
gdf.explore()