In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load (or reload) the autoreload extension and enable mode 2, so that imported
# modules are re-imported before each cell runs and source edits are picked up
# without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import os
import sys

sys.path.append("../../../")

import getpass
import pickle
from pathlib import Path

import contextily as cx
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import gdown
from relativewealth import nightlights, settings
from relativewealth.dhs import generate_dhs_cluster_level_data
from relativewealth.feature_engineering import (
    categorize_wealth_index,
    generate_features,
)
from relativewealth.iso3 import get_region_name
from relativewealth.rollout_grids import get_region_filtered_bingtile_grids

# Model Prediction on Rollout Grids

This notebook runs the final model to create relative wealth estimations over populated areas within the given country. The model predictions will have a spatial resolution of 2.4km.

The model generates the `predicted relative wealth` variable, which estimates the wealth of a specific area relative to the national average. A value of 0 signifies that the area's wealth is similar to the national average, while a positive or negative value indicates wealth above or below the national average, respectively.

The predicted relative wealth value is later binned into 5 wealth categories A-E by dividing the distribution into quintiles (every 20th percentile).

## Set up Data Access
The following cell reads your EOG credentials from the `EOG_USER` and `EOG_PASSWORD` environment variables and prompts you for any that are not set. See [this page](https://eogdata.mines.edu/products/register/) to learn how to set up your EOG account.

In [3]:
#papermill_description="Log-in using EOG credentials"
# Prefer credentials supplied through the environment; prompt interactively
# only for the values that are not set.
username = os.environ.get("EOG_USER")
if username is None:
    username = input("Username?")

password = os.environ.get("EOG_PASSWORD")
if password is None:
    # getpass avoids echoing the typed password
    password = getpass.getpass("Password?")

# save_token=True so that the access token gets stored in ~/.eog_creds/eog_access_token
access_token = nightlights.get_eog_access_token(
    username,
    password,
    save_token=True,
)

2023-06-13 18:13:34.247 | INFO     | relativewealth.nightlights:get_eog_access_token:42 - Saving access_token to /home/butchtm/.eog_creds/eog_access_token.txt
2023-06-13 18:13:34.251 | INFO     | relativewealth.nightlights:get_eog_access_token:50 - Adding access token to environment var EOG_ACCESS_TOKEN


## Set country-specific parameters

In [4]:
# Country code used in the rollout directory name and in all output filenames
COUNTRY_CODE = "tl"
# Passed to generate_features as `country_osm`
# (presumably the OpenStreetMap extract name for the country — confirm)
COUNTRY_OSM = "east-timor"
# Passed to generate_features as `ookla_year`
OOKLA_YEAR = 2019
# Passed to generate_features as `nightlights_year`
NIGHTLIGHTS_YEAR = 2016
# Optional URL to download the model from when the model file is missing locally;
# when None, no download is attempted
MODEL_WEIGHTS_URL = None
# Date prefix used in rollout filenames; when None it is derived later from the
# working directory path
ROLLOUT_DATE = None

In [None]:
# NOTE(review): presumably a Google Drive share link to model weights that could be
# assigned to MODEL_WEIGHTS_URL — confirm before use.
# https://drive.google.com/file/d/1q9ev9qlXf5p0-CTuJ8IO6Qi-tEIuRqCp/view?usp=share_link

In [None]:
# Resolved data root taken from the project settings
DATA_DIR = settings.DATA_DIR.resolve()

# Country-specific folder under the data root, created if it does not exist yet
ROLLOUT_DIR = DATA_DIR.joinpath(f"rollout/{COUNTRY_CODE}")
ROLLOUT_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# When no rollout date is supplied, derive it from the name of the directory that
# contains the current working directory: its first three "-"-separated parts are
# used (presumably a YYYY-MM-DD prefix — confirm against the notebook folder layout).
# Path.cwd().parent.name replaces splitting os.getcwd() on "/", which only works
# with POSIX path separators.
if ROLLOUT_DATE is None:
    ROLLOUT_DATE = "-".join(Path.cwd().parent.name.split("-")[:3])

# GeoJSON file holding the rollout grids for this country and date
rollout_grids_path = Path(ROLLOUT_DIR) / f"{ROLLOUT_DATE}-{COUNTRY_CODE}-rollout-grids.geojson"
rollout_grids_path

In [None]:
# Ensure the rollout directory (and any missing parents) exists; exist_ok=True
# makes this a no-op when the directory is already there.
ROLLOUT_DIR.mkdir(parents=True, exist_ok=True)

## Set Model Parameters

In [None]:
# Location of the pickled single-country model used for prediction
model_filename = f"{ROLLOUT_DATE}-{COUNTRY_CODE}-single-country-model.pkl"
MODEL_SAVE_PATH = Path(ROLLOUT_DIR, model_filename)

In [None]:
#papermill_description="Load model"
# Make sure the model file is available locally before it is loaded:
#   1. reuse the file if it already exists;
#   2. otherwise download it from MODEL_WEIGHTS_URL when one is configured;
#   3. otherwise stop with an explicit error instead of reporting an "existing"
#      model that is not actually on disk.
if MODEL_SAVE_PATH.exists():
    print(f"Using existing model on {MODEL_SAVE_PATH}")
elif MODEL_WEIGHTS_URL is not None:
    MODEL_SAVE_PATH.parent.mkdir(parents=True, exist_ok=True)
    model_path = gdown.download(url=MODEL_WEIGHTS_URL, fuzzy=True, output=MODEL_SAVE_PATH.as_posix())
    # Treat a missing return value or a missing file as a failed download
    if model_path is None or not MODEL_SAVE_PATH.exists():
        raise RuntimeError(
            f"Failed to download model from url {MODEL_WEIGHTS_URL} to {MODEL_SAVE_PATH}"
        )
    print(f"Downloaded model from url {MODEL_WEIGHTS_URL}")
else:
    raise FileNotFoundError(
        f"Model file not found at {MODEL_SAVE_PATH} and MODEL_WEIGHTS_URL is not set"
    )

## Load Country Rollout AOI

The rollout area of interest is split into 2.4km grid tiles (zoom level 14), matching the areas used during model training. The grids are also filtered to only include populated areas based on Meta's High Resolution Settlement Layer (HRSL) data.

Refer to the previous notebook `2_tl_generate_grids.ipynb` for documentation on generating this grid.

In [None]:
# Read the rollout grid tiles from the GeoJSON file at rollout_grids_path
aoi = gpd.read_file(rollout_grids_path)
# aoi.explore()  # Uncomment to view data in a map

## Generate Features For Rollout AOI

In [None]:
%%time
#papermill_description="Generate features for rollout AOI"
rollout_aoi = aoi.copy()

# Create features dataframe using generate_features module
features = generate_features(
    rollout_aoi,
    country_osm=COUNTRY_OSM,
    ookla_year=OOKLA_YEAR,
    nightlights_year=NIGHTLIGHTS_YEAR,
    scale=False,
    features_only=True,
)

## Inspect the generated features

In [None]:
# Print a summary of the generated feature columns (dtypes and non-null counts)
features.info()

## Run Model on AOI

### Load Model

In [None]:
#papermill_description="Load model"
# NOTE(security): unpickling can execute arbitrary code, and this file may have been
# downloaded from MODEL_WEIGHTS_URL — only load model files from a trusted source.
with MODEL_SAVE_PATH.open("rb") as model_file:
    model = pickle.load(model_file)

### Make Predictions

In [None]:
#papermill_description="Predict Relative Wealth Index for rollout AOI"
# Predict from the raw feature array and store the result as a new column on the
# rollout grid.
# NOTE(review): assumes the feature columns match, in order, those the model was
# trained on, and that `features` rows align with `rollout_aoi` rows — confirm.
rollout_aoi["Predicted Relative Wealth Index"] = model.predict(features.values)


## Binning predictions into wealth categories

Afterwards, we label the predicted relative wealth values by binning them into 5 categories: `A`, `B`, `C`, `D`, and `E`, where `A` is the highest and `E` is the lowest.

We can create these wealth categories by splitting the output `Predicted Relative Wealth Index` distribution into 5 equally sized **quintiles**, i.e. every 20th percentile. 

This categorization may be modified to suit the context of the target country.

In [None]:
#papermill_description="Bin Relative Wealth Index for rollout AOI"
# Simple quintile approach
predicted_index = rollout_aoi["Predicted Relative Wealth Index"]
wealth_category = categorize_wealth_index(predicted_index, split_quantile=False)

# Store the category labels as plain strings
rollout_aoi["Predicted Wealth Category (quintile)"] = wealth_category.astype(str)

### Save Output

In [None]:
%%time
#papermill_description="Save Relative Wealth Index for rollout AOI"
rollout_aoi.to_file(
    f"{ROLLOUT_DIR}/{ROLLOUT_DATE}-{COUNTRY_CODE}-rollout-output.geojson",
    driver="GeoJSON",
    index=False,
)

In [None]:
#papermill_description="Save Relative Wealth Index for rollout AOI with features"
# Attach the feature columns to the rollout grid (index-aligned join) and write
# the combined table to its own GeoJSON file
rollout_output_with_features = rollout_aoi.join(features)

rollout_output_with_features_path = (
    f"{ROLLOUT_DIR}/{ROLLOUT_DATE}-{COUNTRY_CODE}-rollout-output-with-features.geojson"
)
rollout_output_with_features.to_file(
    rollout_output_with_features_path,
    index=False,
    driver="GeoJSON",
)

## Visualizations

### Inspect predicted wealth index and output dataframe

In [None]:
# Histogram of the predicted relative wealth index values
rollout_aoi[["Predicted Relative Wealth Index"]].hist()

In [None]:
# Preview the first rows of the output dataframe
rollout_aoi.head()

### Create Static Maps
#### Plot Predicted Relative Wealth Index

In [None]:
#papermill_description="Create static maps"
# Reproject to EPSG:3857 (Web Mercator) before drawing over the basemap tiles
rollout_aoi_plot = rollout_aoi.to_crs("EPSG:3857")

# Create the figure explicitly and hand its axes to the plot call. The earlier
# plt.cla()/plt.clf() calls implicitly created an extra, empty figure because the
# plot call then opened a figure of its own.
fig, ax = plt.subplots(figsize=(20, 8))
rollout_aoi_plot.plot(
    "Predicted Relative Wealth Index",
    ax=ax,
    cmap="viridis",
    legend=True,
    legend_kwds={"shrink": 0.8},
)
cx.add_basemap(ax, source=cx.providers.OpenStreetMap.Mapnik)
ax.set_axis_off()
ax.set_title("Predicted Relative Wealth Index")
fig.tight_layout()
# Save the map next to the other rollout outputs, then display it
fig.savefig(f"{ROLLOUT_DIR}/{ROLLOUT_DATE}-{COUNTRY_CODE}-predicted-wealth-index.png")
plt.show()

In [None]:
#papermill_description="Create static maps for bins"
# Reproject to EPSG:3857 (Web Mercator) before drawing over the basemap tiles
rollout_aoi_plot = rollout_aoi.to_crs("EPSG:3857")

# Create the figure explicitly and hand its axes to the plot call. The earlier
# plt.cla()/plt.clf() calls implicitly created an extra, empty figure because the
# plot call then opened a figure of its own.
fig, ax = plt.subplots(figsize=(20, 8))
rollout_aoi_plot.plot(
    "Predicted Wealth Category (quintile)",
    ax=ax,
    cmap="viridis_r",
    legend=True,
)
cx.add_basemap(ax, source=cx.providers.OpenStreetMap.Mapnik)
ax.set_axis_off()
ax.set_title("Predicted Wealth Category")
fig.tight_layout()
# Save the map next to the other rollout outputs, then display it
fig.savefig(f"{ROLLOUT_DIR}/{ROLLOUT_DATE}-{COUNTRY_CODE}-predicted-wealth-bin.png")
plt.show()

### Create an Interactive Map

In [None]:
# Columns to show in the tooltip of the interactive map.
# NOTE(review): the Ookla column names hard-code 2019, which matches the OOKLA_YEAR
# value set in this notebook; they presumably need updating if OOKLA_YEAR changes — confirm.
# NOTE(review): several of these look like feature columns; they may exist only in
# `rollout_output_with_features`, not in `rollout_aoi` — verify before using as tooltip.
cols_of_interest = [
    "quadkey",
    "shapeName",
    "shapeGroup",
    "pop_count",
    "avg_rad_mean",
    "mobile_2019_mean_avg_d_kbps_mean",
    "fixed_2019_mean_avg_d_kbps_mean",
    "poi_count",
    "road_count",
    "Predicted Relative Wealth Index",
    "Predicted Wealth Category (quintile)",
]

# Warning: This can be a bit laggy due to the large number of tiles being visualized

# Uncomment the following if you want to visualize the raw wealth predictions
# rollout_aoi.explore(column='Predicted Relative Wealth Index', tooltip=cols_of_interest, cmap="viridis")

# Uncomment the following if you want to view the quintiles
# rollout_aoi.explore(column='Predicted Wealth Category (quintile)', tooltip=cols_of_interest, cmap="viridis_r")

Alternatively, you may also try to visualize this interactively in [Kepler](https://kepler.gl/demo) by uploading the rollout output geojson file.