# GHS Data Download and Aggregating

This notebook describes the process of downloading GHS data from [here](https://ghsl.jrc.ec.europa.eu/download.php?) and aggregating it to the grid. The following aggregations are computed for the grid:

- Classification of pixels as rural or urban for the raster.
    - Using classes 21 to 30 for urban, 11 to 13 for rural and 10 for water.
- Fraction of each grid pixel that is rural/urban.
- Find population by grid pixel.

The epoch used is the one closest to the date `27th Feb 2023`, which is 2025.

In [87]:
%load_ext jupyter_black
import pandas as pd
import geopandas as gpd
from pathlib import Path
import os
import requests, zipfile, io
import rasterio
import rasterio.mask
import matplotlib.pyplot as plt
import rioxarray as rxr

The jupyter_black extension is already loaded. To reload it, use:
  %reload_ext jupyter_black


In [88]:
# Root of the project data tree, read from the STORM_DATA_DIR environment
# variable. It is checked explicitly so that a missing variable produces a
# readable error instead of the TypeError raised by Path(None).
storm_data_dir = os.getenv("STORM_DATA_DIR")
if storm_data_dir is None:
    raise RuntimeError(
        "The STORM_DATA_DIR environment variable must be set to the data root."
    )
base_dir = Path(storm_data_dir) / "analysis/02_new_model_input/"
# Folder the GHS downloads are written to (SMOD and POP sub-folders).
input_dir = base_dir / "06_settlement/input/"
# Folder holding the admin boundary archive.
shp_input_dir = base_dir / "02_housing_damage/input/"
# Folder holding the grid file.
grid_input_dir = base_dir / "02_housing_damage/output/"
output_dir = base_dir / "06_settlement/output/"

In [89]:
# Admin level 3 boundaries (per the file name)
adm3_path = shp_input_dir / "phl_adminboundaries_candidate_adm3.zip"
adm3_shp = gpd.read_file(adm3_path)

# Grid the GHS data is aggregated to
grid_path = grid_input_dir / "phl_0.1_degree_grid_land_overlap.gpkg"
grid = gpd.read_file(grid_path)

In [90]:
# Global GHS-SMOD archive for the 2025 epoch.
# The URL is written as adjacent string literals purely for readability; the
# resulting value is a single string.
smod_link = (
    "https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/"
    "GHS_SMOD_GLOBE_R2022A/"
    "GHS_SMOD_P2025_GLOBE_R2022A_54009_1000/V1-0/"
    "GHS_SMOD_P2025_GLOBE_R2022A_54009_1000_V1_0.zip"
)
# Folder containing the GHS-POP tiles for the 2025 epoch.
pop_link = (
    "https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/"
    "GHS_POP_GLOBE_R2022A/"
    "GHS_POP_P2025_GLOBE_R2022A_54009_100/V1-0/tiles/"
)

## Degree of Urbanisation

### Urban-Rural Classification

In [91]:
# Downloading the whole global data set as it is small, then extracting the
# GeoTIFF(s) from the archive into input_dir / "SMOD".
smod_dir = input_dir / "SMOD"
smod_dir.mkdir(parents=True, exist_ok=True)  # the folder may not exist yet

# TLS verification stays enabled (the population tiles are fetched from the
# same host without disabling it). The body is read in one piece through
# req.content, so stream=True is not needed.
req = requests.get(smod_link)
req.raise_for_status()  # do not try to unzip an HTTP error page
with zipfile.ZipFile(io.BytesIO(req.content)) as zObj:
    for fileName in zObj.namelist():
        if fileName.endswith("tif"):
            # Keep only the base name so that an archive member carrying a
            # directory component cannot be written outside smod_dir.
            target = smod_dir / Path(fileName).name
            # Both handles are closed as soon as the copy is done.
            with zObj.open(fileName) as src, open(target, "wb") as dst:
                dst.write(src.read())



In [92]:
# Reading in raster
# os.listdir returns entries in arbitrary order and may contain files other
# than the raster, so keep only the GeoTIFFs and sort them; file_name[0] is
# then the same file on every run.
file_name = sorted(
    name for name in os.listdir(input_dir / "SMOD") if name.endswith("tif")
)
# The dataset is left open on purpose: later cells read smod_raster.crs.
smod_raster = rasterio.open(input_dir / "SMOD" / file_name[0])
# Band 1 as a 2-D array
smod_array = smod_raster.read(1)
smod_array

array([[-200, -200, -200, ..., -200, -200, -200],
       [-200, -200, -200, ..., -200, -200, -200],
       [-200, -200, -200, ..., -200, -200, -200],
       ...,
       [-200, -200, -200, ..., -200, -200, -200],
       [-200, -200, -200, ..., -200, -200, -200],
       [-200, -200, -200, ..., -200, -200, -200]], dtype=int16)

In [93]:
# Observations on the SMOD raster values:
# - no data are set at -200 (the array printed above is -200 at its edges)
# - water seems to be set to 10
# Display the raster CRS: the raster has to be converted to the same CRS as
# the grid before the two are combined.
smod_raster.crs

CRS.from_wkt('PROJCS["World_Mollweide",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mollweide"],PARAMETER["central_meridian",0],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]')

In [94]:
# Extent of the grid; the same bounds are used further down to clip the
# reprojected raster.
grid.total_bounds

array([114.25,   4.55, 126.65,  21.15])

In [95]:
# converting crs and clipping
# First check whether the raster and the grid already share a CRS; if they do
# not, the raster is reprojected to the grid CRS before it is clipped.
smod_raster.crs == grid.crs

False

In [96]:
# Open the same SMOD GeoTIFF with rioxarray; its .rio accessor is what the
# following cells use for reprojecting and clipping.
smod_tif_path = input_dir / "SMOD" / file_name[0]
input_raster = rxr.open_rasterio(smod_tif_path)
input_raster.rio.crs

CRS.from_wkt('PROJCS["World_Mollweide",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mollweide"],PARAMETER["central_meridian",0],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]')

In [97]:
# Bring the SMOD raster into the grid's CRS, then cut it down to the bounding
# box of the grid.
grid_minx, grid_miny, grid_maxx, grid_maxy = grid.total_bounds
input_raster_wgs84 = input_raster.rio.reproject(grid.crs)
input_raster_wgs84_clip = input_raster_wgs84.rio.clip_box(
    minx=grid_minx,
    miny=grid_miny,
    maxx=grid_maxx,
    maxy=grid_maxy,
)

In [None]:
# Fraction of SMOD pixels in each grid cell that are urban, rural or water.
# The result columns start as NaN floats so that cells which have not been
# processed are explicit missing values in a numeric column.
grid_vals = pd.DataFrame(
    {
        "id": grid["id"],
        "Centroid": grid["Centroid"],
        "urban": float("nan"),
        "rural": float("nan"),
        "water": float("nan"),
    }
)
# A single pass over the grid grouped by centroid label replaces filtering the
# whole grid once per row; each group holds the row(s) sharing that label.
for _, grd_sel in grid.groupby("Centroid", sort=False):
    # Raster pixels touching the cell geometry
    grid_rast = input_raster_wgs84_clip.rio.clip(
        grd_sel["geometry"], all_touched=True
    )
    # Shared denominator of the three fractions, computed once per cell
    n_pixels = grid_rast.count().values
    # SMOD classes: 21 to 30 urban, 11 to 13 rural, 10 water
    is_urban = (grid_rast >= 21) & (grid_rast <= 30)
    is_rural = (grid_rast >= 11) & (grid_rast <= 13)
    is_water = grid_rast == 10

    rows = grd_sel.index.values
    grid_vals.loc[rows, "urban"] = float(is_urban.sum().values / n_pixels)
    grid_vals.loc[rows, "rural"] = float(is_rural.sum().values / n_pixels)
    grid_vals.loc[rows, "water"] = float(is_water.sum().values / n_pixels)

In [113]:
# Inspect the first 412 rows of the per-cell urban / rural / water fractions.
grid_vals.head(412)

Unnamed: 0,id,Centroid,urban,rural,water
0,101,114.3E_11.1N,0.0,0.0,1.0
1,4475,116.9E_7.9N,0.0,0.024793,0.975207
2,4639,117.0E_8.2N,0.0,0.008264,0.991736
3,4640,117.0E_8.1N,0.0,0.338843,0.661157
4,4641,117.0E_8.0N,0.0,0.793388,0.206612
...,...,...,...,...,...
407,10083,120.3E_14.9N,0.239669,0.760331,0.0
408,10084,120.3E_14.8N,0.330579,0.586777,0.082645
409,10085,120.3E_14.7N,0.082645,0.859504,0.057851
410,10086,120.3E_14.6N,,,


## Population

### Total Population by grid

In [114]:
# downloading the population data
# selected from here: https://ghsl.jrc.ec.europa.eu/download.php?ds=pop
# Row/column identifiers of the tiles to download.
phl_boxes = ["R7_C30", "R7_C31", "R8_C30", "R8_C31", "R9_C30", "R9_C31"]
pop_url_link = "https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_POP_GLOBE_R2022A/GHS_POP_P2025_GLOBE_R2022A_54009_100/V1-0/tiles/"
file_list = [
    "GHS_POP_P2025_GLOBE_R2022A_54009_100_V1_0_" + patt + ".zip"
    for patt in phl_boxes
]
pop_dir = input_dir / "POP"
pop_dir.mkdir(parents=True, exist_ok=True)  # the folder may not exist yet
for file in file_list:
    req = requests.get(pop_url_link + file, allow_redirects=True)
    req.raise_for_status()  # do not try to unzip an HTTP error page
    with zipfile.ZipFile(io.BytesIO(req.content)) as zObj:
        for fileName in zObj.namelist():
            if fileName.endswith("tif"):
                # Keep only the base name so that an archive member carrying
                # a directory component cannot be written outside pop_dir.
                target = pop_dir / Path(fileName).name
                # Both handles are closed as soon as the copy is done.
                with zObj.open(fileName) as src, open(target, "wb") as dst:
                    dst.write(src.read())

In [115]:
# opening files and merging them
from rasterio.merge import merge

pop_dir = input_dir / "POP"
merged_name = "phl_merged_ghs_pop.tif"

# Only the GeoTIFF tiles go into the mosaic. The merged output lives in the
# same folder, so it is excluded explicitly; otherwise re-running this cell
# would merge the previous result into the new one. Sorting makes the input
# order the same on every run.
fileList = sorted(
    name
    for name in os.listdir(pop_dir)
    if name.endswith("tif") and name != merged_name
)
if not fileList:
    raise FileNotFoundError(f"No population tiles found in {pop_dir}")

mosaic_raster = []
try:
    for file in fileList:
        mosaic_raster.append(rasterio.open(pop_dir / file))
    # merge returns the mosaic array and its affine transform (out_raster)
    merged_raster, out_raster = merge(mosaic_raster)
    # Metadata of the last tile is the template; size and transform are
    # replaced by those of the mosaic below.
    out_meta = mosaic_raster[-1].meta.copy()
finally:
    # Close every tile that was opened, even if opening or merging failed.
    for rast in mosaic_raster:
        rast.close()

out_meta.update(
    {
        "driver": "GTiff",
        "height": merged_raster.shape[1],
        "width": merged_raster.shape[2],
        "transform": out_raster,
    }
)
with rasterio.open(pop_dir / merged_name, "w", **out_meta) as dest:
    dest.write(merged_raster)

In [116]:
# extracting total value by grid
# Band 1 is read inside a context manager so the file handle is closed here;
# the name pop_raster is rebound to a rioxarray object in the next cell, which
# would otherwise leave this dataset open with no reference to close it.
with rasterio.open(input_dir / "POP/phl_merged_ghs_pop.tif") as pop_raster:
    pop_array = pop_raster.read(1)

In [117]:
# Open the merged population GeoTIFF with rioxarray and display its CRS.
merged_pop_path = input_dir / "POP/phl_merged_ghs_pop.tif"
pop_raster = rxr.open_rasterio(merged_pop_path)
pop_raster.rio.crs

CRS.from_wkt('PROJCS["World_Mollweide",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mollweide"],PARAMETER["central_meridian",0],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]')

In [118]:
# Reproject the merged population raster to the CRS of the grid.
# NOTE(review): no resampling method is passed, so the library default is
# used. These are population counts per pixel, so confirm that the default
# keeps the totals meaningful after reprojection - TODO confirm.
pop_raster_wgs84 = pop_raster.rio.reproject(grid.crs)