In [None]:
import geopandas
import math
import numpy as np
import pandas as pd
import shapely
import shapely.vectorized

In [None]:
def generate_area_cell(lat, lon, degrees, radius_km=6371):
    """Return the surface area of a square lat/lon grid cell on a sphere.

    The cell is centred on ``(lat, lon)`` and spans ``degrees`` in both
    latitude and longitude. The area is
    ``R**2 * (sin(lat_north) - sin(lat_south)) * delta_lon``.

    Parameters
    ----------
    lat : float
        Latitude of the cell centre, in degrees.
    lon : float
        Longitude of the cell centre, in degrees. The area of a cell does not
        depend on its longitude, so this value is not used; the parameter is
        kept so existing callers keep working.
    degrees : float
        Edge length of the cell, in degrees.
    radius_km : float, optional
        Sphere radius. Defaults to 6371, the value previously hard-coded.

    Returns
    -------
    float
        Cell area in squared units of ``radius_km``.
    """
    lat_south = math.radians(lat - degrees * 0.5)
    lat_north = math.radians(lat + degrees * 0.5)
    # Take the longitudinal width directly instead of subtracting two nearly
    # equal edge longitudes: the difference is always `degrees`, and the
    # subtraction made the result vary in the last bits with longitude.
    lon_width = math.radians(degrees)

    return (math.sin(lat_north) - math.sin(lat_south)) * lon_width * radius_km**2

In [None]:
# make grids
degrees = 0.5      # resolution of the fine grid, in degrees
da_degrees = 2.5   # cell size used for the LAT / LON columns, in degrees


def _snap_to_cell_centre(values, cell_degrees):
    """Map coordinates to the centre of the `cell_degrees`-wide cell containing them."""
    return values // cell_degrees * cell_degrees + cell_degrees * 0.5


# Cell-centre coordinates of the global `degrees` grid.
lons = np.linspace(-180 + degrees * .5, 180 - degrees * .5, int(360 / degrees))
lats = np.linspace(-90 + degrees * .5, 90 - degrees * .5, int(180 / degrees))
xv, yv = np.meshgrid(lons, lats)

# One row per fine grid cell, in the same flattened order as xv / yv.
subgrid_df = pd.DataFrame({"lat_05": yv.ravel(), "lon_05": xv.ravel()})
subgrid_df['LAT'] = _snap_to_cell_centre(subgrid_df['lat_05'], da_degrees)
subgrid_df['LON'] = _snap_to_cell_centre(subgrid_df['lon_05'], da_degrees)

# Coarser cell centres at 5 and 2.5 degrees (columns *_5 and *_25).
for rescale_degrees, suffix in ((5, "5"), (2.5, "25")):
    subgrid_df[f'LAT_{suffix}'] = _snap_to_cell_centre(subgrid_df['lat_05'], rescale_degrees)
    subgrid_df[f'LON_{suffix}'] = _snap_to_cell_centre(subgrid_df['lon_05'], rescale_degrees)

# Area of every fine cell. Iterating the two columns directly avoids building
# a Series object per row, which is what DataFrame.apply(axis=1) does.
subgrid_df['area'] = [
    generate_area_cell(lat, lon, degrees)
    for lat, lon in zip(subgrid_df['lat_05'], subgrid_df['lon_05'])
]
print(subgrid_df.shape)

In [None]:
# match on shape files
gdl = geopandas.read_file("data/worldbank_shapefiles/WB_countries_Admin0_10m.shp")

# Start from all-missing *object* columns. Initialising with a bare np.nan
# creates float64 columns, and writing country strings into those is an
# incompatible-dtype assignment that recent pandas versions warn about.
subgrid_df["ISO_A3"] = pd.Series(np.nan, index=subgrid_df.index, dtype=object)
subgrid_df["country"] = pd.Series(np.nan, index=subgrid_df.index, dtype=object)
for i, row in gdl.iterrows():
    # Boolean mask over the flattened grid: True where the cell centre lies
    # inside this geometry. xv / yv are flattened in the same order as the
    # rows of subgrid_df, so the mask can index the frame directly.
    contains = shapely.vectorized.contains(row.geometry, xv, yv).ravel()

    # If geometries overlap, the later row in the shapefile wins.
    subgrid_df.loc[contains, "ISO_A3"] = row['ISO_A3']
    subgrid_df.loc[contains, "country"] = row["NAME_EN"]

In [None]:
# high and low income countries
# Each file is read line by line; only the newline characters are stripped,
# so any other surrounding whitespace is kept as part of the name.
# NOTE(review): no encoding is passed to open(), so the platform default is
# used -- confirm it matches the files (names such as "Côte d'Ivoire" are
# expected in the low/middle income list).
with open("data/high_income_countries.txt") as f:
    hic = [name.strip('\n') for name in f]
with open("data/low_middle_income_countries.txt") as f:
    lic = [name.strip('\n') for name in f]

In [None]:
# clean countries to match shapefile names
# Spelling used in the income lists -> spelling used in the shapefile.
# Names without an entry are kept unchanged. This cell expects `lic` and
# `hic` to still be the plain lists of names read from the text files.
lic_renames = {
    "China (People's Republic of)": "People's Republic of China",
    "Democratic Republic of Congo": "Democratic Republic of the Congo",
    "Congo": "Republic of the Congo",
    "Lao People's Democratic Republic": "Laos",
    "Democratic People's Republic of Korea": "North Korea",
    "North Macedonia": "Republic of Macedonia",
    "Timor-Leste": "East Timor",
    "Cabo Verde": 'Cape Verde',
    "Côte d'Ivoire": 'Ivory Coast',
    "Eswatini": "eSwatini",
    "Gambia": "The Gambia",
    "Syrian Arab Republic": "Syria",
}
lic = pd.DataFrame({"lic": [lic_renames.get(name, name) for name in lic]})

hic_renames = {
    "Bahamas, The": "Bahamas",
    "Korea, Rep.": "South Korea",
    "Taiwan, China": "Taiwan",
    "Slovak Republic": "Slovakia",
    "United States": "United States of America",
}
hic = pd.DataFrame({"hic": [hic_renames.get(name, name) for name in hic]})

In [None]:
# Flag each grid cell by income group with a membership test on its country
# name. A left merge was only being used to detect a match, and it would
# duplicate grid cells if a name appeared more than once in an income list;
# isin() keeps exactly one row per cell. Cells without a country get False.
subgrid_df["high_income"] = subgrid_df["country"].isin(hic["hic"])
subgrid_df["low_middle_income"] = subgrid_df["country"].isin(lic["lic"])

# Persist the annotated grid (no index column in the file).
subgrid_df.to_csv("data/worldbank_grid_high_low_income.csv", index=False)

In [None]:
# Preview the first rows of the grid table as a quick visual check.
subgrid_df.head()