In [None]:
import geopandas
import pandas
import pyogrio

# Let notebook outputs show up to 250 rows before pandas truncates them.
pandas.options.display.max_rows = 250

In [None]:
# Read the ne_10m admin-0 countries shapefile; its attribute columns are the
# source of the metadata selected further down.
ne_countries_path = "ne_10m_admin_0_countries/ne_10m_admin_0_countries.shp"
gdf = geopandas.read_file(ne_countries_path)

In [None]:
# Show every attribute column of the shapefile as a plain Python list.
gdf.columns.tolist()

In [None]:
# Attribute columns carried over from the shapefile: the ISO_A3 code plus the
# long name and the regional / income groupings.
metadata_columns = [
    "ISO_A3",
    "NAME_LONG",
    "CONTINENT",
    "REGION_UN",
    "SUBREGION",
    "REGION_WB",
    "INCOME_GRP",
]
# Work on a copy so that later edits to `df` never touch `gdf`.
df = gdf[metadata_columns].copy()

In [None]:
# Read the geoBoundaries CGAZ ADM0 GeoPackage.
cgaz_adm0_path = "geoBoundariesCGAZ_ADM0.gpkg"
cgaz = geopandas.read_file(cgaz_adm0_path)

In [None]:
# Keep only the two CGAZ columns needed here and rename them so the code column
# shares its name (ISO_A3) with the metadata table.
cgaz_renames = {"shapeGroup": "ISO_A3", "shapeName": "name"}
countries = cgaz[list(cgaz_renames)].rename(columns=cgaz_renames)

In [None]:
# Show the metadata rows whose ISO_A3 code has no counterpart in `countries`.
unmatched_codes = set(df.ISO_A3).difference(countries.ISO_A3)
df[df.ISO_A3.isin(unmatched_codes)]

In [None]:
# Overwrite ISO_A3 on individual rows of `df`, addressed by row label,
# presumably so that they match the codes used in `countries`.
# NOTE(review): the labels are hard-coded and presumably tied to the row order
# of the shapefile that was read — confirm them after any data update.
iso_a3_overrides = {
    21: "FRA",  # France
    64: "XKX",  # Kosovo KOS
    52: "NOR",  # Norway
    251: "128",  # Spratly
    11: "129",  # Palestine PSE
    242: "117",  # Malvinas / Falklands FLK
    139: "120",  # Islas Brasilieras
    170: "121",  # Siachen Glacier
}
for row_label, iso_code in iso_a3_overrides.items():
    df.loc[row_label, "ISO_A3"] = iso_code

In [None]:
# Attach the metadata in `df` to the `countries` codes by ISO_A3 (left join),
# discard rows with any missing value, and drop the `name` column.
countries_by_iso = countries.set_index("ISO_A3")
metadata_by_iso = df.set_index("ISO_A3")
joined = countries_by_iso.join(metadata_by_iso).dropna().drop(columns="name")
# The output file uses GID_0 as the name of the code column.
joined.index.name = "GID_0"
# Raise the row limit for this one display only (up to 1000 rows).
with pandas.option_context("display.max_rows", 1000):
    display(joined)

In [None]:
# Show the (rows, columns) size of the joined table.
joined.shape

In [None]:
# Write the joined table to CSV; the GID_0 index becomes the first column.
joined.to_csv(path_or_buf="meta_adm0.csv")

In [None]:
# Read the geoBoundaries CGAZ ADM1 GeoPackage.
cgaz_adm1_path = "geoBoundariesCGAZ_ADM1.gpkg"
adm1 = geopandas.read_file(cgaz_adm1_path)

In [None]:
# Inspect the column names of the ADM1 table.
adm1.columns

In [None]:
# Preview the first rows of the ADM1 table.
adm1.head()

In [None]:
# Read the geoBoundaries CGAZ ADM2 GeoPackage.
cgaz_adm2_path = "geoBoundariesCGAZ_ADM2.gpkg"
adm2 = geopandas.read_file(cgaz_adm2_path)

In [None]:
# Inspect the column names of the ADM2 table.
adm2.columns

In [None]:
# Preview the first rows of the ADM2 table.
adm2.head()

## Join GADM and Natural Earth metadata

In [None]:
# Same attribute columns as selected for `df`, this time indexed by ISO_A3 so
# the table can be joined on its index.
ne_columns = [
    "ISO_A3",
    "NAME_LONG",
    "CONTINENT",
    "REGION_UN",
    "SUBREGION",
    "REGION_WB",
    "INCOME_GRP",
]
ne = gdf[ne_columns].copy().set_index("ISO_A3")
ne

In [None]:
# Additional metadata rows from a CSV file, indexed by ISO_A3 like `ne`.
ne_extra_raw = pandas.read_csv("naturalearth_meta_to_join.csv")
ne_extra = ne_extra_raw.set_index("ISO_A3")
ne_extra

In [None]:
# Copy the gadm36 levels GeoPackage from the results tree into the current directory.
! cp ../../results/input/admin-boundaries/gadm36_levels.gpkg .

In [None]:
# List the layers of both GADM GeoPackages. Each result goes through display()
# explicitly: a notebook cell only renders its last bare expression, so the
# first listing would otherwise be computed and silently discarded.
display(pyogrio.list_layers("gadm_410-levels.gpkg"))
display(pyogrio.list_layers("gadm36_levels.gpkg"))

In [None]:
# Read the "level0" layer of the gadm36 GeoPackage.
# The I/O backend is selected with `engine=`. `driver=` is not a read_file
# parameter; it would only be forwarded to the backend as an extra option.
gadm0 = geopandas.read_file("gadm36_levels.gpkg", layer="level0", engine="pyogrio") # layer="ADM_0" for 410

In [None]:
# Reduce GADM level 0 to a code -> country-name lookup indexed by ISO_A3.
# Author's note: "COUNTRY" for 410 — presumably the 410 file already names this
# column "COUNTRY" instead of "NAME_0"; verify before switching files.
gadm0_renames = {"GID_0": "ISO_A3", "NAME_0": "COUNTRY"}
gadm0_min = gadm0[list(gadm0_renames)].rename(columns=gadm0_renames).set_index("ISO_A3")
gadm0_min

In [None]:
# Stack the two metadata tables, then left-join them onto the GADM codes by
# their shared ISO_A3 index.
ne_combined = pandas.concat([ne, ne_extra])
gadm0_ne = gadm0_min.join(ne_combined)

In [None]:
# List the GADM entries that received no metadata from the join
# (NAME_LONG is missing), ordered by ISO_A3 code.
(
    gadm0_ne.loc[gadm0_ne["NAME_LONG"].isna()]
    .reset_index()
    .sort_values(by="ISO_A3")
    .loc[:, ["ISO_A3", "COUNTRY"]]
)

In [None]:
# Export the joined table, ordered by long name and then by GADM country name.
gadm0_ne_sorted = gadm0_ne.sort_values(by=["NAME_LONG", "COUNTRY"])
gadm0_ne_sorted.to_csv("gadm36_ne.csv")

In [None]:
# Export the `ne` metadata table, ordered by long name.
ne_sorted = ne.sort_values(by=["NAME_LONG"])
ne_sorted.to_csv("ne.csv")

In [None]:
# Look up a fixed set of ISO_A3 codes in the joined table.
# HKG and XNC are left out because they are not in the index.
codes_to_inspect = [
    "ABW", "AIA", "ALA", "ATF", "BES", "CUW", "CYM", "FLK", "FRO", "GGY",
    "GLP", "GUF", "GUM", "IMN", "JEY", "MAF", "MSR", "MTQ", "MYT", "NCL",
    "PRI", "PSE", "TCA", "UMI", "VGB", "VIR", "XAD", "XCA", "XCL", "XKO",
]
gadm0_ne.loc[codes_to_inspect]

In [None]:
# Read the "ADM_1" layer of the GADM 410 GeoPackage.
# The I/O backend is selected with `engine=`. `driver=` is not a read_file
# parameter; it would only be forwarded to the backend as an extra option.
gadm1 = geopandas.read_file("gadm_410-levels.gpkg", layer="ADM_1", engine="pyogrio")

In [None]:
# Export the ADM1 identifier and name columns, without the row index.
adm1_meta_columns = ["GID_0", "COUNTRY", "GID_1", "NAME_1"]
gadm1[adm1_meta_columns].to_csv("meta_adm1.csv", index=False)

In [None]:
# Read the "ADM_2" layer of the GADM 410 GeoPackage.
# The I/O backend is selected with `engine=`. `driver=` is not a read_file
# parameter; it would only be forwarded to the backend as an extra option.
gadm2 = geopandas.read_file("gadm_410-levels.gpkg", layer="ADM_2", engine="pyogrio")

In [None]:
# Export the ADM2 identifier and name columns, without the row index.
adm2_meta_columns = ["GID_0", "COUNTRY", "GID_1", "GID_2", "NAME_2"]
gadm2[adm2_meta_columns].to_csv("meta_adm2.csv", index=False)