In [1]:
import geopandas as gpd

### 1. Dissolving GADM administrative boundaries shapefile to country-level
The GADM administrative boundaries dataset is originally provided at a finer level of detail than the country level. Here, we dissolve it on the "COUNTRY" column to obtain a shapefile that contains only country-level polygons and boundaries.

In [2]:
## Location of the GADM file geodatabase and the name of the layer to read from it
gdb_path = (
    "00_GLOBAL FINAL/"
    "gadm administrative boundaries/"
    "gadm_410.gdb"
)
feature_class = "gadm"

In [None]:
## Loading the selected layer of the geodatabase into a GeoDataFrame
gdf = gpd.read_file(filename=gdb_path, layer=feature_class)

In [None]:
## Merging all polygons that share a "COUNTRY" value into one geometry per country.
## The dissolve defaults are spelled out: every non-geometry column keeps the value
## of the first row in its group, and "COUNTRY" becomes the index of the result.
dissolved_gdf = gdf.dissolve(by="COUNTRY", aggfunc="first", as_index=True)

In [None]:
## Saving the country-level polygons as a shapefile.
## The encoding is set explicitly so that non-ASCII attribute text (e.g. accented or
## native-language names) is written as UTF-8 rather than in whatever encoding the
## I/O engine would otherwise pick.
## NOTE(review): the default encoding differs between engines/GDAL setups - confirm
## that downstream readers honour the UTF-8 .cpg sidecar file.
dissolved_gdf.to_file(
    "00_GLOBAL FINAL/gadm administrative boundaries/gadm_410_dissolved.shp",
    encoding="utf-8",
)

### 2. Assigning country name to schools
Next, we overlay the school point shapefile with the dissolved country-level administrative boundaries shapefile to assign the correct country name to each school at a fine spatial detail. This approach ensures that schools close to national borders are still assigned the correct country.

In [4]:
## Input shapefiles: the school points and the dissolved country-level boundaries
schools_file = (
    "00_GLOBAL FINAL/"
    "schools_global_unedited/"
    "schools_global_unedited.shp"
)
countries_file = (
    "00_GLOBAL FINAL/"
    "gadm administrative boundaries/"
    "gadm_410_dissolved.shp"
)

In [5]:
## Reading the school points and the country polygons into GeoDataFrames
## (schools first, then countries)
schools, countries = [gpd.read_file(path) for path in (schools_file, countries_file)]

In [6]:
## Reprojecting the schools layer onto the CRS of the country boundaries so that
## both layers share one coordinate reference system before the spatial join
schools = schools.to_crs(crs=countries.crs)

In [8]:
## Printing the CRS of each layer (schools first, then countries), one per line,
## to confirm that they match
print(schools.crs, countries.crs, sep="\n")

EPSG:4326
EPSG:4326


In [9]:
## Spatial join to assign country names to schools.
## how="left" keeps every school (schools with no match get missing country attributes);
## predicate="within" matches a school to a country polygon it lies within.
schools_with_countries = gpd.sjoin(schools, countries, how="left", predicate="within")

## A left join returns one row per (school, matching polygon) pair, so a school that
## falls inside overlapping country polygons would appear more than once. Keep only
## the first match per school so the result has exactly one row per input school.
## This relies on the index of `schools` being unique - presumably the default index
## from read_file; verify if `schools` is re-indexed upstream.
schools_with_countries = schools_with_countries[
    ~schools_with_countries.index.duplicated(keep="first")
]

In [10]:
## Checking columns of the original school gdf (the left-hand input of the spatial
## join), for comparison with the columns of the joined result
schools.columns

Index(['name', 'amenity', 'grades', 'isced_leve', 'geometry'], dtype='object')

In [11]:
## Checking columns of the new, merged gdf: the school columns plus "index_right"
## and every attribute column carried over from the country boundaries layer
schools_with_countries.columns

Index(['name', 'amenity', 'grades', 'isced_leve', 'geometry', 'index_right',
       'COUNTRY', 'UID', 'GID_0', 'NAME_0', 'VARNAME_0', 'GID_1', 'NAME_1',
       'VARNAME_1', 'NL_NAME_1', 'ISO_1', 'HASC_1', 'CC_1', 'TYPE_1',
       'ENGTYPE_1', 'VALIDFR_1', 'GID_2', 'NAME_2', 'VARNAME_2', 'NL_NAME_2',
       'HASC_2', 'CC_2', 'TYPE_2', 'ENGTYPE_2', 'VALIDFR_2', 'GID_3', 'NAME_3',
       'VARNAME_3', 'NL_NAME_3', 'HASC_3', 'CC_3', 'TYPE_3', 'ENGTYPE_3',
       'VALIDFR_3', 'GID_4', 'NAME_4', 'VARNAME_4', 'CC_4', 'TYPE_4',
       'ENGTYPE_4', 'VALIDFR_4', 'GID_5', 'NAME_5', 'CC_5', 'TYPE_5',
       'ENGTYPE_5', 'GOVERNEDBY', 'SOVEREIGN', 'DISPUTEDBY', 'REGION',
       'VARREGION', 'CONTINENT', 'SUBCONT', 'Shape_Leng', 'Shape_Area'],
      dtype='object')

In [12]:
## Keeping the three country attributes of interest followed by the original
## school columns; all other columns brought in by the join are discarded
schools_with_countries = schools_with_countries[
    ["COUNTRY", "GID_0", "CONTINENT", *schools.columns]
]

In [13]:
## Overview of the schools gdf with the assigned country attributes
## (displayed as the cell's output)
schools_with_countries

Unnamed: 0,COUNTRY,GID_0,CONTINENT,name,amenity,grades,isced_leve,geometry
0,Nigeria,NGA,Africa,Ceta international school,school,,,POINT (5.60699 6.30471)
1,Angola,AGO,Africa,Escola São José de Cluny,school,,,POINT (13.23774 -8.81384)
2,Réunion,REU,Africa,École primaire de la Balance,school,,,POINT (55.27266 -21.03803)
3,Réunion,REU,Africa,École élémentaire et maternelle de Villèle,school,,,POINT (55.26029 -21.05377)
4,Réunion,REU,Africa,Centre de formation professionnelle CENTHOR,school,,,POINT (55.26733 -21.04499)
...,...,...,...,...,...,...,...,...
1381763,Jamaica,JAM,North America,RH404,school,,,POINT (-77.49828 18.01589)
1381764,Jamaica,JAM,North America,Bathroom,school,,,POINT (-77.49829 18.01592)
1381765,Cuba,CUB,North America,Escuela Primaria,school,,,POINT (-76.08265 20.7706)
1381766,Cuba,CUB,North America,Escuela Primaria,school,,,POINT (-79.62198 22.00088)


### 3. Dropping schools with no country assigned
In some cases, the spatial join did not assign a country name to a school. Spot checks in QGIS revealed that these are usually schools located on landmasses that are not mapped in the country boundaries shapefile, such as small (<1 sqm) islands or peninsulas. As the number of schools with no country assigned is relatively small (~1,000) compared to the overall dataset (~1.4 million), we decided to drop these for simplicity.

In [14]:
## Displaying the schools for which the spatial join found no country
## (rows whose "COUNTRY" value is missing)
schools_with_countries[schools_with_countries["COUNTRY"].isna()]

Unnamed: 0,COUNTRY,GID_0,CONTINENT,name,amenity,grades,isced_leve,geometry
2454,,,,Unnamed School (OSM ID: Unknown),school,,,POINT (10.18199 36.81106)
2471,,,,????? ??????,school,,3,POINT (10.1987 36.82706)
2472,,,,Unnamed School (OSM ID: Unknown),school,,,POINT (10.19647 36.82901)
2771,,,,Escola 19 de Setembro,school,,,POINT (-15.58016 11.85941)
3704,,,,École Primaire Les Berges du Lac,school,,,POINT (10.24608 36.83663)
...,...,...,...,...,...,...,...,...
1367239,,,,Escuela Arcadio Martínez,school,,,POINT (-77.72428 8.92188)
1378643,,,,Unnamed School (OSM ID: Unknown),school,,,POINT (-78.95985 9.47016)
1378734,,,,Unnamed School (OSM ID: Unknown),school,,,POINT (-73.61523 18.22305)
1381468,,,,gustykiting,school,,,POINT (-68.82458 12.12785)


In [15]:
## Keeping only the schools that were assigned a country
## (rows whose "COUNTRY" value is not missing)
schools_with_countries = schools_with_countries[schools_with_countries["COUNTRY"].notna()]

In [16]:
## Overview of the schools gdf after removing the schools with no country assigned
## (displayed as the cell's output)
schools_with_countries

Unnamed: 0,COUNTRY,GID_0,CONTINENT,name,amenity,grades,isced_leve,geometry
0,Nigeria,NGA,Africa,Ceta international school,school,,,POINT (5.60699 6.30471)
1,Angola,AGO,Africa,Escola São José de Cluny,school,,,POINT (13.23774 -8.81384)
2,Réunion,REU,Africa,École primaire de la Balance,school,,,POINT (55.27266 -21.03803)
3,Réunion,REU,Africa,École élémentaire et maternelle de Villèle,school,,,POINT (55.26029 -21.05377)
4,Réunion,REU,Africa,Centre de formation professionnelle CENTHOR,school,,,POINT (55.26733 -21.04499)
...,...,...,...,...,...,...,...,...
1381763,Jamaica,JAM,North America,RH404,school,,,POINT (-77.49828 18.01589)
1381764,Jamaica,JAM,North America,Bathroom,school,,,POINT (-77.49829 18.01592)
1381765,Cuba,CUB,North America,Escuela Primaria,school,,,POINT (-76.08265 20.7706)
1381766,Cuba,CUB,North America,Escuela Primaria,school,,,POINT (-79.62198 22.00088)


### 4. Saving final schools dataset

In [None]:

## Saving the final schools dataset (schools with country attributes) as a shapefile.
## School names contain non-ASCII characters, so the encoding is set explicitly to
## UTF-8 instead of relying on the I/O engine's default.
## NOTE(review): the default encoding differs between engines/GDAL setups - confirm
## that downstream readers honour the UTF-8 .cpg sidecar file.
schools_with_countries.to_file("00_GLOBAL FINAL/schools_final.shp", encoding="utf-8")