In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from rapidfuzz import process

## Index Calculation

#### Preparation

In [None]:
# Load the country-level exposure data

df = pd.read_csv('/0002_DATA PRODUCTS/countries_exposure.csv')

In [None]:
# Dropping the "_0" suffix from the GID_0 column to simplify later processing of exposure columns

df = df.rename(columns={"GID_0":"GID"})

In [None]:
# Overview of the dataframe

df

Unnamed: 0,COUNTRY,GID,coastflood_0,coastflood_1,coastflood_1_pct,rivflood_0,rivflood_1,rivflood_1_pct,watersc_0,watersc_1,...,cyclns_l_1_pct,cyclns_u_0,cyclns_u_1,cyclns_u_1_pct,pm25_l_0,pm25_l_1,pm25_l_1_pct,pm25_u_0,pm25_u_1,pm25_u_1_pct
0,Aruba,ABW,136.0,0.0,0.0,0.0,136.0,1.000000,136.0,0.0,...,1.0,136,0,0.0,80.0,56.0,0.411765,136.0,0.0,0.000000
1,Afghanistan,AFG,1713.0,0.0,0.0,109.0,1604.0,0.936369,251.0,1462.0,...,0.0,1713,0,0.0,3.0,1710.0,0.998249,804.0,909.0,0.530648
2,Angola,AGO,875.0,0.0,0.0,722.0,153.0,0.174857,642.0,233.0,...,0.0,875,0,0.0,1.0,874.0,0.998857,875.0,0.0,0.000000
3,Anguilla,AIA,8.0,0.0,0.0,8.0,0.0,0.000000,8.0,0.0,...,1.0,0,8,1.0,8.0,0.0,0.000000,8.0,0.0,0.000000
4,Åland,ALA,25.0,0.0,0.0,24.0,1.0,0.040000,25.0,0.0,...,0.0,25,0,0.0,25.0,0.0,0.000000,25.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,South Africa,ZAF,8553.0,0.0,0.0,7064.0,1489.0,0.174091,1274.0,7279.0,...,0.0,8557,0,0.0,2153.0,6404.0,0.748393,7265.0,1292.0,0.150987
240,Zambia,ZMB,5081.0,0.0,0.0,3898.0,1183.0,0.232828,4047.0,1034.0,...,0.0,5081,0,0.0,0.0,5081.0,1.000000,5081.0,0.0,0.000000
241,Northern Cyprus,ZNC,152.0,0.0,0.0,152.0,0.0,0.000000,0.0,152.0,...,0.0,152,0,0.0,0.0,152.0,1.000000,152.0,0.0,0.000000
242,Zimbabwe,ZWE,3012.0,0.0,0.0,1808.0,1204.0,0.399734,562.0,2450.0,...,0.0,3012,0,0.0,342.0,2670.0,0.886454,3012.0,0.0,0.000000


In [None]:
# Dropping all columns counting non-exposed schools for each hazard (ending in "_0"), as they are no longer needed for calculation

df = df[[col for col in df.columns if not col.endswith('_0')]]

In [None]:
# Removing "_1" from column names for clarity sake as distinction is no longer necessary after dropping non-exposure columns

df.columns = [col.replace('_1_', '_').removesuffix('_1') for col in df.columns]

#### INDEX CALCULATION STEP 1: Logarithmic transformation of absolute values

In [8]:
df = df.copy()

In [None]:
# Select absolute exposure counts columns, not percentages

exposure_cols = [col for col in df.columns[2:] if not col.endswith('_pct')]

In [None]:
# Adding "_n" suffic to abolsute exposure counts columns for clarity

df.rename(columns={col: f"{col}_n" for col in exposure_cols}, inplace=True)

In [None]:
# Reselecting columns after name-change

exposure_cols = [col for col in df.columns[2:] if not col.endswith('_pct')]

In [None]:
# Apply log transformation to all absolute exposure counts using np.log1p

for col in exposure_cols:
    df[col] = np.log1p(df[col])

In [None]:
# Checking the updated countries_exposure df

df

Unnamed: 0,COUNTRY,GID,coastflood_n,coastflood_pct,rivflood_n,rivflood_pct,watersc_n,watersc_pct,heatwvs_n,heatwvs_pct,cyclns_l_n,cyclns_l_pct,cyclns_u_n,cyclns_u_pct,pm25_l_n,pm25_l_pct,pm25_u_n,pm25_u_pct
0,Aruba,ABW,0.0,0.0,4.919981,1.000000,0.000000,0.000000,0.000000,0.000000,4.919981,1.0,0.000000,0.0,4.043051,0.411765,0.000000,0.000000
1,Afghanistan,AFG,0.0,0.0,7.380879,0.936369,7.288244,0.853473,6.830874,0.539988,0.000000,0.0,0.000000,0.0,7.444833,0.998249,6.813445,0.530648
2,Angola,AGO,0.0,0.0,5.036953,0.174857,5.455321,0.266286,6.773080,0.997714,0.000000,0.0,0.000000,0.0,6.774224,0.998857,0.000000,0.000000
3,Anguilla,AIA,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,2.197225,1.0,2.197225,1.0,0.000000,0.000000,0.000000,0.000000
4,Åland,ALA,0.0,0.0,0.693147,0.040000,0.000000,0.000000,2.995732,0.760000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,South Africa,ZAF,0.0,0.0,7.306531,0.174091,8.892886,0.851046,8.722743,0.717541,0.000000,0.0,0.000000,0.0,8.764834,0.748393,7.164720,0.150987
240,Zambia,ZMB,0.0,0.0,7.076654,0.232828,6.942157,0.203503,6.453625,0.124779,0.000000,0.0,0.000000,0.0,8.533460,1.000000,0.000000,0.000000
241,Northern Cyprus,ZNC,0.0,0.0,0.000000,0.000000,5.030438,1.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,5.030438,1.000000,0.000000,0.000000
242,Zimbabwe,ZWE,0.0,0.0,7.094235,0.399734,7.804251,0.813413,4.007333,0.017928,0.000000,0.0,0.000000,0.0,7.890208,0.886454,0.000000,0.000000


#### INDEX CALCULATION STEP 2: Min-Max Normalization

In [14]:
df = df.copy()

In [None]:
# Selecting columns to rescale (both log-transformed absolute exposure values & percentage columns)

columns_to_scale = df.columns[2:]

In [None]:
# Rescaling each column to 0–10 using min-max normalization

for col in columns_to_scale:
    col_min = df[col].min()
    col_max = df[col].max()
    
    if col_max == col_min:
        df[col] = 0
    else:
        df[col] = ((df[col] - col_min) / (col_max - col_min)) * 10

In [None]:
# Checking the updated df

df

Unnamed: 0,COUNTRY,GID,coastflood_n,coastflood_pct,rivflood_n,rivflood_pct,watersc_n,watersc_pct,heatwvs_n,heatwvs_pct,cyclns_l_n,cyclns_l_pct,cyclns_u_n,cyclns_u_pct,pm25_l_n,pm25_l_pct,pm25_u_n,pm25_u_pct
0,Aruba,ABW,0.0,0.0,4.604425,10.000000,0.000000,0.000000,0.000000,0.000000,4.556260,10.0,0.000000,0.0,3.686373,4.117647,0.000000,0.000000
1,Afghanistan,AFG,0.0,0.0,6.907487,9.363689,7.015829,8.534734,6.214996,5.399883,0.000000,0.0,0.000000,0.0,6.788049,9.982487,6.762526,5.306480
2,Angola,AGO,0.0,0.0,4.713894,1.748571,5.251415,2.662857,6.162413,9.977143,0.000000,0.0,0.000000,0.0,6.176601,9.988571,0.000000,0.000000
3,Anguilla,AIA,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,2.034790,10.0,2.068786,10.0,0.000000,0.000000,0.000000,0.000000
4,Åland,ALA,0.0,0.0,0.648690,0.400000,0.000000,0.000000,2.725634,7.600000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,South Africa,ZAF,0.0,0.0,6.837908,1.740910,8.560493,8.510464,7.936292,7.175412,0.000000,0.0,0.000000,0.0,7.991599,7.483931,7.111176,1.509875
240,Zambia,ZMB,0.0,0.0,6.622774,2.328282,6.682677,2.035032,5.871760,1.247786,0.000000,0.0,0.000000,0.0,7.780637,10.000000,0.000000,0.000000
241,Northern Cyprus,ZNC,0.0,0.0,0.000000,0.000000,4.842413,10.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,4.586652,10.000000,0.000000,0.000000
242,Zimbabwe,ZWE,0.0,0.0,6.639227,3.997344,7.512549,8.134130,3.646028,0.179283,0.000000,0.0,0.000000,0.0,7.194133,8.864542,0.000000,0.000000


#### CALCULATION METHODOLOGY STEP 3: Calculation of single unifying indicators for indicators with two thresholds (cyclones & pm2.5)
This step fully follows the CCRI methodology. To calculate a unifying indicator for the two PM2.5 thresholds, the arithmetic mean between the two is taken. To calculate a unifying indicator for the two cyclone thresholds, an inverse geometric mean is taken between the two.

In [19]:
# Arithmetic average for PM2.5

df['pm25_n'] = (df['pm25_l_n'] + df['pm25_u_n']) / 2
df['pm25_pct'] = (df['pm25_l_pct'] + df['pm25_u_pct']) / 2

In [None]:
# Creating function to calculate inverse geomtric average (following the Global INFORM Model for risk indices)

def inverse_geometric_average(df, cols, output_col):

    # 1: Inverting values (higher = better, temporarily)
    inverted = 10 - df[cols]

    # 2: Rescaling to [1, 10]
    scaled = inverted * (9 / 10) + 1

    # 3: Initializing products and valid counts
    scale_product = pd.Series(1.0, index=df.index)
    valid_counts = pd.Series(0, index=df.index)

    for col in scaled.columns:
        mask = scaled[col].notna()
        scale_product[mask] *= scaled.loc[mask, col]
        valid_counts += mask.astype(int)

    # 4: Computing geometric mean only where there are valid values
    gm = scale_product ** (1 / valid_counts)
    gm[valid_counts == 0] = np.nan

    # 5: Rescaling GM to [0, 10] and inverting
    rescaled_gm = (gm - 1) * (10 / 9)
    final_score = 10 - rescaled_gm

    df[output_col] = final_score
    return df

In [23]:
# Calculating geometric averages for cyclones, using the created function

df = inverse_geometric_average(df,['cyclns_l_n', 'cyclns_u_n'],'cyclns_n')
df = inverse_geometric_average(df, ['cyclns_l_pct', 'cyclns_u_pct'], 'cyclns_pct')

In [None]:
# Checking updated df

df

Unnamed: 0,COUNTRY,GID,coastflood_n,coastflood_pct,rivflood_n,rivflood_pct,watersc_n,watersc_pct,heatwvs_n,heatwvs_pct,...,cyclns_u_n,cyclns_u_pct,pm25_l_n,pm25_l_pct,pm25_u_n,pm25_u_pct,pm25_n,pm25_pct,cyclns_n,cyclns_pct
0,Aruba,ABW,0.0,0.0,4.604425,10.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,3.686373,4.117647,0.000000,0.000000,1.843186,2.058824,2.576963,7.597469
1,Afghanistan,AFG,0.0,0.0,6.907487,9.363689,7.015829,8.534734,6.214996,5.399883,...,0.000000,0.0,6.788049,9.982487,6.762526,5.306480,6.775287,7.644483,0.000000,0.000000
2,Angola,AGO,0.0,0.0,4.713894,1.748571,5.251415,2.662857,6.162413,9.977143,...,0.000000,0.0,6.176601,9.988571,0.000000,0.000000,3.088300,4.994286,0.000000,0.000000
3,Anguilla,AIA,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,2.068786,10.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,2.051804,10.000000
4,Åland,ALA,0.0,0.0,0.648690,0.400000,0.000000,0.000000,2.725634,7.600000,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,South Africa,ZAF,0.0,0.0,6.837908,1.740910,8.560493,8.510464,7.936292,7.175412,...,0.000000,0.0,7.991599,7.483931,7.111176,1.509875,7.551388,4.496903,0.000000,0.000000
240,Zambia,ZMB,0.0,0.0,6.622774,2.328282,6.682677,2.035032,5.871760,1.247786,...,0.000000,0.0,7.780637,10.000000,0.000000,0.000000,3.890318,5.000000,0.000000,0.000000
241,Northern Cyprus,ZNC,0.0,0.0,0.000000,0.000000,4.842413,10.000000,0.000000,0.000000,...,0.000000,0.0,4.586652,10.000000,0.000000,0.000000,2.293326,5.000000,0.000000,0.000000
242,Zimbabwe,ZWE,0.0,0.0,6.639227,3.997344,7.512549,8.134130,3.646028,0.179283,...,0.000000,0.0,7.194133,8.864542,0.000000,0.000000,3.597066,4.432271,0.000000,0.000000


In [25]:
# Dropping lower and upper threshold columns, as we don't need them anymore now

df = df.drop(columns=['pm25_l_n', 'pm25_l_pct', 'pm25_u_n', 'pm25_u_pct', 'cyclns_l_n', 'cyclns_l_pct', 
                      'cyclns_u_n', 'cyclns_u_pct'])

#### INDEX CALCULATION STEP 4: Calculation of final indicators using geometric average

In [None]:
# Taking the geometric average between rescaled absolute exposure counts and percentages for each hazard, using the function created earlier

df = inverse_geometric_average(df, ['coastflood_n', 'coastflood_pct'], 'coastflood')
df = inverse_geometric_average(df, ['rivflood_n', 'rivflood_pct'], 'rivflood')
df = inverse_geometric_average(df, ['watersc_n', 'watersc_pct'], 'watersc')
df = inverse_geometric_average(df, ['heatwvs_n', 'heatwvs_pct'], 'heatwvs')
df = inverse_geometric_average(df, ['pm25_n', 'pm25_pct'], 'pm25')
df = inverse_geometric_average(df, ['cyclns_n', 'cyclns_pct'], 'cyclns')

In [28]:
# Retaining only the newly caclulated final indicators, dropping the subcomponents

df = df[[col for col in df.columns if not (col.endswith('_n') or col.endswith('_pct'))]]

In [None]:
# Checking the updated df

df

Unnamed: 0,COUNTRY,GID,coastflood,rivflood,watersc,heatwvs,pm25,cyclns
0,Aruba,ABW,0.0,8.422310,0.000000,0.000000,1.951639,5.635169
1,Afghanistan,AFG,0.0,8.400852,7.862882,5.823122,7.234168,0.000000
2,Angola,AGO,0.0,3.371977,4.075189,8.742216,4.105818,0.000000
3,Anguilla,AIA,0.0,0.000000,0.000000,0.000000,0.000000,7.938431
4,Åland,ALA,0.0,0.525075,0.000000,5.685031,0.000000,0.000000
...,...,...,...,...,...,...,...,...
239,South Africa,ZAF,0.0,4.783333,8.535600,7.576266,6.258818,0.000000
240,Zambia,ZMB,0.0,4.832552,4.771330,3.922410,4.468290,0.000000
241,Northern Cyprus,ZNC,0.0,0.000000,8.471941,0.000000,3.770371,0.000000
242,Zimbabwe,ZWE,0.0,5.470904,7.838062,2.077452,4.026967,0.000000


#### INDEX CALCULATION STEP 5: Calculation of overall School Risk Index

In [31]:
df = df.copy()

In [32]:
indicators = ['coastflood', 'rivflood', 'watersc', 'heatwvs', 'pm25', 'cyclns']

In [None]:
# Calculating the overall School Risk Index "SRI" by taking the inverse geometric average of the final hazard indicators

df = inverse_geometric_average(df, indicators, 'SRI')

In [None]:
# Checking the updated df, sorting by SRI and displaying top 10 countries with highest SRI

df.sort_values(by='SRI', ascending=False).head(10)

Unnamed: 0,COUNTRY,GID,coastflood,rivflood,watersc,heatwvs,pm25,cyclns,SRI
20,Bangladesh,BGD,8.484696,9.365951,7.459488,9.212655,9.386532,4.093757,8.414467
165,Philippines,PHL,8.701225,8.992919,7.053171,8.909384,4.912681,9.647284,8.381293
236,China,Z03,6.799449,6.903975,8.138998,9.469841,8.981981,5.791159,7.947636
95,Indonesia,IDN,7.389049,9.683365,6.645612,9.805199,7.473749,1.752187,7.920515
226,Vietnam,VNM,8.259772,9.320889,4.546153,9.189533,6.953741,3.26862,7.532732
238,India,Z07,4.550895,8.453237,7.774328,9.035943,8.399439,3.883342,7.460665
158,Nepal,NPL,0.0,9.770322,8.448356,7.810501,9.346833,0.0,7.410887
99,Iraq,IRQ,1.287445,7.106159,8.633636,9.389625,9.50851,0.0,7.315971
132,México,MEX,3.612433,6.491566,8.661845,9.782103,5.740317,6.109105,7.307276
48,Cuba,CUB,0.0,7.640334,8.83556,8.965621,2.722689,9.135232,7.288387


In [None]:
# Checking the updated df, sorting by SRI and displaying 10 countries with lowest SRI

# As visible, some countries have no exposure values for any of the hazards resulting in a negligible SRI. This can happen especially for small island states, 
# owed to their small size as compared to the cell size of the hazard exposure rasters, which often struggle to accurately capture exposure in such small land areas.
# These countries and territories will later be removed.

df.sort_values(by='SRI', ascending=True).head(10)

Unnamed: 0,COUNTRY,GID,coastflood,rivflood,watersc,heatwvs,pm25,cyclns,SRI
214,Tuvalu,TUV,0.0,0.0,,0.0,0.0,0.0,-1.776357e-15
201,Seychelles,SYC,0.0,0.0,,0.0,0.0,0.0,-1.776357e-15
163,Pitcairn Islands,PCN,0.0,0.0,,0.0,0.0,0.0,-1.776357e-15
184,"Saint Helena, Ascension and Tris",SHN,0.0,0.0,,0.0,0.0,0.0,-1.776357e-15
152,Norfolk Island,NFK,0.0,0.0,,0.0,0.0,0.0,-1.776357e-15
69,Falkland Islands,FLK,0.0,0.0,0.0,0.0,0.0,0.0,1.776357e-15
96,Isle of Man,IMN,0.0,0.0,0.0,0.0,0.0,0.0,1.776357e-15
131,Maldives,MDV,0.0,0.0,,0.0,2.515714,0.0,0.5560882
86,Greenland,GRL,0.0,0.0,,2.831106,0.0,0.0,0.6347102
194,São Tomé and Príncipe,STP,0.0,0.0,0.0,0.0,3.56112,0.0,0.6930001


#### INDEX CALCULATION STEP 6: Adding GeoData to be able to visualize

In [None]:
# Loading country-level administrative boundaries gdf

gdf = gpd.read_file('/0001_BASE DATA FILES/01_Schools/00_GLOBAL FINAL/gadm administrative boundaries/gadm_410_dissolved.shp')

In [None]:
# Merging the country boundaries gdf with the SRI df

countries_SRI = gdf.merge(df, left_on='GID_0', right_on='GID')

In [None]:
# Checking columns of the merged gdf

countries_SRI.columns

Index(['COUNTRY_x', 'UID', 'GID_0', 'NAME_0', 'VARNAME_0', 'GID_1', 'NAME_1',
       'VARNAME_1', 'NL_NAME_1', 'ISO_1', 'HASC_1', 'CC_1', 'TYPE_1',
       'ENGTYPE_1', 'VALIDFR_1', 'GID_2', 'NAME_2', 'VARNAME_2', 'NL_NAME_2',
       'HASC_2', 'CC_2', 'TYPE_2', 'ENGTYPE_2', 'VALIDFR_2', 'GID_3', 'NAME_3',
       'VARNAME_3', 'NL_NAME_3', 'HASC_3', 'CC_3', 'TYPE_3', 'ENGTYPE_3',
       'VALIDFR_3', 'GID_4', 'NAME_4', 'VARNAME_4', 'CC_4', 'TYPE_4',
       'ENGTYPE_4', 'VALIDFR_4', 'GID_5', 'NAME_5', 'CC_5', 'TYPE_5',
       'ENGTYPE_5', 'GOVERNEDBY', 'SOVEREIGN', 'DISPUTEDBY', 'REGION',
       'VARREGION', 'CONTINENT', 'SUBCONT', 'Shape_Leng', 'Shape_Area',
       'geometry', 'COUNTRY_y', 'GID', 'coastflood', 'rivflood', 'watersc',
       'heatwvs', 'pm25', 'cyclns', 'SRI'],
      dtype='object')

In [None]:
# Retaining only relevant columns

countries_SRI = countries_SRI[['COUNTRY_y', 'SOVEREIGN', 'GID', 'CONTINENT', 'SRI', 'coastflood', 'rivflood', 'watersc',
       'heatwvs', 'pm25', 'cyclns', 'Shape_Leng', 'Shape_Area', 'geometry']].copy()

In [None]:
# Renaming 'COUNTRY_y' column to 'COUNTRY' for clarity

countries_SRI.rename(columns={'COUNTRY_y': 'COUNTRY'}, inplace=True)

In [None]:
# Checking the final countries_SRI gdf

countries_SRI

Unnamed: 0,COUNTRY,SOVEREIGN,GID,CONTINENT,SRI,coastflood,rivflood,watersc,heatwvs,pm25,cyclns,Shape_Leng,Shape_Area,geometry
0,Afghanistan,Afghanistan,AFG,Asia,5.806437,0.000000,8.400852,7.862882,5.823122,7.234168,0.000000,3.456550,0.304174,"MULTIPOLYGON (((63.61554 29.46970, 63.61425 29..."
1,Akrotiri and Dhekelia,United Kingdom,XAD,Asia,2.693935,0.000000,0.000000,8.087619,0.000000,3.393913,0.000000,0.855365,0.010675,"MULTIPOLYGON (((33.00867 34.63230, 33.00764 34..."
2,Albania,Albania,ALB,Europe,5.687297,5.893026,4.276137,7.204302,8.764479,4.103726,0.000000,0.173381,0.001401,"MULTIPOLYGON (((19.83501 40.05079, 19.82779 40..."
3,Algeria,Algeria,DZA,Africa,6.145900,0.000000,6.002149,8.392361,9.374227,6.080294,0.000000,1.095676,0.063365,"MULTIPOLYGON (((1.32778 20.73215, 1.31754 20.7..."
4,American Samoa,United States,ASM,Oceania,2.598729,0.000000,0.000000,,0.000000,0.000000,8.178696,0.053881,0.000119,"MULTIPOLYGON (((-170.76183 -14.36973, -170.762..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,Western Sahara,Western Sahara,ESH,Africa,2.952563,0.434300,0.421159,3.970252,4.332068,6.307326,0.000000,10.638047,3.886799,"MULTIPOLYGON (((-16.83542 22.15431, -16.83569 ..."
240,Yemen,Yemen,YEM,Asia,5.950827,0.346875,5.232645,8.899092,8.428105,6.324710,0.606624,2.254298,0.042142,"MULTIPOLYGON (((42.57459 13.44986, 42.57459 13..."
241,Zambia,Zambia,ZMB,Africa,3.257392,0.000000,4.832552,4.771330,3.922410,4.468290,0.000000,4.718830,0.747447,"POLYGON ((25.87834 -17.97218, 25.87034 -17.970..."
242,Zimbabwe,Zimbabwe,ZWE,Africa,3.855863,0.000000,5.470904,7.838062,2.077452,4.026967,0.000000,0.077080,0.000338,"POLYGON ((27.69676 -20.92233, 27.69684 -20.922..."


#### INDEX CALCULATION STEP 6: Dropping selection of small island developing states, microstates, and overseas territories (except Greenland)
Data on small island developing states and microstates is limited due to their size, warranting their exclusion from the SRI. Overseas territories have been treated separately from their sovereign in the index calculation, owing to the structure of the administrative boundary dataset used to assign the correct country to each individual school. These also excluded from the final index dataset to retain simplicity.

##### Small island states & micro-states

In [None]:
# List of a combination of small island developing states (SIDS), small island states, and microstates to be dropped from the final index dataset

SIDS = [
    "Anguilla", "Antigua and Barbuda", "Barbados", "British Virgin Islands", "Dominica", "Grenada", 
    "Montserrat", "Saint Kitts and Nevis", "Saint Lucia", "Saint Vincent and the Grenadines", 
    "Trinidad and Tobago", "Vanuatu", "Kiribati", "Marshall Islands", "Federated States of Micronesia", 
    "Nauru", "Palau", "Cook Islands", "Niue", "Samoa", "Tonga", "Tuvalu", 
    "Maldives", "Cabo Verde", "Comoros", "Sao Tome and Principe", "Seychelles", "Andorra", 
    "Holy See", "Monaco", "San Marino", "Tokelau", "Turks and Caicos Islands",
]

In [None]:
# Creating a list of all countries in the countries_SRI gdf

allcountries = countries_SRI['COUNTRY'].tolist()

In [None]:
# Using fuzzy matching to find the closest matches for each country in the SIDS-list in the allcountries list

matches = []
for country in SIDS:
    result = process.extractOne(country, allcountries, score_cutoff=80)
    if result is not None:
        match, score, idx = result
        matches.append(match)
    else:
        print(f"No match found for: {country}")

No match found for: Holy See


In [None]:
# checking number of countries in the SIDS list

len(SIDS)

33

In [None]:
# Checking number of matches found in the allcountries list

len(matches)

32

In [None]:
# Dropping the matched SIDS countries from the main countries_SRI gdf

gdf_cleaned = countries_SRI[~countries_SRI['COUNTRY'].isin(matches)]

##### Overseas territories

In [None]:
# Creating a list of overseas territories, which in the administrative boundaries dataset are defined as countries that have a different sovereign than the country itself 
# (e.g., Greenland is a territory of Denmark)

territories = list((gdf_cleaned.loc[gdf_cleaned['COUNTRY'] != gdf_cleaned['SOVEREIGN']])['COUNTRY'])

In [None]:
# Manually removing Greenland from the territories list, as we want to retain it in the final index dataset given its size

territories.remove("Greenland")

In [None]:
# Checking list of overseas territories

territories

['Akrotiri and Dhekelia',
 'American Samoa',
 'Aruba',
 'Bermuda',
 'Bonaire, Sint Eustatius and Saba',
 'Cayman Islands',
 'Christmas Island',
 'Curaçao',
 'Falkland Islands',
 'Faroe Islands',
 'French Guiana',
 'French Polynesia',
 'Gibraltar',
 'Guadeloupe',
 'Guernsey',
 'Isle of Man',
 'Jersey',
 'Martinique',
 'Mayotte',
 'New Caledonia',
 'Norfolk Island',
 'Northern Mariana Islands',
 'Pitcairn Islands',
 'Puerto Rico',
 'Réunion',
 'Saint Helena, Ascension and Tris',
 'Saint Pierre and Miquelon',
 'Saint-Barthélemy',
 'Saint-Martin',
 'Sint Maarten',
 'Svalbard and Jan Mayen',
 'Virgin Islands, U.S.',
 'Wallis and Futuna',
 'Åland']

In [None]:
# Dropping overseas territories from the gdf

gdf_cleaned = gdf_cleaned[~gdf_cleaned['COUNTRY'].isin(territories)]

In [None]:
# Checking list of remaining countries

list(gdf_cleaned['COUNTRY'])

['Afghanistan',
 'Albania',
 'Algeria',
 'Angola',
 'Argentina',
 'Armenia',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bhutan',
 'Bolivia',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'Brunei',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Caspian Sea',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Costa Rica',
 'Croatia',
 'Cuba',
 'Cyprus',
 'Czechia',
 "Côte d'Ivoire",
 'Democratic Republic of the Congo',
 'Denmark',
 'Djibouti',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Equatorial Guinea',
 'Eritrea',
 'Estonia',
 'Ethiopia',
 'Fiji',
 'Finland',
 'France',
 'Gabon',
 'Gambia',
 'Georgia',
 'Germany',
 'Ghana',
 'Greece',
 'Greenland',
 'Guam',
 'Guatemala',
 'Guinea',
 'Guinea-Bissau',
 'Guyana',
 'Haiti',
 'Honduras',
 'Hungary',
 'Iceland',
 'India',
 'Indonesia',
 'Iran',
 'Iraq',
 'Ireland',
 'Israel',
 

In [None]:
# Creating list of territories still left in the gdf to manually remove

manual_drop = ['Caspian Sea','Guam', 'Northern Cyprus', 'Paracel Islands', 'Solomon Islands', 'Spratly Islands']

In [None]:
# Manually removing leftover territories

gdf_cleaned = gdf_cleaned[~gdf_cleaned['COUNTRY'].isin(manual_drop)]

#### Final list of included countries (and final list of dropped countries / territories)

In [None]:
# Final list of countries included in the index

list(gdf_cleaned['COUNTRY'])

['Afghanistan',
 'Albania',
 'Algeria',
 'Angola',
 'Argentina',
 'Armenia',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bhutan',
 'Bolivia',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'Brunei',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Costa Rica',
 'Croatia',
 'Cuba',
 'Cyprus',
 'Czechia',
 "Côte d'Ivoire",
 'Democratic Republic of the Congo',
 'Denmark',
 'Djibouti',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Equatorial Guinea',
 'Eritrea',
 'Estonia',
 'Ethiopia',
 'Fiji',
 'Finland',
 'France',
 'Gabon',
 'Gambia',
 'Georgia',
 'Germany',
 'Ghana',
 'Greece',
 'Greenland',
 'Guatemala',
 'Guinea',
 'Guinea-Bissau',
 'Guyana',
 'Haiti',
 'Honduras',
 'Hungary',
 'Iceland',
 'India',
 'Indonesia',
 'Iran',
 'Iraq',
 'Ireland',
 'Israel',
 'Italy',
 'Jamaica',
 'Ja

In [None]:
# Final list of dropped countries & territories

dropped_countries = list((countries_SRI[~countries_SRI['COUNTRY'].isin(list(gdf_cleaned['COUNTRY']))])["COUNTRY"])
dropped_countries

['Akrotiri and Dhekelia',
 'American Samoa',
 'Andorra',
 'Anguilla',
 'Antigua and Barbuda',
 'Aruba',
 'Barbados',
 'Bermuda',
 'Bonaire, Sint Eustatius and Saba',
 'British Virgin Islands',
 'Cabo Verde',
 'Caspian Sea',
 'Cayman Islands',
 'Christmas Island',
 'Comoros',
 'Cook Islands',
 'Curaçao',
 'Dominica',
 'Falkland Islands',
 'Faroe Islands',
 'French Guiana',
 'French Polynesia',
 'Gibraltar',
 'Grenada',
 'Guadeloupe',
 'Guam',
 'Guernsey',
 'Isle of Man',
 'Jersey',
 'Kiribati',
 'Maldives',
 'Marshall Islands',
 'Martinique',
 'Mayotte',
 'Micronesia',
 'Monaco',
 'Montserrat',
 'Nauru',
 'New Caledonia',
 'Niue',
 'Norfolk Island',
 'Northern Cyprus',
 'Northern Mariana Islands',
 'Palau',
 'Paracel Islands',
 'Pitcairn Islands',
 'Puerto Rico',
 'Réunion',
 'Saint Helena, Ascension and Tris',
 'Saint Kitts and Nevis',
 'Saint Lucia',
 'Saint Pierre and Miquelon',
 'Saint Vincent and the Grenadines',
 'Saint-Barthélemy',
 'Saint-Martin',
 'Samoa',
 'San Marino',
 'Seyc

In [None]:
# Ensuring no country / territory fell through the cracks

print(f"Dropped Countries / Territories: {len(dropped_countries)}")
print(f"Remaining Countries / Territories: {len(gdf_cleaned)}")

print(f"\nSum: {len(dropped_countries) + len(gdf_cleaned)}")

print(f"\nOriginal number of countries / territories: {len(countries_SRI)}")

Dropped Countries / Territories: 72
Remaining Countries / Territories: 172

Sum: 244

Original number of countries / territories: 244


#### INDEX CALCULATION STEP 7: Adding exposure categories in accordance with CCRI categorization

In [None]:
# Creating a function to classify exposure categories based on the SRI score

def classify_exposure(score):
    if pd.isna(score):
        return np.nan
    elif 0.0 <= score <= 2.0:
        return 'Low'
    elif 2.0 < score <= 3.7:
        return 'Low-Medium'
    elif 3.7 < score <= 5.4:
        return 'Medium-High'
    elif 5.4 < score <= 7.0:
        return 'High'
    elif 7.0 < score <= 10.0:
        return 'Extremely High'
    else:
        return np.nan

In [None]:
# Applying classification and creating new "SRI_category" column

gdf_cleaned['SRI_category'] = gdf_cleaned['SRI'].apply(classify_exposure)

In [None]:
# Reordering columns

col = gdf_cleaned.pop('SRI_category')
gdf_cleaned.insert(5, 'SRI_category', col)

#### INDEX CALCULATION STEP 8: Saving data as files

In [None]:
# Saving the final index gdf as a shapefile

gdf_cleaned.to_file("/0002_DATA PRODUCTS/countries_SRI.shp")

  gdf_cleaned.to_file("/Users/Ole/olewelo-Nextcloud/Capstone/Capstone SRI Data Files/Countries/countries_SRI_simplified.shp")


In [None]:
# Dropping geo-columns to save as csv

countries_SRI_simplified = gdf_cleaned.iloc[:,:12].copy()
countries_SRI_simplified

Unnamed: 0,COUNTRY,SOVEREIGN,GID,CONTINENT,SRI,SRI_category,coastflood,rivflood,watersc,heatwvs,pm25,cyclns
0,Afghanistan,Afghanistan,AFG,Asia,5.806437,High,0.000000,8.400852,7.862882,5.823122,7.234168,0.000000
2,Albania,Albania,ALB,Europe,5.687297,High,5.893026,4.276137,7.204302,8.764479,4.103726,0.000000
3,Algeria,Algeria,DZA,Africa,6.145900,High,0.000000,6.002149,8.392361,9.374227,6.080294,0.000000
6,Angola,Angola,AGO,Africa,4.172859,Medium-High,0.000000,3.371977,4.075189,8.742216,4.105818,0.000000
9,Argentina,Argentina,ARG,South America,5.145793,Medium-High,2.400442,5.662329,6.274556,8.330334,4.757936,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
236,Vietnam,Vietnam,VNM,Asia,7.532732,Extremely High,8.259772,9.320889,4.546153,9.189533,6.953741,3.268620
239,Western Sahara,Western Sahara,ESH,Africa,2.952563,Low-Medium,0.434300,0.421159,3.970252,4.332068,6.307326,0.000000
240,Yemen,Yemen,YEM,Asia,5.950827,High,0.346875,5.232645,8.899092,8.428105,6.324710,0.606624
241,Zambia,Zambia,ZMB,Africa,3.257392,Low-Medium,0.000000,4.832552,4.771330,3.922410,4.468290,0.000000


In [None]:
# Saving the simplified gdf as csv file

countries_SRI_simplified.to_csv("/0002_DATA PRODUCTS/countries_SRI.csv", index=False)