In [1]:
# Calcula la exposición al cambio climático para grupos demográficos femeninos

# Importamos librerías
import os
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
import rioxarray as rio
import geoviews as gv
import xesmf as xe

In [2]:
# Input data

# Label of the country-code index shared by every table
ix  = "ISO_A3"

# Folders
path_r = "../share/Indexes/"
wb_path = "../../Bases_de_datos/CCKP_NetCDF/"

# Data catalog.
# It is kept under its own name: it used to be read into df_c, which the
# climate index table loaded further down overwrote right away, so the
# catalog was read and then lost.
path_catalog = "../../Bases_de_datos/Data_catalog.csv"
df_catalog = pd.read_csv(path_catalog)

# Base country table, indexed by the "alpha-3" column (renamed to ix)
iso = "../../Bases_de_datos/Country_ISO_code.csv"
df_iso = pd.read_csv(iso).set_index("alpha-3")
df_iso.index.name = ix

# Index names
index_n = [
    "Climate change risk index",
    "Climate change exposure index",
    "Social vulnerability index, physical climate impacts",
    "Sea level rise exposure index",
    "Drought exposure index",
    "Extreme heat exposure index",
    "Extreme rainfall exposure index",
    "Hurricane exposure index",
    "Life expectancy at birth index", "log GNI per capita, PPP index",
    "Gender Development index index", "% rural population index",
    "% population below 15 or above 65 years old index" ]
# Exposure index plus its five hazard components (positions 1, 3..7)
climate = index_n[1:2] + index_n[3:8]
# Vulnerability index plus its five socioeconomic components (2, 8..12)
social = index_n[2:3] + index_n[8:13]

# Climate, socioeconomic and risk index tables
df_c = pd.read_csv(path_r + "climate_index.csv", index_col = ix)
df_s = pd.read_csv(path_r + "Physical_vulnerability_index.csv", index_col = ix)
df_r = pd.read_csv(path_r + "climate_risk_index.csv", index_col = ix)
# Column assignment aligns on the ix index; countries missing from a
# source table get NaN.
df_iso[climate] = df_c[climate]
df_iso[social] = df_s[social]
df_iso[index_n[0]] = df_r[index_n[0]]
# Keep only countries that have both the exposure and vulnerability index
df_iso = df_iso[ df_iso[index_n[1:3]].notnull().all(axis = 1) ]

# Maps
borders_path = ( "../../Bases_de_datos/Mapas/"
    + "Natural_Earth/ne_50m_admin_0_countries_mod" )
# The file's own ix column is dropped and "ISO_A3_EH" takes its place
borders = gpd.read_file(borders_path).drop(
    columns = [ix] ).set_index("ISO_A3_EH")
borders.index.name = ix
# Discard features without a numeric code ("-99") and repeated codes
borders = borders[ borders["ISO_N3_EH"] != "-99" ]
borders = borders[ ~borders.index.duplicated() ]
df_iso["ISO_N3_EH"] = borders["ISO_N3_EH"].astype(int)

In [3]:
# Funciones a utilizar

# Mejora el formato de las tablas para su uso en documentos.
def display(vn, var_i = None, p = False, format = "{:.1f}",
    category = "", type = "category", add = 1):
    # variables
    # vn:       variable principal
    # var_i:    conjunto de variables secundarias, solo si type = "index"
    # p:        indica si la variable es positiva
    # format:   formato a usar
    # category: nombre de la categoría, solo si type = "category"
    # type:     tipo de tabla a crear
    #           "index":    Resumen de variables
    #           "category": Una variable principal
    # add:      indica si sumar o promediar las columnas
    #
    # regresa
    # disp:     Objeto Display de Pandas o un Dataframe
    #           de pandas basado en la tabla de entrada

    # Países sin datos
    no_d = df_iso[ df_iso[vn].isnull() ].shape[0]
    print( f"Countries without data: {no_d} countries" )

    # Escogemos los 5 países más altos y otros más para formar la tabla
    # Resumen de variables
    if   type == "index":
        disp = df_iso.loc[ df_iso[vn].notnull(),
            ["name", vn] + var_i ].sort_values(
            vn, ascending = p ).reset_index(drop = True).head(15).copy()
    # Una variable principal
    elif type == "category":
        disp = df_iso.loc[ df_iso[vn].notnull(), ["name", vn] ].sort_values(
            vn, ascending = p ).reset_index(drop = True).head(15).copy()
    c_list = list( disp[ ["name", vn]
        ].sort_values(vn, ascending = p).head(5)["name"].values )
    print(f"Most vulnerable countries: {', '.join(c_list)}")

    # Categorías geopolíticas y geográficas
    cats = [ "", "", "", "", "", "", "SIDS", "LDC", "LLDC",
        "Asia", "Europe", "Africa", "Oceania", "Americas" ]
    # Iteramos para cada categoría geopolítica
    for r, cat in enumerate(cats[:9]):
        if r in range(0, 6): pass
        else:
            disp.iloc[r, 0] = cat
            # Resumen de variables
            if   type == "index":
                # Sumamos todo
                if add == 1: 
                    disp.iloc[r, 1:] = df_iso.loc[
                        df_iso[cat], [vn] + var_i ].sum()
                # Sumamos la población, promediamos porcentajes
                elif add == 0:
                    disp.iloc[r, 1] = df_iso.loc[
                        df_iso[cat], [vn] ].sum()
                    disp.iloc[r, 2] = df_iso.loc[
                        df_iso[cat], var_i[0] ].mean()
                    disp.iloc[r, 3:] = df_iso.loc[
                        df_iso[cat], var_i[1:] ].sum()
                # Promediamos todo
                elif add == -1:
                    disp.iloc[r, 1:] = df_iso.loc[
                        df_iso[cat], [vn] + var_i ].mean()
            # Una variable principal
            elif type == "category":
                # Sumamos la columna
                if add == 1:
                    disp.iloc[r, 1] = df_iso.loc[df_iso[cat], vn].sum()
                # Promediamos la columna
                elif add == 0:
                    disp.iloc[r, 1] = df_iso.loc[df_iso[cat], vn].mean()
            c_list = list( df_iso.loc[df_iso[cat], ["name", vn]
                ].sort_values( vn, ascending = p
                ).head(5)["name"].values )
            print(f"Most vulnerable {cat}: {', '.join(c_list)}")

    # Iteramos para cada categoría geográfica
    for r, cat in enumerate(cats):
        if r in range(0, 9): pass
        else:
            disp.iloc[r, 0] = cat
            # Resumen de variables
            if   type == "index":
                # Sumamos todo
                if add == 1:
                    disp.iloc[r, 1:] = df_iso.loc[
                        df_iso["region"] == cat, [vn] + var_i ].sum()
                # Sumamos la población, promediamos porcentajes
                elif add == 0:
                    disp.iloc[r, 1] = df_iso.loc[
                        df_iso["region"] == cat, [vn] ].sum()
                    disp.iloc[r, 2] = df_iso.loc[
                        df_iso["region"] == cat, var_i[0] ].mean()
                    disp.iloc[r, 3:] = df_iso.loc[
                        df_iso["region"] == cat, var_i[1:] ].sum()
                # Promediamos todo
                if add == -1:
                    disp.iloc[r, 1:] = df_iso.loc[
                        df_iso["region"] == cat, [vn] + var_i ].mean()
            # Una variable principal
            elif type == "category":
                # Sumamos la columna
                if add == 1:
                    disp.iloc[r, 1] = df_iso.loc[
                        df_iso["region"] == cat, vn].sum()
                # Promediamos la columna
                elif add == 0: 
                    disp.iloc[r, 2] = df_iso.loc[
                        df_iso["region"] == cat, vn].mean()

    # Total mundial
    cat = "World"
    r = 14
    disp.iloc[r, 0] = cat
    # Resumen de variables
    if   type == "index":
        # Sumamos todo
        if add == 1:
            disp.iloc[r, 1:] = df_iso[ [vn] + var_i ].sum()
        # Sumamos la población, promediamos porcentajes
        elif add == 0:
            disp.iloc[r, 1] = df_iso[ [vn] ].sum()
            disp.iloc[r, 2] = df_iso[ var_i[0] ].mean()
            disp.iloc[r, 3:] = df_iso[ var_i[1:] ].sum()
        # Promediamos todo
        elif add == -1:
            disp.iloc[r, 1:] = df_iso[ [vn] + var_i ].mean()
    # Una variable principal
    elif type == "category":
        # Sumamos la columna
        if add == 1:
            disp.iloc[r, 1] = df_iso[vn].sum()
        # Promediamos la columna
        elif add == 0:
            disp.iloc[r, 2] = df_iso[vn].sum()
    
    # Renombramos columnas
    # Resumen de variables
    if   type == "index":
        cols = ["Name", vn] + var_i
    # Una variable principal
    elif type == "category":
        cols = [ "Name", f"{category}" ]
    disp.columns = cols

    # Damos formato
    # Resumen de variables
    if   type == "index":
        # Pasamos la tabla sin formato
        pass
        # Una variable principal
    elif type == "category":
        disp = disp.style.format( { cols[1]: format } )

    # Regresamos la tabla
    return disp

In [4]:
# Moves all the files into a single folder.
# Run only once.
# NOTE(review): the script below is disabled by wrapping it in a
# triple-quoted string, so this cell only evaluates that string and
# nothing is moved. The paths inside it are absolute and machine-specific.
# When enabled, it lists every sub-folder of path_d (skipping
# ".DS_Store"), takes the first four characters of a file name as a
# prefix, and renames each file into path_r; only "README.txt" gets the
# prefix, so the per-folder READMEs do not overwrite each other.
'''
path   = "/Users/rodrigo/Downloads/"
path_d = f"{path}1km_pregnancies_countries/"
path_r = f"{path}1km_pregnancies/"

countries = os.listdir(path_d)

for c in countries:
    if c == ".DS_Store": pass
    else:
        files = os.listdir(path_d + c)
        iso = files[2][0:4]
        if files[2] == "README.txt": iso = files[3][0:4]
        for f in files:
            if f == "README.txt":
                os.rename( f"{path_d}{c}/{f}", f"{path_r}{iso}{f}" )
            else:
                os.rename(f"{path_d}{c}/{f}", f"{path_r}{f}")
'''

'\npath   = "/Users/rodrigo/Downloads/"\npath_d = f"{path}1km_pregnancies_countries/"\npath_r = f"{path}1km_pregnancies/"\n\ncountries = os.listdir(path_d)\n\nfor c in countries:\n    if c == ".DS_Store": pass\n    else:\n        files = os.listdir(path_d + c)\n        iso = files[2][0:4]\n        if files[2] == "README.txt": iso = files[3][0:4]\n        for f in files:\n            if f == "README.txt":\n                os.rename( f"{path_d}{c}/{f}", f"{path_r}{iso}{f}" )\n            else:\n                os.rename(f"{path_d}{c}/{f}", f"{path_r}{f}")\n'

In [11]:
# Label of the country-code index
ix  = "ISO_A3"

# Folders
file_path = "../results/hotspots_1km/"
data_path = "../../Bases_de_datos/Worldpop/"

# Selectors: m picks the dataset, a the age bracket, g the gender
m, a, g = 2, 0, 1
g_f = ["f", "m"]
g_n = ["Female", "Male"]
a_f = [0, 1, 5, 10, 15, 20, 25]
a_n = ["0-12 months", "1-5 years"]

# Datasets: folder, variable label, column suffix and output table
names     = ["Pregnancies", "wcba2015", "age_sex_structures"]
name_path = [ data_path + folder + "/" for folder in names ]
var_n     = [
    "Pregnant",
    "Women of child bearing age",
    "{} population, {} old".format(g_n[g], a_n[a]),
]
name_n    = [ " affected " + label.lower() for label in var_n ]
file_n    = [ stem + ".csv" for stem in names ]
data_n    = [ "_pregs_pp_v2_2015.tif" ]

# Affected-population variables: one per combination of hazards
vars     = [
    "Only extreme rainfall",
    "Only extreme heat",
    "Only drought",
    "Only strong hurricanes",
    "Extreme rainfall & heat",
    "Extreme rainfall & drought",
    "Extreme rainfall & hurricanes",
    "Extreme heat & drought",
    "Extreme heat & hurricanes",
    "Drought & strong hurricanes",
    "Extreme rainfall, heat, & drought",
    "Extreme rainfall, heat, & hurricanes",
    "Extreme rainfall, drought, & hurricanes",
    "Extreme heat, drought, & hurricanes",
    "Extreme rainfall, heat, drought, & hurricanes",
]
# Individual hazards
var_clim = [
    "Extreme rainfall",
    "Extreme heat",
    "Drought",
    "Strong hurricanes",
]
# For each hazard in var_clim, every combination that includes it
var_ci   = [
    # Extreme rainfall
    [
        "Only extreme rainfall",
        "Extreme rainfall & heat",
        "Extreme rainfall & drought",
        "Extreme rainfall & hurricanes",
        "Extreme rainfall, heat, & drought",
        "Extreme rainfall, heat, & hurricanes",
        "Extreme rainfall, drought, & hurricanes",
        "Extreme rainfall, heat, drought, & hurricanes",
    ],
    # Extreme heat
    [
        "Only extreme heat",
        "Extreme rainfall & heat",
        "Extreme heat & drought",
        "Extreme heat & hurricanes",
        "Extreme rainfall, heat, & drought",
        "Extreme rainfall, heat, & hurricanes",
        "Extreme heat, drought, & hurricanes",
        "Extreme rainfall, heat, drought, & hurricanes",
    ],
    # Drought
    [
        "Only drought",
        "Extreme rainfall & drought",
        "Extreme heat & drought",
        "Drought & strong hurricanes",
        "Extreme rainfall, heat, drought, & hurricanes",
        "Extreme rainfall, heat, & drought",
        "Extreme rainfall, drought, & hurricanes",
        "Extreme heat, drought, & hurricanes",
    ],
    # Strong hurricanes
    [
        "Only strong hurricanes",
        "Extreme rainfall & hurricanes",
        "Extreme heat & hurricanes",
        "Drought & strong hurricanes",
        "Extreme rainfall, heat, & hurricanes",
        "Extreme rainfall, drought, & hurricanes",
        "Extreme heat, drought, & hurricanes",
        "Extreme rainfall, heat, drought, & hurricanes",
    ],
]
var_tot  = "Extreme climate"

# Affected-zone rasters, in the same order as vars
files = [ stem + "_2040_2059_SSP245.tif" for stem in (
    "pre", "temp", "drought", "hurr",
    "temp_pre", "pre_drought", "pre_hurr",
    "temp_drought", "temp_hurr", "hurr_drought",
    "temp_pre_drought", "temp_pre_hurr",
    "pre_hurr_drought", "temp_drought_hurr",
    "temp_pre_hurr_drought",
) ]

# Column names: absolute counts, and the same names prefixed with "% "
name_p    = [ hazard + name_n[m] for hazard in vars ]
name_pp   = [ "% " + column for column in name_p ]
name_c    = [ hazard + name_n[m] for hazard in var_clim ]
name_cp   = [ "% " + column for column in name_c ]
name_t    = var_tot + name_n[m]
name_tp   = "% " + name_t

# Count columns grouped per individual hazard
name_ci = [ [ hazard + name_n[m] for hazard in group ]
    for group in var_ci ]

In [12]:
def _open_raster(path):
    """Open the first band of a raster as a Dataset on lat/lon dims."""
    return xr.open_dataset(path).isel(band = 0).drop_vars(
        ["band", "spatial_ref"] ).rename_dims(
        {"x": "lon", "y": "lat"} ).rename_vars(
        {"x": "lon", "y": "lat"} )


# Results table for the selected dataset
table_path = f"../share/Indexes/{file_n[m]}"

# Create the results table if it does not exist yet
if not os.path.exists( table_path ):
    # Base country table
    iso = "../../Bases_de_datos/Country_ISO_code.csv"
    df_iso = pd.read_csv(iso).set_index("alpha-3")
    df_iso.index.name = ix
    df_iso = df_iso.rename(columns = {"country-code": "ISO_N3"})
    df_iso[name_p] = np.nan
    df_iso[name_pp] = np.nan
    df_iso[["name", "ISO_N3", "region", "sub-region", "OECD", "EU27",
        "BRICS+", "BRICS", "LDC", "SIDS", "LLDC"] + name_p + name_pp
        ].to_csv( table_path )

# Always work on the table as stored, indexed by the numeric code.
# The per-country sums computed below are indexed by integer ISO_N3 and
# column assignment aligns on the index; a freshly created table used to
# stay indexed by ISO_A3, so the first run filled every column with NaN.
df_iso = pd.read_csv( table_path, index_col = "ISO_N3" )

if not var_n[m] in df_iso.columns:
    df_iso[var_n[m]] = np.nan
    df_iso[name_p] = np.nan
    df_iso[name_pp] = np.nan

# Population raster for the selected gender and age bracket
path_c = f"{name_path[m]}global_{g_f[g]}_{a_f[a]}_2020_1km.tif"
# Country raster; its cell values are used as ISO_N3 codes further down
countries = _open_raster( f"{data_path}global_level0_1km_2000_2020.tif" )
gender = _open_raster(path_c)
# Force the population raster onto the country raster's coordinates so
# that .sel() picks the same cells from both.
# NOTE(review): assumes both rasters share one grid; confirm.
gender["lat"] = countries["lat"]
gender["lon"] = countries["lon"]
# NOTE(review): the transposed array is assigned back under the same
# name; confirm this step is still needed.
gender["band_data"] = gender["band_data"].T

lat_min     = min( gender["lat"].values[0],
    gender["lat"].values[-1] )
lat_max     = max( gender["lat"].values[0],
    gender["lat"].values[-1] )
lon_min     = min( gender["lon"].values[0],
    gender["lon"].values[-1] )
lon_max     = max( gender["lon"].values[0],
    gender["lon"].values[-1] )

# Split the globe into four quadrants that are processed one at a time.
# The latitude slices run from high to low, so they presumably rely on a
# descending latitude coordinate; verify against the rasters.
lim_lat = [ slice(lat_max, (lat_max+lat_min)/2),
            slice(lat_max, (lat_max+lat_min)/2),
            slice((lat_max+lat_min)/2, lat_min),
            slice((lat_max+lat_min)/2, lat_min) ]

lim_lon = [ slice(lon_min, (lon_max+lon_min)/2),
            slice((lon_max+lon_min)/2, lon_max),
            slice(lon_min, (lon_max+lon_min)/2),
            slice((lon_max+lon_min)/2, lon_max) ]


# Iterate over every hazard combination
for i, v in enumerate(vars):
    print(v)

    # Only compute combinations that have no stored result yet
    if df_iso[name_p[i]].isnull().all():
        cols = [var_n[m], name_p[i], name_pp[i]]
        clim = _open_raster( f"{file_path}{files[i]}" )
        clim["lat"] = countries["lat"]
        clim["lon"] = countries["lon"]

        countries_i = []

        for j in range( len(lim_lat) ):
            gender_j    =    gender.sel({"lat": lim_lat[j], "lon": lim_lon[j]})
            clim_j      =      clim.sel({"lat": lim_lat[j], "lon": lim_lon[j]})
            countries_j = countries.sel({"lat": lim_lat[j], "lon": lim_lon[j]})
            # Dropping the lat/lon index leaves plain positional rows, so
            # the three tables line up cell by cell.
            gender_j    =    gender_j.to_dataframe().reset_index(drop = True)
            clim_j      =      clim_j.to_dataframe().reset_index(drop = True)

            countries_i.append( countries_j.to_dataframe(
                ).reset_index(drop = True) )

            countries_i[j][var_n[m]] = gender_j["band_data"]
            countries_i[j][v]        =   clim_j["band_data"]
            # The country raster value becomes the grouping key
            countries_i[j] = countries_i[j].set_index("band_data")
            countries_i[j].index.name = "ISO_N3"
            # Affected population = hazard value * population, per cell
            countries_i[j][name_p[i]] = (
                countries_i[j][v] * countries_i[j][var_n[m]] )
            countries_i[j] = countries_i[j].groupby("ISO_N3").sum()
            countries_i[j].index = countries_i[j].index.astype(int)

        # Add up the four quadrants per country
        countries_i = pd.concat(countries_i).groupby("ISO_N3").sum()
        countries_i[name_pp[i]] = ( 100 * countries_i[name_p[i]]
            / countries_i[var_n[m]] )

        df_iso[cols] = countries_i[cols]

        # Save after every combination so an interrupted run can resume
        df_iso.to_csv( table_path )

# Results
df_iso = pd.read_csv( table_path, index_col = ix )
# Total affected population: the combinations are named as mutually
# exclusive ("Only ...", "... & ..."), so they are simply added up.
df_iso[name_t] = df_iso[name_p].sum(axis = 1)
df_iso[name_tp] = ( 100 * df_iso[name_t] / df_iso[var_n[m]] )
# Per-hazard totals: every combination that includes the hazard
for i, v in enumerate(name_c):
    df_iso[v]  = df_iso[name_ci[i]].sum(axis = 1)
    df_iso[name_cp[i]] = 100 * df_iso[v] / df_iso[var_n[m]]
df_iso.to_csv( table_path )

print(f"Number of countries: {df_iso.dropna().shape[0]}")

disp = display(name_t, var_i = [ name_tp ] + name_c,
    p = False, type = "index", add = 0)
cols = disp.columns
disp = disp.style.format( { cols[1]: "{:,.0f}", cols[2]: "{:.1f}",
    cols[3]: "{:,.0f}", cols[4]: "{:,.0f}",
    cols[5]: "{:,.0f}", cols[6]: "{:,.0f}" } )
disp

Only extreme rainfall
Only extreme heat
Only drought
Only strong hurricanes
Extreme rainfall & heat
Extreme rainfall & drought
Extreme rainfall & hurricanes
Extreme heat & drought
Extreme heat & hurricanes
Drought & strong hurricanes
Extreme rainfall, heat, & drought
Extreme rainfall, heat, & hurricanes
Extreme rainfall, drought, & hurricanes
Extreme heat, drought, & hurricanes
Extreme rainfall, heat, drought, & hurricanes
Number of counries: 240
Number of countries: 240
Countries without data: 0 countries
Most vulnerable countries: India, Pakistan, Nigeria, China, Philippines
Most vulnerable SIDS: Dominican Republic, Cuba, Haiti, Guinea-Bissau, Jamaica
Most vulnerable LDC: Sudan, Niger, Mali, Burkina Faso, Chad
Most vulnerable LLDC: Niger, Mali, Burkina Faso, Chad, South Sudan


Unnamed: 0,Name,"Extreme climate affected male population, 0-12 months old","% Extreme climate affected male population, 0-12 months old","Extreme rainfall affected male population, 0-12 months old","Extreme heat affected male population, 0-12 months old","Drought affected male population, 0-12 months old","Strong hurricanes affected male population, 0-12 months old"
0,India,9621862,70.6,96947,9617267,0,0
1,Pakistan,2447981,85.9,746708,2386712,0,0
2,Nigeria,2205774,60.4,0,2205774,0,0
3,China,1827083,22.3,2500,2657,0,1821968
4,Philippines,976590,79.8,0,18176,0,974199
5,Egypt,947076,94.2,748702,844584,944654,0
6,SIDS,309616,50.9,1643,29322,73121,249974
7,LDC,4537950,35.4,145876,3960456,504894,36292
8,LLDC,2595324,24.5,15204,2357466,239877,16
9,Asia,18901237,46.6,1750884,14604065,579290,3737374


In [None]:
# Table display options: thousands separators and no decimals
pd.set_option("display.float_format", "{:,.0f}".format)
# Countries whose affected share is 100 % or more, largest share first
df_iso[ df_iso[name_tp].ge(100) ][ ["name", name_t, name_tp] + name_c
    ].sort_values(by = name_tp, ascending = False)

Unnamed: 0_level_0,name,"Extreme climate affected female population, 0-12 months old","% Extreme climate affected female population, 0-12 months old","Extreme rainfall affected female population, 0-12 months old","Extreme heat affected female population, 0-12 months old","Drought affected female population, 0-12 months old","Strong hurricanes affected female population, 0-12 months old"
ISO_A3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BHR,Bahrain,14451,100,14451,13124,0,0
ISR,Israel,75300,100,549,2632,75300,0
NER,Niger,521189,100,4221,521189,0,0
QAT,Qatar,31107,100,31107,31069,0,0
VGB,Virgin Islands (British),182,100,0,0,0,182
DMA,Dominica,689,100,0,0,689,300
SXM,Sint Maarten (Dutch part),170,100,0,0,0,170
MTQ,Martinique,2702,100,0,0,2702,0
BHS,Bahamas,2710,100,0,0,0,2710
TCA,Turks and Caicos Islands,304,100,0,0,0,304
