In [1]:
# Calcula un índice por país de vulnerabilidad socioeconómica

# Importamos librerías
import numpy as np
import pandas as pd
import geopandas as gpd
import geoviews as gv
from scipy import stats

# Parámetros de mapas
gv.extension("matplotlib")
def hook(plot, element):
    cax = plot.handles["cax"]
    ax = plot.handles["axis"]
    bbox = ax.get_position()
    l, b, w, h = bbox.x0, bbox.y0, bbox.width, bbox.height
    cax.set_position([l, 0.9*b, w, 0.05*h])
options = { "colorbar": True, "linewidth": 0.4,
    "hooks": [hook], "ylim":(-62,85), "xlim":(-180,180),
    "colorbar_opts": {"orientation": "horizontal"} }
options_m = { "bgcolor": "lightgray", "fontscale": 2, "aspect": 2.25 }

In [2]:
# Datos

# Códigos nacionales
ix  = "ISO_A3"

# Carpetas
path_r = "../results/"

# Catálogo de datos
path_catalog = "../../Bases_de_datos/Data_catalog.csv"
df_c = pd.read_csv(path_catalog)

# Mapa
borders_path = ( "../../Bases_de_datos/Mapas/"
    + "Natural_Earth/ne_50m_admin_0_countries_mod" )
borders = gpd.read_file(borders_path).drop(
    columns = [ix] ).set_index("ISO_A3_EH")
borders.index.name = ix
borders = borders[ borders["ISO_N3_EH"] != "-99" ]
borders = borders[ ~borders.index.duplicated() ]

# Océano
ocean_path = ( "../../Bases_de_datos/Mapas/"
    + "Natural_Earth/ne_50m_ocean" )
ocean = gv.Polygons( gpd.read_file(ocean_path), vdims = "min_zoom"
    ).opts( linewidth = 0, cmap = "Paired")

# Tabla base
iso = "../../Bases_de_datos/Country_ISO_code.csv"
df_iso = pd.read_csv(iso).set_index("alpha-3")
df_iso = df_iso.drop( ["ATA", "ATF", "HMD", "SJM", "SXM", "VAT",
    "BVT", "SGS", "UMI", "ESH", "TKL", "IOT"] )
df_iso.index.name = ix

# Nombres de variables a usar
vars = [
    "Mean years of schooling",
    "Scientific articles per million inhabitants",
    "Young workforce ratio (20-44 to 45-64 years old)",
    "Gender inequality index",
    "Total debt service [% of GNI]",
    "Central government debt [% of GDP]",
    "Gini coefficient"
    ]
var_i = [v + "_index" for v in vars]

In [3]:
# Mean years of schooling

# Cargamos el archivo de desarrollo
id = "GDI_HDI"
df = pd.read_csv( "../../" + df_c.loc[df_c["ID"]==id, "Path"].iloc[0]
    + df_c.loc[df_c["ID"]==id, "Filename" ].iloc[0],
    index_col = ix ).drop(columns = ["Source"])


# Seleccionamos la variable
i = 0

# Mapa
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
# Eliminamos Mónaco por motivos de visualización
#borders.loc["MCO"] = np.nan
print(f"min: {borders[vars[i]].min()}, max: {borders[vars[i]].max()}")
# borders[vars[i]].dropna().sort_values()
min = 1
max = 14.5
map_v = gv.Polygons( borders,
    vdims = gv.Dimension(vars[i], range = (min, max) )
    ).opts( cmap = "plasma", **options )
map = ( ocean * map_v ).opts( **options_m )
gv.output( map, size = 600 )

# Países sin datos
print( f"Sin datos: {df[ df[vars[i]].isnull() ].shape[0]} países" )
# 10 países más bajos
df[ ["Name", vars[i] ] ].sort_values(vars[i]).head(10)

min: 1.3, max: 14.3


Sin datos: 48 países


Unnamed: 0_level_0,Name,Mean years of schooling
ISO_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
NER,Niger,1.3
MLI,Mali,1.6
SOM,Somalia,1.9
BFA,Burkina Faso,2.3
TCD,Chad,2.3
GIN,Guinea,2.4
ETH,Ethiopia,2.4
AFG,Afghanistan,2.5
YEM,Yemen,2.8
SEN,Senegal,2.9


In [4]:
# Scientific articles per million inhabitants
id = "IP.JRN.ARTC.SC"
i = 1

# Cargamos el archivo
df_i = pd.read_csv( "../../" + df_c.loc[df_c["ID"]==id, "Path"].iloc[0]
    + df_c.loc[df_c["ID"]==id, "Filename" ].iloc[0], skiprows = 3
    ).rename( columns = {"Country Code": ix} ).set_index(ix)
df = df.reset_index().set_index("SVN_A3")
df[vars[i]] = df_i.iloc[ :, 4:-1 ].ffill(axis = 1).iloc[:, -1]
df["POP_EST"] = borders["POP_EST"]
df[vars[i]] = df[vars[i]] / df["POP_EST"] * 1e6

# Mapa
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
print(f"min: {borders[vars[i]].min()}, max: {borders[vars[i]].max()}")
# borders[vars[i]].dropna().sort_values()
min = 0
max = 2750
map_v = gv.Polygons( borders,
    vdims = gv.Dimension(vars[i], range = (min, max) )
    ).opts( cmap = "plasma", **options )
map = ( ocean * map_v ).opts( **options_m )
gv.output( map, size = 600 )

# Países sin datos
print( f"Sin datos: {df[ df[vars[i]].isnull() ].shape[0]} países" )
# 10 países más bajos
df[ ["Name", vars[i] ] ].sort_values(vars[i]).head(10)

min: 0.8823044714212364, max: 2691.0194858628133


Sin datos: 3 países


Unnamed: 0_level_0,Name,Scientific articles per million inhabitants
SVN_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
TCD,Chad,0.882304
TKM,Turkmenistan,0.9256
GNQ,Equatorial Guinea,1.2537
SSD,South Sudan,1.289989
AGO,Angola,1.426852
COD,Congo (Democratic Republic of the),1.979017
LBR,Liberia,2.444619
SLV,El Salvador,2.629559
NER,Niger,2.730504
BDI,Burundi,2.885371


In [12]:
# Young workforce

# Cargamos el archivo de población
id = "Population"

# Cargamos el archivo
df_i = pd.read_csv( "../../" + df_c.loc[df_c["ID"]==id, "Path"].iloc[0]
    + df_c.loc[df_c["ID"]==id, "Filename" ].iloc[0], index_col = "Country Code")
df_i.index.name = ix

v_pop = [
    "SP.POP.2024.FE", "SP.POP.2024.MA", "SP.POP.2529.FE",
    "SP.POP.2529.MA", "SP.POP.3034.FE", "SP.POP.3034.MA",
    "SP.POP.3539.FE", "SP.POP.3539.MA", "SP.POP.4044.FE",
    "SP.POP.4044.MA", "SP.POP.4549.FE", "SP.POP.4549.MA",
    "SP.POP.5054.FE", "SP.POP.5054.MA", "SP.POP.5559.FE",
    "SP.POP.5559.MA", "SP.POP.6064.FE", "SP.POP.6064.MA" 
    ]
for v in v_pop:
    df[v] = df_i.loc[ df_i["Indicator Code"] == v, "2023" ]


# Seleccionamos la variable
i = 2

df[vars[i]] = df[v_pop[0:10]].sum(axis = 1) / df[v_pop[8:]].sum(axis = 1)

# Mapa
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
# Eliminamos Lietchtenstein por motivos de visualización
#borders.loc["MCO"] = np.nan
print(f"min: {borders[vars[i]].min()}, max: {borders[vars[i]].max()}")
# borders[vars[i]].dropna().sort_values()
min = 0.25
max = 3
map_v = gv.Polygons( borders,
    vdims = gv.Dimension(vars[i], range = (min, max) )
    ).opts( cmap = "plasma", **options )
map = ( ocean * map_v ).opts( **options_m )
gv.output( map, size = 600 )

# Países sin datos
print( f"Sin datos: {df[ df[vars[i]].isnull() ].shape[0]} países" )
# 10 países más bajos
df[ ["Name", vars[i] ] ].sort_values(vars[i]).head(10)

min: 0.5479980959433014, max: 2.8960718244797614


Sin datos: 24 países


Unnamed: 0_level_0,Name,Young workforce ratio (20-44 to 45-64 years old)
ISO_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
SXM,Sint Maarten,0.547998
SMR,San Marino,0.664783
MNP,Northern Mariana Islands,0.71191
BMU,Bermuda,0.716294
MAF,Saint Martin,0.724722
KWT,Kuwait,0.734387
ITA,Italy,0.73818
UKR,Ukraine,0.742026
VIR,U.S. Virgin Islands,0.774945
ESP,Spain,0.780935


In [6]:
# Gender inequality index
i = 3

# Mapa
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
# Eliminamos Mónaco por motivos de visualización
#borders.loc["MCO"] = np.nan
print(f"min: {borders[vars[i]].min()}, max: {borders[vars[i]].max()}")
# borders[vars[i]].dropna().sort_values()
min = 0
max = 0.85
map_v = gv.Polygons( borders,
    vdims = gv.Dimension(vars[i], range = (min, max) )
    ).opts( cmap = "plasma_r", **options )
map = ( ocean * map_v ).opts( **options_m )
gv.output( map, size = 600 )

# Países sin datos
print( f"Sin datos: {df[ df[vars[i]].isnull() ].shape[0]} países" )
# 10 países más bajos
df[ ["Name", vars[i] ] ].sort_values(vars[i], ascending = False).head(10)

min: 0.009, max: 0.82


Sin datos: 75 países


Unnamed: 0_level_0,Name,Gender inequality index
SVN_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
YEM,Yemen,0.82
NGA,Nigeria,0.677
SOM,Somalia,0.674
TCD,Chad,0.671
AFG,Afghanistan,0.665
LBR,Liberia,0.656
BEN,Benin,0.649
GNB,Guinea-Bissau,0.631
HTI,Haiti,0.621
SLE,Sierra Leone,0.613


In [7]:
# Total debt service [% of GNI]
id = "DT.TDS.DECT.GN.ZS"
i = 4

# Cargamos el archivo
df_i = pd.read_csv( "../../" + df_c.loc[df_c["ID"]==id, "Path"].iloc[0]
    + df_c.loc[df_c["ID"]==id, "Filename" ].iloc[0], skiprows = 3
    ).rename( columns = {"Country Code": ix} ).set_index(ix)
df = df.reset_index().set_index("SVN_A3")
df[vars[i]] = df_i.iloc[ :, 4:-1 ].ffill(axis = 1).iloc[:, -1]

# Mapa
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
min = 10 * np.floor( borders[vars[i]].min() * 0.1 )
max = 37.5#10 * np.ceil( borders[vars[i]].max() * 0.1 )
map_v = gv.Polygons( borders,
    vdims = gv.Dimension(vars[i], range = (min, max) )
    ).opts( cmap = "plasma_r", **options )
map = ( ocean * map_v ).opts( **options_m )
gv.output( map, size = 600 )

# Países sin datos
print( f"Sin datos: {df[ df[vars[i]].isnull() ].shape[0]} países" )
# 10 países más altos
df[ ["Name", vars[i] ] ].sort_values(vars[i], ascending = False).head(10)

Sin datos: 116 países


Unnamed: 0_level_0,Name,Total debt service [% of GNI]
SVN_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
MOZ,Mozambique,35.564801
KAZ,Kazakhstan,22.094875
MNG,Mongolia,21.547483
LBN,Lebanon,20.940254
NIC,Nicaragua,16.571052
PAN,Panama,16.362585
AGO,Angola,15.900413
MNE,Montenegro,15.794174
PNG,Papua New Guinea,14.941915
GEO,Georgia,13.908063


In [8]:
# Central government debt [% of GDP]
id = "GC.DOD.TOTL.GD.ZS"
i = 5

# Cargamos el archivo
df_i = pd.read_csv( "../../" + df_c.loc[df_c["ID"]==id, "Path"].iloc[0]
    + df_c.loc[df_c["ID"]==id, "Filename" ].iloc[0], skiprows = 3
    ).rename( columns = {"Country Code": ix} ).set_index(ix)
df = df.reset_index().set_index("SVN_A3")
df[vars[i]] = df_i.iloc[ :, 4:-1 ].ffill(axis = 1).iloc[:, -1]
df[vars[i]] = df[vars[i]].where( df[vars[i-1]].isnull(), np.nan )

# Mapa
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
min = 10 * np.floor( borders[vars[i]].min() * 0.1 )
max = 225#np.ceil( borders[vars[i]].max() )
map_v = gv.Polygons( borders,
    vdims = gv.Dimension(vars[i], range = (min, max) )
    ).opts( cmap = "plasma_r", **options )
map = ( ocean * map_v ).opts( **options_m )
gv.output( map, size = 600 )

# Países sin datos
print( f"Sin datos: {df[ df[vars[i]].isnull() ].shape[0]} países" )
# 10 países más altos
df[ ["Name", vars[i] ] ].sort_values(vars[i], ascending = False).head(30)

Sin datos: 146 países


Unnamed: 0_level_0,Name,Central government debt [% of GDP]
SVN_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
JPN,Japan,216.213375
GRC,Greece,203.285809
SGP,Singapore,150.141913
BRB,Barbados,146.460053
GBR,United Kingdom,142.283042
GBR,Jersey,142.283042
GBR,Guernsey,142.283042
GBR,Isle of Man,142.283042
GBR,Falkland Islands,142.283042
GBR,Cayman Islands,142.283042


In [9]:
# Gini coefficient
id = "SI.POV.GINI"
i = 6

# Cargamos el archivo
df_i = pd.read_csv( "../../" + df_c.loc[df_c["ID"]==id, "Path"].iloc[0]
    + df_c.loc[df_c["ID"]==id, "Filename" ].iloc[0][:-4] + "_extra.csv",
    skiprows = 4 ).rename( columns = {"Country Code": ix}
    ).drop(columns = ["Source"])
df = df.reset_index().set_index(ix)
df[vars[i]] = df_i.set_index(ix).iloc[ :, 4: ].ffill(axis = 1).iloc[:, -1]

# Mapa
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
min = 5 * np.floor( borders[vars[i]].min() * 0.2 )
max = 5 * np.ceil( borders[vars[i]].max() * 0.2 )
map_v = gv.Polygons( borders,
    vdims = gv.Dimension(vars[i], range = (min, max) )
    ).opts( cmap = "plasma_r", **options )
map = ( ocean * map_v ).opts( **options_m )
gv.output( map, size = 600 )

# Países sin datos
print( f"Sin datos: {df[ df[vars[i]].isnull() ].shape[0]} países" )
# 10 países más altos
df[ ["Name", vars[i] ] ].sort_values(vars[i], ascending = False).head(10)

Sin datos: 0 países


Unnamed: 0_level_0,Name,Gini coefficient
ISO_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
BMU,Bermuda,63.0
ZAF,South Africa,63.0
BHR,Bahrain,59.6
NAM,Namibia,59.1
BHS,Bahamas,57.5
COL,Colombia,54.8
SWZ,Eswatini (Kingdom of),54.6
HKG,"Hong Kong, China (SAR)",53.9
BWA,Botswana,53.3
BLZ,Belize,53.3


In [10]:
# Índice de vulnerabilidad socioeconómica
index_n = "Social vulnerability index, climate transition impacts"

# Índice por unidad geográfico ISO-3166-1
df = df.reset_index().set_index(ix)

# Cálculo del índice para cada categoría
var_i = [v + "_index" for v in vars]
# Países con al menos 4 valores
df = df[ df[vars].count(axis = 1) > 3 ]
# Normalización 0-10
for i, v in enumerate(vars):
    df[ var_i[i] ] = stats.percentileofscore( df[ vars[i] ],
        df[ vars[i] ], nan_policy = "omit" ) / 10
    #min_v = df[v].min()
    #max_v = df[v].max()
    #rng_v = max_v - min_v
    #df[ var_i[i] ] = 10 * ( df[v] - min_v ) / rng_v
    if v in vars[0:3]: df[ var_i[i] ] = 10 - df[ var_i[i] ]

# Cálculo del índice
df[index_n] = df[var_i].mean(axis = 1)
# Normalización 0-10
min_v = df[index_n].min()
max_v = df[index_n].max()
rng_v = max_v - min_v
df[ index_n ] = 10 * ( df[index_n] - min_v ) / rng_v

# Guardamos el archivo
df.to_csv(path_r + "socioeconomic_index.csv")

# Mapa
borders[index_n] = df[index_n]
map_v = gv.Polygons( borders,
    vdims = gv.Dimension(index_n, range = (0, 10.0001) )
    ).opts( cmap = "plasma_r", **options )
map = ( ocean * map_v ).opts( **options_m )
gv.output( map, size = 600 )

# Guardamos el archivo
df[["Name", index_n] + vars + var_i].to_csv(
    "../share/Indexes/Transition_vulnerability_index.csv" )

# 10 países más altos
df[["Name", index_n] + var_i].sort_values(index_n, ascending = False).head(10)

Unnamed: 0_level_0,Name,"Social vulnerability index, climate transition impacts",Mean years of schooling_index,Scientific articles per million inhabitants_index,Young workforce ratio (20-44 to 45-64 years old)_index,Gender inequality index_index,Total debt service [% of GNI]_index,Central government debt [% of GDP]_index,Gini coefficient_index
ISO_A3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
PNG,Papua New Guinea,10.0,8.671875,9.025424,2.457627,9.096386,9.36,,7.711864
AGO,Angola,9.847009,7.994792,9.788136,0.974576,8.012048,9.52,,9.449153
MOZ,Mozambique,9.72122,9.166667,9.194915,0.466102,7.108434,10.0,,9.322034
SSD,South Sudan,9.49334,8.151042,9.830508,3.262712,,,,8.347458
NIC,Nicaragua,9.190203,6.901042,9.152542,2.79661,5.843373,9.68,,8.855932
LSO,Lesotho,8.888592,6.666667,8.050847,1.186441,8.493976,9.12,,8.559322
KHM,Cambodia,8.86232,8.515625,8.516949,3.432203,7.349398,8.4,,5.762712
COG,Congo,8.82031,6.25,8.220339,2.5,8.674699,7.04,,9.131356
MTQ,Martinique,8.813555,,2.881356,8.728814,,,6.631579,9.618644
BMU,Bermuda,8.732908,,1.440678,7.288136,,,8.947368,9.978814


In [11]:
# Información para un país específico
country = "NIC"

df.loc[country, ["Name", index_n] + vars]

Name                                                      Nicaragua
Social vulnerability index, climate transition impacts     9.190203
Mean years of schooling                                         7.3
Scientific articles per million inhabitants                5.755097
Young workforce ratio (20-44 to 45-64 years old)            1.79868
Gender inequality index                                       0.397
Total debt service [% of GNI]                             16.571052
Central government debt [% of GDP]                              NaN
Gini coefficient                                               46.2
Name: NIC, dtype: object