In [47]:
# Calcula un índice por país de vulnerabilidad socioeconómica

# Importamos librerías
import numpy as np
import pandas as pd
import geopandas as gpd
import geoviews as gv
from scipy import stats

# Map parameters: use matplotlib as the geoviews plotting backend
gv.extension("matplotlib")
def hook(plot, element):
    """Reposition the colorbar axes relative to the main plot axes.

    Looks up the "cax" and "axis" entries of ``plot.handles`` and gives the
    colorbar the same left edge and width as the main axes, a bottom edge at
    0.9 times the main axes' bottom coordinate, and 5 % of its height.

    Parameters
    ----------
    plot : object exposing a ``handles`` mapping with "cax" and "axis" keys.
    element : unused; present because plot hooks are called with two arguments.
    """
    handles = plot.handles
    colorbar_axes = handles["cax"]
    frame = handles["axis"].get_position()
    colorbar_axes.set_position(
        [frame.x0, 0.9 * frame.y0, frame.width, 0.05 * frame.height]
    )
# Options passed to every country (choropleth) layer via .opts(**options)
options = dict(
    colorbar=True,
    linewidth=0.4,
    hooks=[hook],
    ylim=(-62, 85),
    xlim=(-180, 180),
    colorbar_opts=dict(orientation="horizontal"),
)
# Options passed to the combined ocean * countries overlay via .opts(**options_m)
options_m = dict(bgcolor="lightgray", fontscale=2, aspect=2.25)

In [48]:
# Data

# Name of the country-code column / index used throughout the notebook
ix = "ISO_A3"

# Output folder
path_r = "../results/"

# Data catalog: the cells below look up each dataset's "Path" and "Filename"
# in this table by its "ID"
path_catalog = "../../Bases_de_datos/Data_catalog.csv"
df_c = pd.read_csv(filepath_or_buffer=path_catalog)

# Map: country polygons
borders_path = (
    "../../Bases_de_datos/Mapas/Natural_Earth/"
    "ne_50m_admin_0_countries_mod"
)
# Drop the layer's own ISO_A3 column and index the layer by ISO_A3_EH
# instead, naming that index ISO_A3
borders = (
    gpd.read_file(borders_path)
    .drop(columns=[ix])
    .set_index("ISO_A3_EH")
    .rename_axis(ix)
)
# Remove rows whose ISO_N3_EH is "-99" (presumably the missing-code marker;
# confirm against the layer), then keep the first row of each repeated code
borders = borders.loc[borders["ISO_N3_EH"] != "-99"]
borders = borders.loc[~borders.index.duplicated()]

# Ocean: background layer drawn underneath the country polygons
ocean_path = "../../Bases_de_datos/Mapas/Natural_Earth/ne_50m_ocean"
ocean = gv.Polygons(gpd.read_file(ocean_path), vdims="min_zoom")
ocean = ocean.opts(linewidth=0, cmap="Paired")

# Base table: ISO country list indexed by alpha-3 code (index renamed to
# ISO_A3), with a fixed list of codes removed
iso = "../../Bases_de_datos/Country_ISO_code.csv"
df_iso = (
    pd.read_csv(iso)
    .set_index("alpha-3")
    .drop(index=[
        "ATA", "ATF", "HMD", "SJM", "SXM", "VAT",
        "BVT", "SGS", "UMI", "ESH", "TKL", "IOT",
    ])
    .rename_axis(ix)
)

# Names of the variables that feed the index.
# Order matters: the cells below select a variable by its position in this
# list, and the index cell inverts the score of the first three entries.
# NOTE(review): `vars` shadows the Python builtin of the same name; it is
# kept because every later cell refers to it.
vars = [
    "Mean years of schooling",
    "Scientific articles per million inhabitants",
    "Young workforce ratio (20-39 to 40-65 years old)",
    "Gender inequality index",
    "Total debt service [% of GNI]",
    "Central government debt [% of GDP]",
    "Gini coefficient",
]
# Column name of the normalised score derived from each variable
var_i = [f"{name}_index" for name in vars]

In [49]:
# First two categories: load the GDI/HDI table
# NOTE(review): `id`, `min`, `max` and `map` shadow Python builtins; the
# names are kept unchanged here because they are notebook-level globals.
id = "GDI_HDI"

# Load the file: its folder and file name are looked up in the catalog row
# whose ID matches, and concatenated after the "../../" prefix
df = pd.read_csv(
    "../../"
    + "".join(df_c.loc[df_c["ID"] == id, ["Path", "Filename"]].iloc[0]),
    index_col=ix,
)
df = df.drop(columns=["Source"])


# Mean years of schooling
i = 0

# Map: copy the variable onto the borders layer, matched on ISO_A3
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
# Optional, currently disabled: blank out Monaco ("MCO") for display
# purposes with borders.loc["MCO"] = np.nan
print(f"min: {borders[vars[i]].min()}, max: {borders[vars[i]].max()}")
# Hand-picked colour-scale limits (inspect
# borders[vars[i]].dropna().sort_values() when choosing them)
min = 1
max = 14.5
map_v = gv.Polygons(borders, vdims=gv.Dimension(vars[i], range=(min, max)))
map_v = map_v.opts(cmap="plasma", **options)
map = (ocean * map_v).opts(**options_m)
gv.output(map, size=600)

# Countries without data
print(f"Sin datos: {df[vars[i]].isnull().sum()} países")
# 10 lowest countries
df.loc[:, ["Name", vars[i]]].sort_values(vars[i]).head(10)

min: 1.3, max: 14.3


Sin datos: 48 países


Unnamed: 0_level_0,Name,Mean years of schooling
ISO_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
NER,Niger,1.3
MLI,Mali,1.6
SOM,Somalia,1.9
BFA,Burkina Faso,2.3
TCD,Chad,2.3
GIN,Guinea,2.4
ETH,Ethiopia,2.4
AFG,Afghanistan,2.5
YEM,Yemen,2.8
SEN,Senegal,2.9


In [50]:
# Scientific articles per million inhabitants
id = "IP.JRN.ARTC.SC"
i = 1

# Load the file located through the data catalog, indexed by country code
df_i = (
    pd.read_csv(
        "../../"
        + "".join(df_c.loc[df_c["ID"] == id, ["Path", "Filename"]].iloc[0]),
        skiprows=3,
    )
    .rename(columns={"Country Code": ix})
    .set_index(ix)
)
# Re-key the working table on SVN_A3 so the assignments below are matched on
# that code (presumably the sovereign state's code, so that territories
# receive the sovereign's value - confirm)
df = df.reset_index().set_index("SVN_A3")
# Most recent available value per row: forward-fill along the columns and
# take the last one.
# NOTE(review): the positional slice 4:-1 assumes a fixed column layout in
# the source file - confirm that no year column is skipped.
df[vars[i]] = df_i.iloc[:, 4:-1].ffill(axis=1).iloc[:, -1]
df["POP_EST"] = borders["POP_EST"]
# Article count -> articles per million inhabitants
df[vars[i]] = df[vars[i]].div(df["POP_EST"]).mul(1e6)

# Map: copy the variable onto the borders layer, matched on ISO_A3
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
print(f"min: {borders[vars[i]].min()}, max: {borders[vars[i]].max()}")
# Hand-picked colour-scale limits (inspect
# borders[vars[i]].dropna().sort_values() when choosing them)
min = 0
max = 2750
map_v = gv.Polygons(borders, vdims=gv.Dimension(vars[i], range=(min, max)))
map_v = map_v.opts(cmap="plasma", **options)
map = (ocean * map_v).opts(**options_m)
gv.output(map, size=600)

# Countries without data
print(f"Sin datos: {df[vars[i]].isnull().sum()} países")
# 10 lowest countries
df.loc[:, ["Name", vars[i]]].sort_values(vars[i]).head(10)

min: 0.8823044714212364, max: 2691.0194858628133


Sin datos: 3 países


Unnamed: 0_level_0,Name,Scientific articles per million inhabitants
SVN_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
TCD,Chad,0.882304
TKM,Turkmenistan,0.9256
GNQ,Equatorial Guinea,1.2537
SSD,South Sudan,1.289989
AGO,Angola,1.426852
COD,Congo (Democratic Republic of the),1.979017
LBR,Liberia,2.444619
SLV,El Salvador,2.629559
NER,Niger,2.730504
BDI,Burundi,2.885371


In [51]:
# Load the population file
id = "Population"

# File located through the data catalog, indexed by country code (index
# renamed to ISO_A3)
df_i = pd.read_csv(
    "../../"
    + "".join(df_c.loc[df_c["ID"] == id, ["Path", "Filename"]].iloc[0]),
    index_col="Country Code",
).rename_axis(ix)

# Indicator codes per five-year age band (female, then male), from 20-24
# up to 60-64
v_pop = [
    f"SP.POP.{ages}.{sex}"
    for ages in ("2024", "2529", "3034", "3539", "4044",
                 "4549", "5054", "5559", "6064")
    for sex in ("FE", "MA")
]
# One column per indicator, taken from the "2023" column of the file
for v in v_pop:
    df[v] = df_i.loc[df_i["Indicator Code"] == v, "2023"]

# Young workforce
i = 2

# Ratio of the first 8 codes (bands 20-24 to 35-39) to the remaining 10
# (bands 40-44 to 60-64)
df[vars[i]] = df[v_pop[:8]].sum(axis=1).div(df[v_pop[8:]].sum(axis=1))

# Map: copy the variable onto the borders layer, matched on ISO_A3
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
# Optional, currently disabled: borders.loc["MCO"] = np.nan
# NOTE(review): the original comment mentioned Liechtenstein, but the
# disabled statement targets "MCO" (Monaco) - confirm which was intended.
print(f"min: {borders[vars[i]].min()}, max: {borders[vars[i]].max()}")
# Hand-picked colour-scale limits (inspect
# borders[vars[i]].dropna().sort_values() when choosing them)
min = 0.25
max = 2.75
map_v = gv.Polygons(borders, vdims=gv.Dimension(vars[i], range=(min, max)))
map_v = map_v.opts(cmap="plasma", **options)
map = (ocean * map_v).opts(**options_m)
gv.output(map, size=600)

# Countries without data
print(f"Sin datos: {df[vars[i]].isnull().sum()} países")
# 10 lowest countries
df.loc[:, ["Name", vars[i]]].sort_values(vars[i]).head(10)

min: 0.47041153170111133, max: 2.5880698619279876


Sin datos: 1 países


Unnamed: 0_level_0,Name,Young workforce ratio (20-39 to 40-65 years old)
SVN_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
KWT,Kuwait,0.470412
SMR,San Marino,0.503187
UKR,Ukraine,0.529814
ITA,Italy,0.56913
ESP,Spain,0.583726
GRC,Greece,0.593756
AND,Andorra,0.597261
JPN,Japan,0.605144
MCO,Monaco,0.608179
HKG,"Hong Kong, China (SAR)",0.610128


In [52]:
# Gender inequality index (already a column of df; no file is loaded here)
i = 3

# Map: copy the variable onto the borders layer, matched on ISO_A3
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
# Optional, currently disabled: blank out Monaco ("MCO") for display
# purposes with borders.loc["MCO"] = np.nan
print(f"min: {borders[vars[i]].min()}, max: {borders[vars[i]].max()}")
# Hand-picked colour-scale limits (inspect
# borders[vars[i]].dropna().sort_values() when choosing them)
min = 0
max = 0.85
map_v = gv.Polygons(borders, vdims=gv.Dimension(vars[i], range=(min, max)))
map_v = map_v.opts(cmap="plasma", **options)
map = (ocean * map_v).opts(**options_m)
gv.output(map, size=600)

# Countries without data
print(f"Sin datos: {df[vars[i]].isnull().sum()} países")
# 10 lowest countries
df.loc[:, ["Name", vars[i]]].sort_values(vars[i]).head(10)

min: 0.009, max: 0.82


Sin datos: 75 países


Unnamed: 0_level_0,Name,Gender inequality index
SVN_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
DNK,Denmark,0.009
NOR,Norway,0.012
CHE,Switzerland,0.018
SWE,Sweden,0.023
NLD,Netherlands,0.025
FIN,Finland,0.032
ARE,United Arab Emirates,0.035
SGP,Singapore,0.036
ISL,Iceland,0.039
LUX,Luxembourg,0.043


In [53]:
# Total debt service [% of GNI]
id = "DT.TDS.DECT.GN.ZS"
i = 4

# Load the file located through the data catalog, indexed by country code
df_i = (
    pd.read_csv(
        "../../"
        + "".join(df_c.loc[df_c["ID"] == id, ["Path", "Filename"]].iloc[0]),
        skiprows=3,
    )
    .rename(columns={"Country Code": ix})
    .set_index(ix)
)
# Match the new column on SVN_A3
df = df.reset_index().set_index("SVN_A3")
# Most recent available value per row: forward-fill along the columns and
# take the last one.
# NOTE(review): the positional slice 4:-1 assumes a fixed column layout in
# the source file - confirm that no year column is skipped.
df[vars[i]] = df_i.iloc[:, 4:-1].ffill(axis=1).iloc[:, -1]

# Map: copy the variable onto the borders layer, matched on ISO_A3
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
# Lower limit: data minimum rounded down to a multiple of 10
min = np.floor(borders[vars[i]].min() * 0.1) * 10
# Fixed upper limit (the data-driven alternative
# 10 * np.ceil(borders[vars[i]].max() * 0.1) is disabled)
max = 37.5
map_v = gv.Polygons(borders, vdims=gv.Dimension(vars[i], range=(min, max)))
map_v = map_v.opts(cmap="plasma_r", **options)
map = (ocean * map_v).opts(**options_m)
gv.output(map, size=600)

# Countries without data
print(f"Sin datos: {df[vars[i]].isnull().sum()} países")
# 10 highest countries
df.loc[:, ["Name", vars[i]]].sort_values(vars[i], ascending=False).head(10)

Sin datos: 116 países


Unnamed: 0_level_0,Name,Total debt service [% of GNI]
SVN_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
MOZ,Mozambique,35.564801
KAZ,Kazakhstan,22.094875
MNG,Mongolia,21.547483
LBN,Lebanon,20.940254
NIC,Nicaragua,16.571052
PAN,Panama,16.362585
AGO,Angola,15.900413
MNE,Montenegro,15.794174
PNG,Papua New Guinea,14.941915
GEO,Georgia,13.908063


In [54]:
# Central government debt [% of GDP]
id = "GC.DOD.TOTL.GD.ZS"
i = 5

# Load the file located through the data catalog, indexed by country code
df_i = (
    pd.read_csv(
        "../../"
        + "".join(df_c.loc[df_c["ID"] == id, ["Path", "Filename"]].iloc[0]),
        skiprows=3,
    )
    .rename(columns={"Country Code": ix})
    .set_index(ix)
)
# Match the new column on SVN_A3
df = df.reset_index().set_index("SVN_A3")
# Most recent available value per row: forward-fill along the columns and
# take the last one.
# NOTE(review): the positional slice 4:-1 assumes a fixed column layout in
# the source file - confirm that no year column is skipped.
df[vars[i]] = df_i.iloc[:, 4:-1].ffill(axis=1).iloc[:, -1]
# Keep this variable only where the previous one (total debt service) is
# missing, so each row carries at most one of the two debt measures
df[vars[i]] = df[vars[i]].where(df[vars[i - 1]].isna())

# Map: copy the variable onto the borders layer, matched on ISO_A3
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
# Lower limit: data minimum rounded down to a multiple of 10
min = np.floor(borders[vars[i]].min() * 0.1) * 10
# Fixed upper limit (the data-driven alternative
# np.ceil(borders[vars[i]].max()) is disabled)
max = 225
map_v = gv.Polygons(borders, vdims=gv.Dimension(vars[i], range=(min, max)))
map_v = map_v.opts(cmap="plasma_r", **options)
map = (ocean * map_v).opts(**options_m)
gv.output(map, size=600)

# Countries without data
print(f"Sin datos: {df[vars[i]].isnull().sum()} países")
# 10 highest countries
df.loc[:, ["Name", vars[i]]].sort_values(vars[i], ascending=False).head(10)

Sin datos: 146 países


Unnamed: 0_level_0,Name,Central government debt [% of GDP]
SVN_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
JPN,Japan,216.213375
GRC,Greece,203.285809
SGP,Singapore,150.141913
BRB,Barbados,146.460053
GBR,United Kingdom,142.283042
GBR,Jersey,142.283042
GBR,Guernsey,142.283042
GBR,Isle of Man,142.283042
GBR,Falkland Islands,142.283042
GBR,Cayman Islands,142.283042


In [55]:
# Gini coefficient
id = "SI.POV.GINI"
i = 6

# Load the companion file: the catalog file name with its last four
# characters replaced by "_extra.csv"
df_i = (
    pd.read_csv(
        "../../"
        + df_c.loc[df_c["ID"] == id, "Path"].iloc[0]
        + df_c.loc[df_c["ID"] == id, "Filename"].iloc[0][:-4]
        + "_extra.csv",
        skiprows=4,
    )
    .rename(columns={"Country Code": ix})
    .drop(columns=["Source"])
)
# Back to the ISO_A3 index: this variable is matched on ISO_A3
df = df.reset_index().set_index(ix)
# Most recent available value per row: forward-fill along the columns and
# take the last one.
# NOTE(review): the positional slice 4: assumes a fixed column layout in the
# source file - confirm.
df[vars[i]] = df_i.set_index(ix).iloc[:, 4:].ffill(axis=1).iloc[:, -1]

# Map: copy the variable onto the borders layer, matched on ISO_A3
borders[vars[i]] = df.reset_index().set_index(ix)[vars[i]]
# Colour-scale limits: data range widened outwards to multiples of 5
min = np.floor(borders[vars[i]].min() * 0.2) * 5
max = np.ceil(borders[vars[i]].max() * 0.2) * 5
map_v = gv.Polygons(borders, vdims=gv.Dimension(vars[i], range=(min, max)))
map_v = map_v.opts(cmap="plasma_r", **options)
map = (ocean * map_v).opts(**options_m)
gv.output(map, size=600)

# Countries without data
print(f"Sin datos: {df[vars[i]].isnull().sum()} países")
# 10 lowest countries (the sort is ascending, unlike the two debt cells)
df.loc[:, ["Name", vars[i]]].sort_values(vars[i]).head(10)

Sin datos: 0 países


Unnamed: 0_level_0,Name,Gini coefficient
ISO_A3,Unnamed: 1_level_1,Unnamed: 2_level_1
FRO,Faroe Islands,22.7
VGB,British Virgin Islands,23.0
SVK,Slovakia,24.1
SVN,Slovenia,24.3
BLR,Belarus,24.4
UKR,Ukraine,25.6
ALA,Aland,25.6
NLD,Netherlands,25.7
MDA,Moldova (Republic of),25.7
ARE,United Arab Emirates,26.0


In [59]:
# Rows with more than 4 (i.e. at least 5 of the 7) index variables available
df.loc[df[vars].notna().sum(axis=1) > 4]

Unnamed: 0_level_0,SVN_A3,Name,HDI,GDI,Mean years of schooling,Life expectancy at birth,GNI per capita,Gender inequality index,Scientific articles per million inhabitants,POP_EST,...,SP.POP.5054.FE,SP.POP.5054.MA,SP.POP.5559.FE,SP.POP.5559.MA,SP.POP.6064.FE,SP.POP.6064.MA,Young workforce ratio (20-39 to 40-65 years old),Total debt service [% of GNI],Central government debt [% of GDP],Gini coefficient
ISO_A3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
QAT,QAT,Qatar,0.875,1.027,10.1,81.6,95944.0,0.212,828.592685,2832067.0,...,32796.0,117226.0,21114.0,66147.0,13533.0,37814.0,1.490668,,,41.1
SGP,SGP,Singapore,0.949,0.991,11.9,84.1,88761.0,0.036,2142.726773,5703569.0,...,225789.0,253206.0,218510.0,250030.0,216285.0,250580.0,0.703765,,150.141913,45.9
IRL,IRL,Ireland,0.950,0.991,11.7,82.7,87468.0,0.072,1732.849750,4941444.0,...,178476.0,176443.0,158420.0,154810.0,144316.0,139578.0,0.750480,,46.714242,30.1
LUX,LUX,Luxembourg,0.927,0.993,13.0,82.6,78554.0,0.043,1527.578820,619896.0,...,23209.0,24634.0,22560.0,24609.0,19577.0,20421.0,0.816144,,2.901345,32.7
ARE,ARE,United Arab Emirates,0.937,0.986,12.8,79.2,74104.0,0.035,454.757363,9770529.0,...,127864.0,389625.0,80902.0,225655.0,51194.0,113096.0,1.535701,,1.845685,26.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YEM,YEM,Yemen,0.424,0.456,2.8,63.7,1106.0,0.820,9.690719,29161922.0,...,469745.0,460174.0,360885.0,337401.0,270617.0,238430.0,2.224595,0.530089,,36.7
COD,COD,Congo (Democratic Republic of the),0.481,0.891,7.2,59.7,1080.0,0.605,1.979017,86790567.0,...,1388166.0,1303910.0,1135077.0,1044584.0,883285.0,793144.0,2.010428,1.089154,,44.7
SOM,SOM,Somalia,0.380,0.769,1.9,56.1,1072.0,0.674,3.211242,10192317.3,...,227418.0,213393.0,185043.0,167048.0,147092.0,129087.0,2.083772,0.188488,,36.8
CAF,CAF,Central African Republic,0.387,0.810,4.0,54.5,869.0,,3.650016,4745185.0,...,68149.0,57186.0,62403.0,47415.0,51180.0,39482.0,2.382441,0.787758,,43.0


In [62]:
# Socioeconomic vulnerability index
index_n = "Socioeconomic vulnerability index"

# One row per ISO 3166-1 geographic unit
df = df.reset_index().set_index(ix)

# Column name of the normalised score derived from each input variable
var_i = [v + "_index" for v in vars]
# Keep only countries with at least 4 of the input variables available.
# The explicit .copy() makes the filtered table an independent frame, so the
# column assignments below do not write into a slice of the previous table
# (no SettingWithCopyWarning, no dependence on view/copy semantics).
df = df[df[vars].count(axis=1) > 3].copy()
# Normalisation to 0-10: percentile rank of each value within its own
# column, divided by 10. Missing values are left out of the ranking and
# keep a missing score. (A linear min-max rescaling is not used.)
for i, v in enumerate(vars):
    df[var_i[i]] = stats.percentileofscore(
        df[v], df[v], nan_policy="omit") / 10
    # The first three variables are inverted, so that a higher raw value
    # yields a lower score
    if v in vars[0:3]:
        df[var_i[i]] = 10 - df[var_i[i]]

# Composite index: row-wise mean of the scores
df[index_n] = df[var_i].mean(axis=1)
# Linear rescaling of the composite so that it spans 0-10
min_v = df[index_n].min()
max_v = df[index_n].max()
rng_v = max_v - min_v
df[index_n] = 10 * (df[index_n] - min_v) / rng_v

# Save the table
df.to_csv(path_r + "socioeconomic_index.csv")

# Map: copy the index onto the borders layer, matched on ISO_A3
borders[index_n] = df[index_n]
map_v = gv.Polygons(
    borders, vdims=gv.Dimension(index_n, range=(0, 10.0001))
).opts(cmap="plasma_r", **options)
map = (ocean * map_v).opts(**options_m)
gv.output(map, size=600)

# 10 highest countries
df[["Name", index_n] + var_i].sort_values(index_n, ascending=False).head(10)

Unnamed: 0_level_0,Name,Socioeconomic vulnerability index,Mean years of schooling_index,Scientific articles per million inhabitants_index,Young workforce ratio (20-39 to 40-65 years old)_index,Gender inequality index_index,Total debt service [% of GNI]_index,Central government debt [% of GDP]_index,Gini coefficient_index
ISO_A3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
PNG,Papua New Guinea,10.0,8.606557,8.852459,2.84153,9.096386,9.354839,,7.704918
AGO,Angola,9.858975,7.95082,9.836066,1.147541,8.012048,9.516129,,9.453552
MOZ,Mozambique,9.656796,9.125683,9.071038,0.491803,7.108434,10.0,,9.344262
NIC,Nicaragua,9.249711,6.803279,9.016393,3.278689,5.843373,9.677419,,8.961749
KHM,Cambodia,8.978081,8.442623,8.196721,4.262295,7.349398,8.387097,,5.901639
LSO,Lesotho,8.803123,6.557377,7.595628,1.530055,8.493976,9.112903,,8.579235
COG,Congo,8.787957,6.147541,7.814208,2.95082,8.674699,7.016129,,9.20765
GTM,Guatemala,8.75601,8.087432,8.743169,1.748634,7.048193,6.935484,,9.125683
VEN,Venezuela (Bolivarian Republic of),8.697359,4.808743,7.322404,6.284153,8.072289,6.451613,,8.52459
SLV,El Salvador,8.646944,7.021858,9.672131,3.825137,5.271084,9.032258,,6.448087
