In [35]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [36]:
# World Bank WDI series codes to extract from the raw file.
# NOTE(review): the descriptions below are translated from the original notes;
# only the ones marked "(per dataset)" were checked against the
# 'Indicator Name' values visible in this notebook's previews.
indicadores = [
    "NY.GNP.PCAP.CD",  # GNI per capita, Atlas method (per dataset) -- this is GNI, not GDP
    "SP.DYN.LE00.IN",  # Life expectancy at birth (per dataset)
    "SH.DYN.MORT",  # Under-5 mortality rate (per dataset)
    "SE.ADT.LITR.ZS",  # Literacy rate
    "SE.XPD.TOTL.GD.ZS",  # Education expenditure
    "SH.H2O.SAFE.ZS",  # Access to safe drinking water
    "SH.MED.BEDS.ZS",  # Access to health services
    "SL.UEM.TOTL.ZS",  # Unemployment
    "HDI",  # Human Development Index -- NOTE(review): does not look like a WDI series code; verify
    "SP.DYN.TFRT.IN"  # Total fertility rate (per dataset)
]

# Load the full WDI dump and keep only the rows for the requested series.
data = pd.read_csv('WDIData.csv')
indicadores_df = data.loc[data['Indicator Code'].isin(indicadores)]

# isin() silently ignores codes that are not present in the file, so a typo or
# an unavailable series would vanish without notice. Report every requested
# code that produced no rows so the gap is visible when the cell runs.
indicadores_ausentes = sorted(set(indicadores) - set(indicadores_df['Indicator Code']))
if indicadores_ausentes:
    print(f"Requested indicators with no rows in WDIData.csv: {indicadores_ausentes}")

In [45]:
# Preview the first ten rows of the raw dataset.
data.head(n=10)

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,Unnamed: 67
0,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.ZS,,,,,,,...,17.196986,17.597176,18.034249,18.345878,18.695306,19.149942,19.501837,,,
1,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.RU.ZS,,,,,,,...,6.580066,6.786218,6.941323,7.096843,7.254828,7.460783,7.599289,,,
2,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.UR.ZS,,,,,,,...,37.857526,38.204173,38.303515,38.421813,38.482409,38.692053,38.793983,,,
3,Africa Eastern and Southern,AFE,Access to electricity (% of population),EG.ELC.ACCS.ZS,,,,,,,...,31.82495,33.744405,38.733352,40.092163,42.880977,44.073912,45.609604,,,
4,Africa Eastern and Southern,AFE,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,,,,,,,...,17.485006,16.329765,24.372504,25.153292,27.227391,29.383,30.163364,,,
5,Africa Eastern and Southern,AFE,"Access to electricity, urban (% of urban popul...",EG.ELC.ACCS.UR.ZS,,,,,,,...,66.146834,66.936479,68.977838,70.77173,71.692427,73.164863,73.775675,,,
6,Africa Eastern and Southern,AFE,Account ownership at a financial institution o...,FX.OWN.TOTL.ZS,,,,,,,...,,,,,,,,,,
7,Africa Eastern and Southern,AFE,Account ownership at a financial institution o...,FX.OWN.TOTL.FE.ZS,,,,,,,...,,,,,,,,,,
8,Africa Eastern and Southern,AFE,Account ownership at a financial institution o...,FX.OWN.TOTL.MA.ZS,,,,,,,...,,,,,,,,,,
9,Africa Eastern and Southern,AFE,Account ownership at a financial institution o...,FX.OWN.TOTL.OL.ZS,,,,,,,...,,,,,,,,,,


In [38]:
# Countries treated as Latin America in this analysis. The names are matched
# against the 'Country Name' column, so they must be spelled exactly as there.
# NOTE(review): Mexico does not appear in this list -- confirm that is intended.
paises_latinoamerica = [
    'Antigua and Barbuda', 'Argentina', 'Bolivia', 'Brazil', 'Belize',
    'Bahamas, The', 'Barbados', 'Chile', 'Colombia', 'Costa Rica', 'Cuba',
    'Dominican Republic', 'Dominica', 'Ecuador', 'El Salvador', 'Grenada',
    'Guatemala', 'Guyana', 'Haiti', 'Honduras', 'Jamaica', 'Nicaragua',
    'Panama', 'Paraguay', 'Peru', 'Puerto Rico', 'Suriname',
    'St. Kitts and Nevis', 'St. Lucia', 'St. Vincent and the Grenadines',
    'Trinidad and Tobago', 'Uruguay', 'Venezuela, RB',
]

# Boolean mask: True for rows whose country is in the list above.
es_latinoamerica = indicadores_df['Country Name'].isin(paises_latinoamerica)
indicadores_latinoamerica = indicadores_df.loc[es_latinoamerica]

# Persist the regional subset (row labels are written as the first CSV column).
indicadores_latinoamerica.to_csv('indicadores_latinoamerica.csv')


In [39]:
# Renumber the rows from 0; because drop=True is not passed, the previous row
# labels are kept as a new 'index' column.
indicadores_latinoamerica = indicadores_latinoamerica.reset_index()

# Summary of columns, non-null counts and dtypes.
indicadores_latinoamerica.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 264 entries, 0 to 263
Data columns (total 69 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   index           264 non-null    int64  
 1   Country Name    264 non-null    object 
 2   Country Code    264 non-null    object 
 3   Indicator Name  264 non-null    object 
 4   Indicator Code  264 non-null    object 
 5   1960            124 non-null    float64
 6   1961            93 non-null     float64
 7   1962            112 non-null    float64
 8   1963            112 non-null    float64
 9   1964            113 non-null    float64
 10  1965            113 non-null    float64
 11  1966            114 non-null    float64
 12  1967            116 non-null    float64
 13  1968            118 non-null    float64
 14  1969            119 non-null    float64
 15  1970            162 non-null    float64
 16  1971            127 non-null    float64
 17  1972            125 non-null    flo

In [40]:
# Percentage of missing cells in each row, computed over every column.
porcentaje_nulos = indicadores_latinoamerica.isna().mean(axis=1).mul(100)

# Labels of the rows where at least 20 % of the cells are missing.
supera_umbral = porcentaje_nulos.ge(20)
filas_con_nulos = porcentaje_nulos.loc[supera_umbral].index
print(filas_con_nulos)


Int64Index([  1,   2,   3,   5,   7,  10,  11,  13,  15,  18,
            ...
            245, 247, 250, 251, 253, 255, 258, 259, 261, 263],
           dtype='int64', length=144)


In [41]:
# Remove the rows whose labels were collected in filas_con_nulos.
indicadores_latinoamerica = indicadores_latinoamerica.drop(index=filas_con_nulos)

In [42]:
# Year columns as strings, '1960' through '2022' inclusive.
columnas_float = [str(anio) for anio in range(1960, 2023)]

# Cast the yearly values to float before filling gaps.
valores_anuales = indicadores_latinoamerica[columnas_float].astype('float')

# Interpolate along each row, i.e. across years within one country/indicator.
# NOTE(review): in the preview of the result, the 2021 and 2022 columns hold
# identical values, which suggests trailing gaps are filled with the last
# observation rather than left missing -- confirm this is intended (see the
# `limit_area` option of DataFrame.interpolate).
indicadores_latinoamerica[columnas_float] = valores_anuales.interpolate(axis=1)

In [43]:
indicadores_latinoamerica.head()

Unnamed: 0,index,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,Unnamed: 67
0,81716,Antigua and Barbuda,ATG,"Fertility rate, total (births per woman)",SP.DYN.TFRT.IN,4.602,4.559,4.547,4.536,4.484,...,1.526,1.567,1.458,1.485,1.371,1.468,1.569,1.58,1.58,
4,81999,Antigua and Barbuda,ATG,"Life expectancy at birth, total (years)",SP.DYN.LE00.IN,61.55,62.363,63.192,64.101,65.058,...,77.858,77.913,78.152,78.268,78.511,78.691,78.841,78.497,78.497,
6,82098,Antigua and Barbuda,ATG,"Mortality rate, under-5 (per 1,000 live births)",SH.DYN.MORT,86.3,80.4,74.0,67.4,60.8,...,8.1,7.7,7.4,7.1,6.8,6.6,6.4,6.1,6.1,
8,83194,Argentina,ARG,"Fertility rate, total (births per woman)",SP.DYN.TFRT.IN,3.075,3.069,3.106,3.101,3.08,...,2.312,2.301,2.241,2.168,2.039,1.994,1.911,1.885,1.885,
9,83275,Argentina,ARG,"GNI per capita, Atlas method (current US$)",NY.GNP.PCAP.CD,,,,,1130.0,...,12350.0,12600.0,12220.0,13140.0,12430.0,11220.0,9010.0,9960.0,9960.0,


In [44]:
# Flag the columns whose name starts with 'Unnamed' and keep all the others.
columnas_sin_nombre = indicadores_latinoamerica.columns.str.contains('^Unnamed')
indicadores_latinoamerica = indicadores_latinoamerica.loc[:, ~columnas_sin_nombre]

# Write the cleaned table to disk (row labels are written as the first CSV column).
indicadores_latinoamerica.to_csv('indicadores_latinoamerica.csv')