In [5]:
import pandas as pd
import numpy as np
import os

# Directory where CSV files are stored
data_dir = "/Users/manuelmorales/inspireauth/hdi_data/"  # Change this to your actual path

# List of countries
countries = [
    "Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Costa_Rica", "Cuba",
    "Dominican_Republic", "Ecuador", "El_Salvador", "Guatemala", "Honduras",
    "Mexico", "Panama", "Paraguay", "Peru", "Uruguay", "Venezuela"
]

# Dictionary to store HDI values for each country
hdi_data = {}
hdi_averages = {}  # Dictionary to store average HDI values

# Loop through all country CSV files
for country in countries:
    file_path = os.path.join(data_dir, f"{country}.csv")
    
    if os.path.exists(file_path):  # Ensure the file exists
        # Load the CSV file
        df = pd.read_csv(file_path)

        # Filter for "Human Development Index (value)"
        hdi_df = df[df["key"].str.startswith("Human Development Index (value)")]

        # Extract year from 'key' column
        hdi_df["year"] = hdi_df["key"].str.extract(r"(\d{4})").astype(int)

        # Convert 'value' column to numeric
        hdi_df["value"] = pd.to_numeric(hdi_df["value"])

        # Filter years 1990-2021
        hdi_df = hdi_df[(hdi_df["year"] >= 1990) & (hdi_df["year"] <= 2021)]

        # Store HDI values in dictionary as a NumPy array
        hdi_values = hdi_df["value"].to_numpy()

        # Test: Ensure the array has exactly 31 values (1990-2021)
        if len(hdi_values) == 32:
            hdi_data[country] = hdi_values
            hdi_averages[country] = np.mean(hdi_values)  # Compute the average
        else:
            print(f"Warning: {country} has {len(hdi_values)} HDI values instead of 31.")

# Print an example (e.g., Argentina)
print("HDI for Argentina:", hdi_data["Argentina"])
print("Average HDI for Argentina:", hdi_averages["Argentina"])

# Save results
np.savez("hdi_data.npz", **hdi_data)  # Saves each country as a separate array in a compressed file

# Save averages to a CSV file
avg_df = pd.DataFrame(list(hdi_averages.items()), columns=["Country", "Average_HDI"])
avg_df.to_csv("hdi_averages.csv", index=False)

# To load later:
# loaded_data = np.load("hdi_data.npz")
# hdi_argentina = loaded_data["Argentina"]

# To check the saved averages
# avg_hdi_df = pd.read_csv("hdi_averages.csv")


HDI for Argentina: [0.724 0.731 0.736 0.74  0.746 0.747 0.752 0.758 0.764 0.774 0.78  0.785
 0.785 0.793 0.798 0.802 0.814 0.817 0.825 0.827 0.834 0.842 0.844 0.846
 0.847 0.85  0.848 0.853 0.852 0.853 0.841 0.844]
Average HDI for Argentina: 0.801625


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hdi_df["year"] = hdi_df["key"].str.extract(r"(\d{4})").astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hdi_df["value"] = pd.to_numeric(hdi_df["value"])


In [6]:
hdi_data["Argentina"]

array([0.724, 0.731, 0.736, 0.74 , 0.746, 0.747, 0.752, 0.758, 0.764,
       0.774, 0.78 , 0.785, 0.785, 0.793, 0.798, 0.802, 0.814, 0.817,
       0.825, 0.827, 0.834, 0.842, 0.844, 0.846, 0.847, 0.85 , 0.848,
       0.853, 0.852, 0.853, 0.841, 0.844])

In [7]:
for country, avg_hdi in hdi_averages.items():
    print(f"Average HDI for {country}: {avg_hdi}")

Average HDI for Argentina: 0.801625
Average HDI for Bolivia: 0.6403125
Average HDI for Brazil: 0.69778125
Average HDI for Chile: 0.79
Average HDI for Colombia: 0.70140625
Average HDI for Costa_Rica: 0.7419374999999999
Average HDI for Cuba: 0.728125
Average HDI for Dominican_Republic: 0.67878125
Average HDI for Ecuador: 0.7112499999999999
Average HDI for El_Salvador: 0.6228750000000001
Average HDI for Guatemala: 0.5763125
Average HDI for Honduras: 0.5724374999999999
Average HDI for Mexico: 0.727875
Average HDI for Panama: 0.75053125
Average HDI for Paraguay: 0.6809375
Average HDI for Peru: 0.6990937500000001
Average HDI for Uruguay: 0.768125
Average HDI for Venezuela: 0.7186874999999999
