In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os


In [None]:
# Year window used when filtering the merged data (both bounds are inclusive).
start_year, end_year = 1970, 2025

# Preferred countries; only those actually present in the data are used.
defaults = [
    "United States",
    "China",
    "India",
    "Germany",
    "Brazil",
    "Nigeria",
]

In [None]:
death_rate_file = "staticData/birth-rate-vs-death-rate/birth-rate-vs-death-rate.csv"
health_exp_file = "staticData/life-expectancy-vs-health-expenditure/life-expectancy-vs-health-expenditure.csv"

# Fail fast when an input file is missing. Everything below needs both frames,
# so merely printing a message and carrying on would only surface later as a
# NameError (fresh kernel) or silently reuse stale frames (re-run).
for csv_path in (death_rate_file, health_exp_file):
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"Error: File not found at {csv_path}")

# Load data. Read/parse errors are intentionally not caught: the original
# exception from pandas is more informative than a printed summary, and it
# stops the cell before any later step runs on missing data.
print("Loading data...")
df_death = pd.read_csv(death_rate_file)
df_health = pd.read_csv(health_exp_file)


# Column names expected in the loaded CSVs
col_death_rate = "Death rate - Sex: all - Age: all - Variant: estimates"
col_life_exp = "Life expectancy - Sex: all - Age: 0 - Variant: estimates"
col_health_exp = "Health expenditure per capita - Total"

# Rename columns for easier handling.
# errors="raise" turns a missing source column into an immediate KeyError that
# names the column, instead of silently skipping the rename and failing later
# when the short names are selected for the merge.
df_death = df_death.rename(columns={col_death_rate: "death_rate"}, errors="raise")
df_health = df_health.rename(
    columns={
        col_life_exp: "life_expectancy",
        col_health_exp: "health_expenditure",
    },
    errors="raise",
)

# Inner-join the two tables on the shared identifier columns, keeping only the
# renamed measurement columns from each side.
print("Merging data...")
join_keys = ["Entity", "Code", "Year"]
health_cols = ["life_expectancy", "health_expenditure"]
df_merged = df_death[join_keys + ["death_rate"]].merge(
    df_health[join_keys + health_cols],
    on=join_keys,
    how="inner",
)

# Discard rows where any of the three measurements is missing, and report how
# many rows that removed.
initial_count = df_merged.shape[0]
df_merged = df_merged.dropna(subset=["death_rate"] + health_cols)
final_count = df_merged.shape[0]
print(f"Data points after merging and cleaning: {final_count} (dropped {initial_count - final_count})")



In [None]:

# Entities that still have complete rows after merging and cleaning.
available_countries = df_merged["Entity"].unique()
print(f"available countries: {available_countries}")

# Use the preferred countries that are present; if none are, fall back to the
# first five available entities (as a list, matching the primary branch).
selected_countries = [c for c in defaults if c in available_countries]
if not selected_countries:
    selected_countries = list(available_countries[:5])

# Apply the country and year filters in one step and copy AFTER filtering.
# Copying before the year filters (as was done previously) leaves df_filtered
# as a boolean-indexed slice, so the column assignment below could trigger
# SettingWithCopyWarning. Series.between is inclusive on both ends by default,
# matching the former `>= start_year` / `<= end_year` pair.
country_mask = df_merged["Entity"].isin(selected_countries)
year_mask = df_merged["Year"].between(start_year, end_year)
df_filtered = df_merged[country_mask & year_mask].copy()

if len(df_filtered) == 0:
    print("No data found for the specified criteria.")

# Calculate metric: Health Expenditure / Death Rate
df_filtered["expenditure_per_death_rate"] = df_filtered["health_expenditure"] / df_filtered["death_rate"]

# Sort by Year within each Entity so lines are drawn in chronological order
df_filtered = df_filtered.sort_values(by=["Entity", "Year"])