In [1]:
import pandas as pd
import os

**This script loads and cleans Table 3 data:
'Surgical operations and procedures related to diseases of the circulatory system, 2018 and 2023'**

* Step 1: Load Excel file and target 'Table 3'
* Step 2: Drop empty columns (only columns C–G of the sheet are read)
* Step 3: Rename columns clearly
* Step 4: Remove non-country rows and notes
* Step 5: Convert numeric columns
* Step 6: Remove footnotes and save cleaned data

**Notes on Table 3:**

The data include rates per 100,000 inhabitants for two procedure types:
- Coronary artery bypass graft
- Transluminal coronary angioplasty

In [2]:
# Load 'Table 3' from the workbook, reduce it to one row per country with four
# numeric rate columns, write the result to CSV and print a preview.

# --- Configuration ---------------------------------------------------------
file_path = "Cardiovascular_diseases_Health2025.xlsx"
output_dir = "data/processed"

# Single source of truth for the column layout: first column is the country
# label, the remaining four are the numeric rate columns.
column_names = ["Country", "Bypass_2018", "Bypass_2023", "Angioplasty_2018", "Angioplasty_2023"]
numeric_columns = column_names[1:]

# --- Load ------------------------------------------------------------------
df_raw = pd.read_excel(
    file_path,
    sheet_name="Table 3",
    header=None,
    skiprows=9,
    usecols="C:G"
)
df_raw.columns = column_names

# The first two rows that survive `skiprows` are discarded as well
# (presumably leftover header rows — confirm against the workbook).
df_raw = df_raw.drop([0, 1]).reset_index(drop=True)

# --- Keep only rows with valid country names -------------------------------
df_raw = df_raw[df_raw["Country"].notna()]
# `.copy()` makes the filtered frame independent of its parent, so the column
# assignments below are unambiguous and cannot raise SettingWithCopyWarning.
df_raw = df_raw[
    ~df_raw["Country"].str.contains("Source|Bookmark|For text|Break", na=False)
].copy()

# Strip parenthesised annotations (e.g. footnote markers) and stray whitespace.
df_raw["Country"] = (
    df_raw["Country"]
    .astype(str)
    .str.replace(r"\s*\(.*\)", "", regex=True)
    .str.strip()
)

# --- Convert numeric columns -----------------------------------------------
# errors="coerce" turns every non-numeric cell (including the ':' placeholder
# and empty strings) into NaN, so no separate replace step is required.
for col in numeric_columns:
    df_raw[col] = pd.to_numeric(df_raw[col], errors="coerce")

# --- Remove remaining note / footnote rows ----------------------------------
# "Country" holds only strings at this point, so no further NaN check is needed.
df_raw = df_raw[df_raw["Country"] != ""]
df_raw = df_raw[~df_raw["Country"].str.contains(
    "Source|Bookmark|For text|Break|Public|2022|2021",
    case=False, na=False
)]

# --- Save --------------------------------------------------------------------
os.makedirs(output_dir, exist_ok=True)

output_path = os.path.join(output_dir, "cvd_surgery_clean.csv")
df_raw.to_csv(output_path, index=False)

print(f"Cleaned Table 3 saved to: {output_path}")

# --- Preview -----------------------------------------------------------------
# Note: these display options are set globally and stay in effect afterwards.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
print(df_raw.head(30))

Cleaned Table 3 saved to: data/processed/cvd_surgery_clean.csv
          Country  Bypass_2018  Bypass_2023  Angioplasty_2018  \
0         Belgium        59.34        55.48            258.97   
1        Bulgaria        45.39        41.62            247.37   
2         Czechia        44.63        39.79            214.52   
3         Denmark        53.44        23.63            191.35   
4         Germany        54.57        45.65            408.48   
5         Estonia        32.68        30.14            222.07   
6         Ireland        19.31        33.12            128.53   
7          Greece          NaN          NaN               NaN   
8           Spain        15.66        15.50            121.36   
9          France        28.24        26.28            275.63   
10        Croatia        72.52        63.89            423.49   
11          Italy        34.12        30.11            222.62   
12         Cyprus        41.00        68.81            151.65   
13         Latvia          