# In [10]:
import pandas as pd

# Attach, for each country, the most recent non-missing tertiary-enrollment
# value (and the year it comes from) to the previously merged dataset, then
# write the result plus a list of countries that got no tertiary value.

# Load the merged dataset and the tertiary education data.
merged_data = pd.read_csv("old_merged_data.csv")
# The first four lines of the tertiary file are skipped, so its header row is
# read from the fifth line.
tertiary_data = pd.read_csv("tertiary_education_data.csv", skiprows=4)

# Reshape to long form: one row per (country, original column) pair.
# Every column other than "Country Name" is melted, so any header that is not
# a year (presumably metadata columns -- confirm against the file) lands in
# "Year" too; coercing to numeric turns those headers into NaN.
tertiary_data = tertiary_data.melt(
    id_vars=["Country Name"],
    var_name="Year",
    value_name="Tertiary Enrollment",
)
tertiary_data["Year"] = pd.to_numeric(tertiary_data["Year"], errors="coerce")
# Coerce the values as well: because unrelated columns were melted into the
# same column it can hold text, and a non-numeric entry under a year header
# would otherwise survive dropna() and be picked as a country's "latest" value.
tertiary_data["Tertiary Enrollment"] = pd.to_numeric(
    tertiary_data["Tertiary Enrollment"], errors="coerce"
)
# Keep only rows that have a country, a numeric year and a numeric value.
tertiary_data = tertiary_data.dropna()

# Get the latest year data for each country: idxmax yields, per country, the
# index label of the row with the largest remaining Year.
latest_row_labels = tertiary_data.groupby("Country Name")["Year"].idxmax()
latest_tertiary_data = tertiary_data.loc[latest_row_labels]

# Rename the columns for clarity; "country" is the key used for the merge.
latest_tertiary_data = latest_tertiary_data.rename(columns={
    "Country Name": "country",
    "Tertiary Enrollment": "tertiary_percentage",
    "Year": "latest_year_data",
})

# Merge with the existing data. The left join keeps every row of merged_data;
# countries with no match get NaN in the tertiary columns.
# NOTE(review): assumes merged_data does not already contain
# "tertiary_percentage" / "latest_year_data" columns -- confirm, since a name
# clash would produce suffixed columns and break the lookup below.
combined_data = merged_data.merge(latest_tertiary_data, on="country", how="left")

# Find countries with missing tertiary data.
missing_tertiary_data = combined_data[combined_data["tertiary_percentage"].isnull()]

# Save the combined data and the missing tertiary data.
combined_data.to_csv("merged_data.csv", index=False)
missing_tertiary_data.to_csv("missing_tertiary_data.csv", index=False)