In [6]:
import pandas as pd
import os

# 1. CLEAN CO2 EMISSIONS
#
# Load the CO2 table, keep only the four columns needed downstream, restrict
# the rows to the years 2000-2023 (both ends inclusive), discard rows with no
# CO2 value, and rename the columns to the names the later merges key on.

co2_df = (
    pd.read_csv("co2_emission_2000_2023.csv")
    .loc[:, ["country", "iso_code", "year", "co2"]]
    .loc[lambda frame: frame["year"].between(2000, 2023)]
    .dropna(subset=["co2"])
    .rename(
        columns={
            "country": "Country",
            "iso_code": "Country Code",
            "year": "Year",
            "co2": "CO2_Emissions",
        }
    )
)



# 2. CLEAN PM2.5
#
# Reshape the PM2.5 table from wide to long: every column other than
# "Country" and "Country Code" is unpivoted into a (Year, PM2.5) pair.

pm25_df = pd.read_csv("pm25_air_pollution.csv")
pm25_long = pm25_df.melt(
    id_vars=["Country", "Country Code"],
    var_name="Year",
    value_name="PM2.5",
)
# The former column headers become the "Year" values; cast them to int.
pm25_long = pm25_long.assign(Year=pm25_long["Year"].astype(int))


# 3. CLEAN TEMPERATURE
#
# Rename the "ISO3" column to "Country Code" (the key used by the merges
# below) and keep only rows whose Year is within 2000-2023 inclusive.

temp_df = pd.read_csv("avg_temperature.csv").rename(
    columns={"ISO3": "Country Code"}
)
temp_df = temp_df.loc[temp_df["Year"].between(2000, 2023)]


# 4. CLEAN HEALTH DATA (Death Rate)
#
# Shorten the source column names, keep only Country / Year / Death_Rate,
# restrict to 2000-2023 inclusive, and drop rows with no death-rate value.

health_df = (
    pd.read_csv("air_pollution_health.csv")
    .rename(
        columns={
            "Entity": "Country",
            "Death rate from air pollution (IHME) (per 100,000 population)": "Death_Rate",
        }
    )
    .loc[:, ["Country", "Year", "Death_Rate"]]
    .loc[lambda frame: frame["Year"].between(2000, 2023)]
    .dropna(subset=["Death_Rate"])
)


# 5. MERGE PM2.5 + TEMP
#
# Inner-join the long PM2.5 table with the temperature table on
# (Country Code, Year). A "Country" column present on both sides comes out of
# the merge as Country_x / Country_y; the right-hand copy is dropped and the
# left-hand one is renamed back to "Country".
# NOTE(review): this assumes temp_df also carries a "Country" column -- the
# drop raises KeyError otherwise. Confirm against avg_temperature.csv.

pm25_temp = (
    pm25_long
    .merge(temp_df, how="inner", on=["Country Code", "Year"])
    .drop(columns=["Country_y"])
    .rename(columns={"Country_x": "Country"})
)


# 6. MERGE with CO2
#
# Inner-join the PM2.5/temperature table with the CO2 table on
# (Country Code, Year). Both inputs have a "Country" column, so the merge
# emits Country_x / Country_y; keep the left-hand name and discard the other.

merged1 = (
    pm25_temp
    .merge(co2_df, how="inner", on=["Country Code", "Year"])
    .drop(columns=["Country_y"])
    .rename(columns={"Country_x": "Country"})
)


# 7. MERGE with HEALTH DATA
#
# Inner-join the combined environmental table with the death-rate table.
# NOTE(review): unlike the earlier merges, this one keys on the country
# *name* rather than the country code, so any country spelled differently in
# the two sources is silently dropped by the inner join -- confirm coverage.

final_df = merged1.merge(health_df, on=["Country", "Year"], how="inner")


# 8. EXPORT FINAL DATASET
#
# Write the merged table to CSV without the index column, then print whether
# the exported file exists.

output_path = r"C:\Users\prera\OneDrive\Documents\final_climate_health_data.csv"
final_df.to_csv(output_path, index=False)

# Check the exact path that was written. Testing the bare file name instead
# would look in the current working directory, which is not necessarily the
# export folder, and could report on an unrelated or stale file.
print(os.path.exists(output_path))



True
