In [1]:
from pathlib import Path

import pandas as pd

# Build per-year global CO2 aggregates from the OWID CO2 dataset (master
# branch) and export them to preprocessed/global_co2_by_year.csv.
df = pd.read_csv('co2/owid-co2-data.csv')

# Fail early, with one clear message, if the file lacks a column used below.
# (The later column selections would raise a less specific KeyError anyway.)
required_columns = ["iso_code", "year", "co2", "co2_per_capita", "population"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(
        f"co2/owid-co2-data.csv is missing required columns: {missing_columns}"
    )

# Ensure numeric (co2 is typically in million tonnes; per_capita in tonnes/person).
# Unparseable values become NaN instead of raising.
for c in ["co2", "co2_per_capita", "population"]:
    df[c] = pd.to_numeric(df[c], errors="coerce")

# Keep only rows whose iso_code is a non-missing 3-character string.
# Presumably this drops regional/aggregate rows, which carry no ISO3 code or a
# longer one -- verify against the dataset's codebook.
iso_codes = df["iso_code"].astype("string")
country_mask = iso_codes.notna() & iso_codes.str.len().eq(3)
countries = df[country_mask].copy()

# Global TOTAL each year = sum of country totals (may be slightly lower if some
# are missing). min_count=1 makes a year with no valid value NaN rather than 0.
# NOTE(review): the recorded output of this cell shows totals around 2.4e5 next
# to a per-capita value of ~4.6. If co2 is in million tonnes and per_capita in
# tonnes/person, those figures imply a population of ~5e10, so the totals look
# inflated -- check the input for duplicate country-year rows or a unit mismatch.
global_total_sum = countries.groupby("year")["co2"].sum(min_count=1)

# Population-weighted global PER-CAPITA each year:
#   sum(per_capita * population) / sum(population)
# computed only over rows where both inputs are present, so the set of rows
# used here can differ from the set summed for the total above.
tmp = countries.dropna(subset=["co2_per_capita", "population"]).copy()
tmp["pc_x_pop"] = tmp["co2_per_capita"] * tmp["population"]
agg = tmp.groupby("year").agg(
    pc_x_pop=("pc_x_pop", "sum"),
    pop=("population", "sum"),
)
global_percap_weighted = agg["pc_x_pop"] / agg["pop"]

print("\nGlobal TOTAL CO2 (sum over ISO3 countries):")
print(global_total_sum.dropna().tail(10))

print("\nGlobal PER-CAPITA CO2 (population-weighted over ISO3 countries):")
print(global_percap_weighted.dropna().tail(10))

# Export. The two series are aligned on their shared "year" index.
# The column name "global_percapital_co2_weighted" (sic) is kept as-is because
# readers of the exported CSV may depend on it -- TODO confirm before renaming.
out = pd.DataFrame({
    "global_total_co2": global_total_sum,
    "global_percapital_co2_weighted": global_percap_weighted,
}).sort_index()

# Create the output directory first; to_csv does not create missing parents
# and would otherwise raise OSError on a fresh checkout.
out_path = Path("preprocessed/global_co2_by_year.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
out.to_csv(out_path, index_label="year")
print("\nSaved: global_co2_by_year.csv")


Global TOTAL CO2 (sum over ISO3 countries):
year
2015    227241.886
2016    227186.128
2017    230859.281
2018    235325.126
2019    237264.344
2020    226123.155
2021    236932.336
2022    240370.227
2023    242955.627
2024    245672.264
Name: co2, dtype: float64

Global PER-CAPITA CO2 (population-weighted over ISO3 countries):
year
2015    4.662886
2016    4.605963
2017    4.626656
2018    4.668246
2019    4.659689
2020    4.402939
2021    4.579857
2022    4.613068
2023    4.623979
2024    4.634102
dtype: float64

Saved: global_co2_by_year.csv
