In [12]:
import pandas as pd
from pathlib import Path

# Paths: every location is derived from DATA_DIR so that the directory created
# before saving is guaranteed to be the one the processed CSVs are written to.
DATA_DIR = Path("../data")
PROCESSED_DIR = DATA_DIR / "processed"

# loading dataset
df = pd.read_csv(DATA_DIR / "co2-emissions.csv")

# basic cleaning
# The emissions header contains a subscript two (U+2082). It is spelled with an
# ASCII escape so it survives re-encoding of the notebook; the mis-decoded
# spelling is kept as well for files whose header was already garbled.
# DataFrame.rename silently ignores keys that are not present.
df = df.rename(columns={
    "Entity": "Country",
    "Annual CO\u2082 emissions": "Co2_Annual",
    "Annual COâ‚‚ emissions": "Co2_Annual",
})

# Coerce to numeric (unparseable values become NaN) and drop rows that lack
# either a year or an emissions value.
df["Year"] = pd.to_numeric(df["Year"], errors="coerce")
df["Co2_Annual"] = pd.to_numeric(df["Co2_Annual"], errors="coerce")
df = df.dropna(subset=["Year", "Co2_Annual"])

# creating global totals
# NOTE(review): this sums every row for a given year. If the file also contains
# aggregate entities (e.g. a "World" or continent row) the totals would be
# inflated by double counting -- confirm against the raw data.
df_global = (
    df.groupby("Year", as_index=False)["Co2_Annual"]
      .sum()
      .rename(columns={"Co2_Annual": "Co2_Global"})
)

# Fractional change versus the previous row (years are sorted by groupby);
# the first year has no predecessor and is therefore NaN.
df_global["YoY_Global_Change"] = df_global["Co2_Global"].pct_change()

# country dataset
df_country = df.copy()

# saving processed files
# Create the actual output directory (previously a "processed" folder was
# created in the working directory while the files were written to
# ../data/processed, which fails when that folder does not exist yet).
PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

df_global.to_csv(PROCESSED_DIR / "co2_global_clean.csv", index=False)
df_country.to_csv(PROCESSED_DIR / "co2_country_clean.csv", index=False)

df_global.head(), df_country.head()

(   Year  Co2_Global  YoY_Global_Change
 0  1750  55835622.0                NaN
 1  1751  56443374.0           0.010885
 2  1752  57031008.0           0.010411
 3  1753  57662940.0           0.011080
 4  1754  58401480.0           0.012808,
        Country Code  Year  Co2_Annual
 0  Afghanistan  AFG  1949     14656.0
 1  Afghanistan  AFG  1950     84272.0
 2  Afghanistan  AFG  1951     91600.0
 3  Afghanistan  AFG  1952     91600.0
 4  Afghanistan  AFG  1953    106256.0)