In [1]:
# Import packages
import pandas as pd

In [2]:
# Read the raw CAIT historical emissions file into a DataFrame
df = pd.read_csv(filepath_or_buffer="historical_emissions.csv")

In [3]:
# Discard the 'Data source' and 'Unit' columns before reshaping
df = df.drop(labels=["Data source", "Unit"], axis="columns")

In [4]:
# Reshape from wide to long: every column other than the three id columns
# is unpivoted into a ("Date", "Value") pair, one row per original cell
df = pd.melt(
    df,
    id_vars=["Country", "Sector", "Gas"],
    var_name="Date",
    value_name="Value",
)

In [5]:
# Load the lookup table that maps CAIT country names to OWID country names
country_mapping = pd.read_csv(filepath_or_buffer="cait_country_standardized.csv")

In [6]:
# Load the population dataset (used later to derive per-capita columns)
population = pd.read_csv(filepath_or_buffer="population.csv")

In [7]:
# Build one sector-by-sector table per gas and write each one to its own CSV.
#
# Every step below has to run once per gas, so the whole pipeline lives inside
# the loop body (and therefore in a single cell). Previously only the filter on
# "Gas" was inside the loop; the remaining steps ran once, after the loop had
# finished, on the last gas only -- so a single output file was produced and
# the "All GHG" filename branch was never reached.

# Column renames applied to every per-gas table (mapping column + sector names)
column_renames = {
    "OWIDCountry": "Country",
    "Date": "Year",
    "Industrial Processes": "Industry",
    "Electricity/Heat": "Electricity & Heat",
    "Bunker Fuels": "International aviation & shipping",
    "Transportation": "Transport",
    "Manufacturing/Construction": "Manufacturing & Construction",
    "Building": "Buildings"
}

# Sector columns that get a "(per capita)" companion column. Not every gas has
# every sector, so membership is checked per table inside the loop.
columns_per_capita = ['Agriculture',
                      'Buildings',
                      'Electricity & Heat',
                      'Fugitive Emissions',
                      'Industry',
                      'International aviation & shipping',
                      'Land-Use Change and Forestry',
                      'Manufacturing & Construction',
                      'Total excluding LUCF',
                      'Total including LUCF',
                      'Transport',
                      'Waste'
                     ]

# Key columns that are placed first in every output file
left_columns = ["Country", "Year"]

for gas in ["All GHG", "CO2", "CH4", "N2O"]:

    # Keep only the rows for the current gas
    gas_df = df.loc[df["Gas"] == gas]

    # One row per (Country, Date), one column per sector
    gas_df = gas_df.pivot_table(
        index=["Country", "Date"],
        columns="Sector",
        values="Value"
    ).reset_index()

    # Add country mapping of OWID country names, then replace the original
    # "Country" column with the mapped one (via the rename below).
    # NOTE(review): this is pandas' default inner join, so rows whose "Country"
    # is missing from the mapping file are dropped -- confirm this is intended.
    gas_df = pd.merge(gas_df, country_mapping, on="Country")
    gas_df = gas_df.drop(columns=["Country"])

    # Rename columns
    gas_df = gas_df.rename(columns=column_renames)

    # "Year" originates from the melted column headers, so it holds strings.
    # pandas refuses to merge a string key with a numeric one, so align it with
    # the dtype of the population table's "Year" column before merging.
    gas_df["Year"] = gas_df["Year"].astype(population["Year"].dtype)

    # Attach population and derive per-capita values for the sectors present.
    # NOTE(review): the ratio is scaled by 1,000,000 -- presumably converting
    # from mega-units to units per person; the "Unit" column is dropped before
    # this point, so confirm against the raw file.
    gas_df = gas_df.merge(population, how="left", on=["Country", "Year"])
    for col in columns_per_capita:
        if col in gas_df.columns:
            gas_df[f"{col} (per capita)"] = gas_df[col] / gas_df["Population"] * 1000000

    gas_df = gas_df.drop(columns=["Population"])

    # Key columns first, every other column in alphabetical order
    other_columns = sorted(col for col in gas_df.columns if col not in left_columns)
    column_order = left_columns + other_columns
    gas_df = gas_df[column_order]

    # "All GHG" gets a dedicated filename; the other gases are named after the gas
    if gas == "All GHG":
        filename = "all_ghg_emissions.csv"
    else:
        filename = f"{gas}_by_sector.csv"
    gas_df.to_csv(filename, index=False)