We start by importing relevant packages.

In [122]:
import json
import numpy as np
from scipy.interpolate import interp1d
import plotly.graph_objects as go

Let's load the data.

In [123]:
# Specify the path to the JSON file
filepath = "data/data.json"

# Open the JSON file and load its contents into a Python dictionary.
# The encoding is passed explicitly: without it, open() falls back to the
# platform's locale encoding, which is not guaranteed to be UTF-8.
with open(filepath, "r", encoding="utf-8") as file:
    data = json.load(file)

The keys of a JSON object are always strings, so I convert them to integers.

In [124]:
# json.load yields string keys; rebuild the dict with integer keys instead.
data = {int(label): entry for label, entry in data.items()}

We also have to handle the fact that some of the scenarios were renamed.

In [125]:
# Renamed scenario labels: old label -> label it is normalized to.
key_mapping = {"Reference": "Current Policies", "New Policies": "Stated Policies"}

# Scenario labels (after normalization) that the evaluation cells pass on.
relevant_keys = [
    "Current Policies",
    "Stated Policies",
    "Announced Pledges",
]


# Step 0: Normalize keys across all values
def normalize_keys(data_dict, mapping=None):
    """Rename scenario labels in every entry's "values" dict, in place.

    Args:
        data_dict: Mapping whose values are dicts; each may hold a "values"
            dict keyed by scenario label.
        mapping: Old label -> new label. Labels that are not in the mapping
            are kept unchanged. Defaults to the module-level ``key_mapping``.

    Returns:
        None. Every entry ends up with a "values" key; entries that had none
        get an empty dict. When two labels collapse to the same normalized
        label, a warning is printed and the later one wins.
    """
    if mapping is None:
        mapping = key_mapping

    for content in data_dict.values():
        normalized_values = {}
        for key, value in content.get("values", {}).items():
            normalized_key = mapping.get(key, key)  # Map key if in mapping
            if normalized_key in normalized_values:
                print(
                    f"Warning: Duplicate normalized key '{normalized_key}'. Overwriting previous entry."
                )
            normalized_values[normalized_key] = value
        content["values"] = normalized_values

In [126]:
# Rewrites each entry's "values" dict in place, applying key_mapping to the labels.
normalize_keys(data)

I want to be able to interpolate the data.

In [127]:
def generate_splines(data_dict):
    """Attach one interpolating function per series to every entry, in place.

    For each entry, the points ``(content["year"][i], series[i])`` of every
    series in ``content["values"]`` are fitted with
    ``scipy.interpolate.interp1d`` after dropping points whose value is
    ``None``. The results are stored under ``content["splines"]``, keyed like
    ``content["values"]``; the interpolants extrapolate outside the fitted
    years.

    A cubic spline needs at least four points, so shorter series fall back to
    a lower order (three points: quadratic, two points: linear) instead of
    letting ``interp1d`` raise. Series with fewer than two valid points are
    stored as ``None``.

    Args:
        data_dict: Mapping whose values are dicts holding a "year" list and a
            "values" dict of series aligned with that list.
    """
    # Interpolation kind used when there are too few points for a cubic fit.
    low_order_kinds = {2: "linear", 3: "quadratic"}

    for content in data_dict.values():
        year_values = content["year"]

        splines = {}
        for key, values in content["values"].items():
            # Handle null values by filtering them out
            valid_data = [
                (yr, val) for yr, val in zip(year_values, values) if val is not None
            ]
            if len(valid_data) < 2:
                splines[key] = None  # Not enough data points to interpolate
                continue

            valid_years, valid_values = zip(*valid_data)
            splines[key] = interp1d(
                valid_years,
                valid_values,
                kind=low_order_kinds.get(len(valid_data), "cubic"),
                fill_value="extrapolate",
            )

        # Add "splines" to the content
        content["splines"] = splines

In [128]:
# Adds a "splines" dict to every entry of `data` (the dict is modified in place).
generate_splines(data)

Now I need a method of extracting the predictions for the same scenario over the years, and a method to get the actual value in the tested year.

In [129]:
def obtain_values(data_dict, unique_keys, year_pred, last_year=None):
    """Collect, per scenario, what each earlier entry predicts for ``year_pred``.

    Args:
        data_dict: Mapping of year -> entry; an entry may carry a "splines"
            dict of callables (or ``None``) keyed by scenario.
        unique_keys: Scenario labels to extract.
        year_pred: Year at which every spline is evaluated.
        last_year: Exclusive upper bound on the entry years considered.
            Defaults to ``year_pred``.

    Returns:
        A ``(years, values)`` tuple. ``years`` lists the entry years below the
        bound in ascending order; ``values[key][i]`` is the prediction taken
        from the entry for ``years[i]``, or ``None`` when that entry has no
        usable spline for ``key``.
    """
    # Compare against None so the bound is honoured whenever one is passed.
    terminal_year = year_pred if last_year is None else last_year

    # Ascending order keeps the x-axis monotonic regardless of the order in
    # which the entries were inserted into data_dict.
    years = sorted(year for year in data_dict if year < terminal_year)

    values = {key: [] for key in unique_keys}
    for year in years:
        splines = data_dict[year].get("splines", {})
        for key in unique_keys:
            spline = splines.get(key)
            if spline is None:
                values[key].append(None)  # No valid spline for this scenario
            else:
                values[key].append(spline(year_pred).item())

    return years, values


def obtain_reference_values(data_dict, unique_keys, year_pred):
    """Return the value listed for ``year_pred`` by each entry dated at or after it.

    Entries keyed by a year earlier than ``year_pred`` are skipped. For the
    remaining entries, the value at the position of ``year_pred`` in
    ``content["year"]`` is read from every series in ``content["values"]``
    whose label is in ``unique_keys``.

    Args:
        data_dict: Mapping of year -> entry with a "year" list and a "values"
            dict of series aligned with that list.
        unique_keys: Scenario labels to consider.
        year_pred: Year whose value is looked up.

    Returns:
        Dict mapping entry year -> the single value found. Entries that do not
        list ``year_pred``, have no matching series, or hold only ``None`` for
        that year are left out.

    Raises:
        ValueError: If the matching series of one entry disagree on the value.
    """
    reported = {}
    for year, content in data_dict.items():
        if year < year_pred:  # skip predictions
            continue

        series = [
            value_list
            for key, value_list in content.get("values", {}).items()
            if key in unique_keys
        ]
        if not series or year_pred not in content["year"]:
            continue

        idx = content["year"].index(year_pred)  # position of year_pred
        values = {
            value_list[idx] for value_list in series if idx < len(value_list)
        } - {None}

        if len(values) > 1:  # More than one unique value
            raise ValueError(
                f"Dictionary under key '{year}' has differing values: {values}"
            )
        # An entry whose matching series are all None has nothing to report;
        # it is skipped rather than popping from an empty set.
        if values:
            reported[year] = values.pop()

    return reported

Let's add a plotting function.

In [131]:
def plot_results(years, values, year_test, reference_dict=None):
    """Plot the per-scenario predictions for ``year_test`` and show the figure.

    Args:
        years: X-axis values, one per prediction.
        values: Dict mapping scenario label -> list of predictions aligned
            with ``years``; ``None`` marks a missing prediction.
        year_test: Year the predictions refer to (used in the title only).
        reference_dict: Optional dict mapping a year -> reported value; each
            item is drawn as a dashed horizontal line spanning ``years``.

    All plotted values are divided by 1000 before display.
    """
    scale = 1000  # divisor applied to every plotted value

    fig = go.Figure()

    for key, y_values in values.items():
        # A scenario without a single prediction gets no trace.
        if all(element is None for element in y_values):
            continue
        fig.add_trace(
            go.Scatter(
                x=years,
                y=[y / scale if y is not None else None for y in y_values],
                mode="lines+markers",
                name=key,
                text=[
                    f"{key}: {val/scale:.0f}" if val is not None else f"{key}: None"
                    for val in y_values
                ],
            )
        )

    # The reference lines span the range of `years`; with no years there is
    # no range to draw over, and min()/max() would raise on the empty list.
    if reference_dict and years:
        x_span = [min(years), max(years)]
        for ref_year, ref_value in reference_dict.items():
            fig.add_trace(
                go.Scatter(
                    x=x_span,
                    y=[ref_value / scale, ref_value / scale],
                    mode="lines",
                    line=dict(dash="dash"),
                    name=f"Reported Value at {ref_year}",
                    text=[
                        f"Reported Value at {ref_year}: {ref_value/scale:.0f}"
                        for _ in range(2)
                    ],
                )
            )

    # Customize the layout
    fig.update_layout(
        title=f"CO₂ Emissions Predicted for Year {year_test}",
        xaxis_title="Year",
        yaxis_title="Emissions (Gt CO₂)",
        legend_title="Scenarios",
        template="plotly_white",
    )

    # Show the plot
    fig.show()

We will evaluate the data for 2019.

In [132]:
# Year under test: reported values are compared with earlier predictions.
reference_year = 2019

reference_values = obtain_reference_values(
    data, relevant_keys, year_pred=reference_year
)
years, preds = obtain_values(data, relevant_keys, year_pred=reference_year)

plot_results(
    years,
    preds,
    year_test=reference_year,
    reference_dict=reference_values,
)

We will also evaluate the data for 2040.

In [None]:
# Same evaluation as for the previous year, now for 2040.
reference_year = 2040

reference_values = obtain_reference_values(
    data, relevant_keys, year_pred=reference_year
)
years, preds = obtain_values(data, relevant_keys, year_pred=reference_year)

plot_results(
    years,
    preds,
    year_test=reference_year,
    reference_dict=reference_values,
)