### Sensitivity Analysis and Probabilistic Estimation of Reservoir Emissions with Re-Emission

**Author**: 
Tomasz Janus,
University of Manchester,
01/07/2025

#### NOTE:
- Requires the **SALib** library, which provides Python implementations of commonly used sensitivity analysis methods, including the Sobol, Morris, and FAST methods - see: https://app.readthedocs.org/projects/salib/downloads/pdf/stable/

#### QUICK DESCRIPTION:

In this example, we leverage Re-Emission's capability to dynamically alter its configuration parameters, such as emission model regression coefficients, pre-impoundment emissions, or nutrient exports, to perform sensitivity analysis and Monte Carlo simulations of reservoir emissions under parametric uncertainties. The analysis is performed using Python's **SALib** package. Re-Emission's `salib` module provides interfaces to **SALib** methods and functions, allowing seamless integration with **SALib** and enabling global sensitivity analysis with respect to both parametric and input uncertainties.

This notebook performs a simple analysis to briefly demonstrate the capabilities of Re-Emission's `salib` module. For simplicity, it is restricted to testing model sensitivity to parametric uncertainties stemming from the emission regression equations only. Sensitivity to other parametric uncertainties and to input uncertainties has not been investigated here.

In [None]:
# Notebook imports
import pathlib
import importlib
import pickle
from functools import partial
%matplotlib inline
import matplotlib.pyplot as plt
from rich import print as rprint
from reemission.salib.runners import SALibProblem, SobolAnalyser
from reemission.salib.wrappers import TestModelSALibWrapper, ReEmissionSALibWrapper
from reemission.salib.visualize import SobolResultVisualizer, SobolScenarioResultsVisualizer
from reemission.salib.specloaders import (
    ReEmissionSALibSpecLoader,
    TestModelSALibSpecLoader,
    set_unit_input_distribution_using_rel_diffrence)
from reemission.input import Inputs
from reemission.salib.runners import SobolResults, SobolScenarioResults

# Constants
# Relative difference expressed as a fraction (0.1).
# NOTE(review): not referenced by the cells visible in this notebook, which
# define their own `rel_difference = 0.1` - presumably the same quantity;
# confirm and either use this constant there or remove it.
REL_DIFF: float = 0.1

### 1. Testing global sensitivity with SALib and Sobol indices on a test model

In [None]:
# --- Build the SALib problem and the model wrapper from the test spec ---
test_salib_spec = TestModelSALibSpecLoader(pathlib.Path("params_test.yaml"))
test_model_variables = test_salib_spec.list_of_variables
test_salib_problem = SALibProblem.from_variables(test_model_variables)
test_model = TestModelSALibWrapper.from_variables(test_model_variables)
rprint("SALib problem:")
rprint(test_salib_problem)

# --- Run the Sobol analysis for the nominal (single-scenario) case ---
analyser = SobolAnalyser(
    problem=test_salib_problem,
    variables=test_model_variables,
    model=test_model,
    num_samples=1024,
)
results = analyser.run_sobol()

# --- Run the Sobol analysis for several named scenarios ---
# Each scenario maps variable names to values handed to
# `run_sobol_scenarios` (presumably values at which those variables are
# fixed - confirm against SobolAnalyser).
scenarios = {
    'Scenario 1': {
        'cont1': 2,
        'cont2': 1,
    },
    'Scenario 2': {
        'cont1': 3,
        'cont2': 2,
    },
    'Scenario 3': {
        'cont1': 3,
        'b': 0.4,
    },
}
results_sc = analyser.run_sobol_scenarios(scenarios)

# --- Save results ---
# Create the output folder first so that a missing directory does not make
# the cell fail after the analysis has already been computed.
output_dir = pathlib.Path('../outputs_and_intermediate')
output_dir.mkdir(parents=True, exist_ok=True)
with open(output_dir / 'test_sobol.pkl', 'wb') as handle:
    pickle.dump([results, results_sc], handle, protocol=pickle.HIGHEST_PROTOCOL)

# --- Plot a 3x2 overview figure ---
visualizer = SobolResultVisualizer(results)
scenario_visualizer = SobolScenarioResultsVisualizer(results_sc)

fig, axes = plt.subplots(3, 2, figsize=(10, 9))
axes = axes.flatten()
visualizer.plot_S1_ST(
    ax=axes[0],
    title='Sobol Sensitivity Indices - single scenario')
scenario_visualizer.plot_S1_ST(
    ax=axes[1],
    title="Sobol Sensitivity Indices across multiple scenarios")
visualizer.plot_variance_contribution_by_group(
    ax=axes[2],
    title="Variance contribution by uncertainty group - single scenario")
visualizer.plot_output_kde(
    ax=axes[3],
    title="Distribution of model output - single scenario")
scenario_visualizer.plot_variance_per_scenario(
    ax=axes[4],
    title="Scenario-specific uncertainty decomposition")
scenario_visualizer.plot_outputs_per_scenarios(
    ax=axes[5],
    title="Outputs across multiple scenarios",
    component_colors=['#1f77b4', '#2ca02c', '#ff7f0e'])
fig.savefig(output_dir / 'test_model_example_figure.png')
# `plt.show()` rather than `fig.show()`: the notebook uses the non-GUI inline
# backend, on which `Figure.show()` only emits a warning.
plt.show()


### 2. Sensitivity Analysis using Re-Emission

* Confidence intervals for some parameters were derived from [G-Res Tool Technical Documentation](https://www.hydropower.org/publications/the-ghg-reservoir-tool-g-res-technical-documentation)

#### 2A. Sensitivity analysis for a single reservoir for quick testing and prototyping purposes

In [None]:
# --- Load the inputs and pick a single reservoir ---
uk_input_file = pathlib.Path("../data/uk_inputs.json").resolve()
inputs = Inputs.fromfile(uk_input_file)
reservoir_name: str = 'Katrine'
selected_input = inputs.get_input(reservoir_name)  # Use a single reservoir

# Relative +/- difference for the inputs with missing distributions
rel_difference: float = 0.1

# --- Load the SALib specification for the re-emission model ---
spec_file = "params_reemission_reg.yaml"
reemission_salib_spec = ReEmissionSALibSpecLoader(
    spec_file=spec_file,
    input=selected_input,
    missing_input_dist_handler=partial(
        set_unit_input_distribution_using_rel_diffrence,
        rel_difference=rel_difference),
)
# Var names for visualization
var_names = reemission_salib_spec.var_name_map
# List of variables from the SALib specification
reemission_variables = reemission_salib_spec.list_of_variables
# List of accessors from the SALib specification
accessors = reemission_salib_spec.accessors

# --- Build the SALib problem and the model wrapper, then run Sobol ---
reemission_salib_problem = SALibProblem.from_variables(reemission_variables)
reemission_salib_model = ReEmissionSALibWrapper.from_variables(
    variables=reemission_variables,
    input=selected_input,
    emission='total_net',
    accessors=accessors,
)
analyser = SobolAnalyser(
    problem=reemission_salib_problem,
    variables=reemission_variables,
    model=reemission_salib_model,
    num_samples=2_048,
)
results = analyser.run_sobol()

# --- Save results ---
# NOTE(review): this path is relative to the notebook folder, whereas the
# multi-reservoir cell writes under ../outputs_and_intermediate/ - confirm
# that the difference is intended.
file_name = pathlib.Path("sobol_outputs_uk") / 'reemission_sobol_katrine_test.pkl'
# Create the folder first so the finished analysis is not lost to a
# FileNotFoundError when the directory does not exist yet.
file_name.parent.mkdir(parents=True, exist_ok=True)
with open(file_name, 'wb') as handle:
    pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

#### 2B. Sensitivity analysis for 38 UK reservoirs

In [None]:
# Run SOBOL analysis for a subset of UK reservoirs - can take a long time

# Load the inputs first so that this cell does not depend on an `inputs`
# object left behind by an earlier cell.
uk_input_file = pathlib.Path("../data/uk_inputs.json").resolve()
spec_file = "params_reemission_reg.yaml"
inputs = Inputs.fromfile(uk_input_file)

reservoirs_list_uk = list(inputs.inputs.keys())
rprint(f"Number of reservoirs: {len(reservoirs_list_uk)}")
# Slice this list to restrict the analysis to a subset of reservoirs
selected_reservoirs = reservoirs_list_uk[:]

# Relative +/- difference for the inputs with missing distributions; the
# value and the handler built from it are the same for every reservoir.
rel_difference: float = 0.1
missing_input_dist_handler = partial(
    set_unit_input_distribution_using_rel_diffrence,
    rel_difference=rel_difference)

# Create the output folder before the (long) loop starts, so that a missing
# directory cannot discard a finished analysis at save time.
output_dir = pathlib.Path("../outputs_and_intermediate/sobol_outputs_uk")
output_dir.mkdir(parents=True, exist_ok=True)

for res_no, reservoir in enumerate(selected_reservoirs):
    print(f"Running SOBOL analysis for reservoir: {reservoir} - {res_no + 1} out of {len(selected_reservoirs)}")
    selected_input = inputs.get_input(reservoir)  # Use a single reservoir
    # Load the SALib specification for the re-emission model
    reemission_salib_spec = ReEmissionSALibSpecLoader(
        spec_file=spec_file,
        input=selected_input,
        missing_input_dist_handler=missing_input_dist_handler,
    )
    # Var names for visualization
    var_names = reemission_salib_spec.var_name_map
    # List of variables from the SALib specification
    reemission_variables = reemission_salib_spec.list_of_variables
    # List of accessors from the SALib specification
    accessors = reemission_salib_spec.accessors
    reemission_salib_problem = SALibProblem.from_variables(
        reemission_variables
    )
    reemission_salib_model = ReEmissionSALibWrapper.from_variables(
        variables=reemission_variables,
        input=selected_input,
        emission='total_net',
        accessors=accessors,
    )
    analyser = SobolAnalyser(
        problem=reemission_salib_problem,
        variables=reemission_variables,
        model=reemission_salib_model,
        num_samples=8_192,
    )
    results = analyser.run_sobol()

    # Save results - one pickle per reservoir, named after the reservoir
    file_name = output_dir / f"{reservoir}.pkl"
    with open(file_name, 'wb') as handle:
        pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

### 3. Create figure for the publication

In [None]:
# Development helper: reload `reemission.salib.visualize` and re-import the
# two visualizer classes so that the names used below refer to the reloaded
# module (presumably so that edits to the plotting code are picked up
# without restarting the kernel).
import importlib
import reemission.salib.visualize
importlib.reload(reemission.salib.visualize)
from reemission.salib.visualize import SobolResultVisualizer, SobolScenarioResultsVisualizer

In [None]:
import os
import random
from typing import List
from collections.abc import Iterable

# --- Settings ---
file_folder = "sobol_outputs_uk_diff_only"  # folder with one .pkl per reservoir
number_reservoirs_to_display: int = 20      # upper limit on plotted reservoirs
seed = 42                                   # seed for the random sub-sampling
selected_reservoir_index = 1                # reservoir shown in the KDE panel

# --- Load the per-reservoir Sobol results ---
loaded_results: List[SobolResults] = []
sc_names: List[str] = []

# `os.listdir` returns entries in arbitrary order; sort them so that the
# seeded sample below and `selected_reservoir_index` pick the same
# reservoirs on every machine.
for filename in sorted(os.listdir(file_folder)):
    if not filename.endswith('.pkl'):
        continue
    reservoir_name = os.path.splitext(filename)[0]
    file_path = os.path.join(file_folder, filename)
    # NOTE: unpickling can execute arbitrary code - only load result files
    # that come from a trusted source (e.g. produced by this notebook).
    with open(file_path, 'rb') as f:
        sc_result = pickle.load(f)
    # Keep only the first element of `nominal_output`. `object.__setattr__`
    # bypasses the class's own attribute setting (presumably because
    # SobolResults is frozen - TODO confirm).
    object.__setattr__(sc_result, 'nominal_output', sc_result.nominal_output[0])
    sc_names.append(reservoir_name)
    loaded_results.append(sc_result)

# --- Randomly keep at most `number_reservoirs_to_display` reservoirs ---
if len(loaded_results) > number_reservoirs_to_display:
    random.seed(seed)
    selected_indices = random.sample(
        range(len(loaded_results)), number_reservoirs_to_display)
    loaded_results = [loaded_results[i] for i in selected_indices]
    sc_names = [sc_names[i] for i in selected_indices]

sc_results = SobolScenarioResults(
    sc_names=sc_names,
    results=loaded_results,
)
var_names = sc_results.var_names
visualizer = SobolResultVisualizer(
    sc_results.results[selected_reservoir_index],
    par_name_map=var_names)
scenario_visualizer = SobolScenarioResultsVisualizer(sc_results)

# --- Plot the 2x2 publication figure ---
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
axes = axes.flatten()
scenario_visualizer.plot_S1_ST(
    ax=axes[0],
    x_label_rotation=90,
    title='Sobol Indices for regression coefficients')

scenario_visualizer.plot_variance_contributions_by_group(
    ax=axes[1],
    title="Variance contributions by uncertainty group")

scenario_visualizer.plot_outputs_per_scenarios(
    ax=axes[2],
    x_label_rotation=90,
    sorting='desc',
    title="Total net emission predictions for multiple reservoirs",
    width=0.45,
    component_colors=['#ff7f0e', '#1f77b4', '#2ca02c', '#9467bd', '#d62728', '#8c564b'])

visualizer.plot_output_kde(
    ax=axes[3],
    xlims=(100, 500),
    title=f"Total net emissions under parameter uncertainty - {sc_names[selected_reservoir_index]}")

# --- Save the figure (raster and vector) ---
# Create the output folder first so saving cannot fail on a missing directory.
figure_dir = pathlib.Path('../outputs_and_intermediate')
figure_dir.mkdir(parents=True, exist_ok=True)
fig.savefig(figure_dir / 'reemission_sobol_paper.png')
fig.savefig(figure_dir / 'reemission_sobol_paper.svg')

### The End