# Visualisation of GHG emissions from RE-Emission

- Author: Tomasz Janus
- E-mail: tomasz.k.janus@gmail.com
- Mui Ne, 22/10/2023

## 1. Import the required libraries

In [None]:
from typing import Protocol, Dict, Protocol, List, Any
from dataclasses import dataclass
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns # For plotting data

In [None]:
def hexbin(x, y, color, **kwargs):
    """Draw a hexagonal-bin plot of ``y`` against ``x`` with pyplot.

    The colormap is a light sequential palette built from ``color``. Any
    extra keyword arguments are forwarded unchanged to ``plt.hexbin``.
    """
    shaded_cmap = sns.light_palette(color, as_cmap=True)
    plt.hexbin(x, y, gridsize=15, cmap=shaded_cmap, **kwargs)

In [None]:
# Relative locations of the two CSV input tables used by this notebook
full_data_path = os.path.join(
    "outputs", "reemission", "combined", "combined_outputs.csv")
elev_data_path = os.path.join("config", "elev.csv")

## 2. Input data cleanup

In [None]:
# Read the emission outputs and the elevation table
data_full = pd.read_csv(full_data_path)
elev_data = pd.read_csv(elev_data_path)
# Inner-join the 'fsl_masl' column onto the outputs by reservoir name and
# discard the duplicated key column brought in from the elevation table
elev_subset = elev_data[['name', 'fsl_masl']]
merged_df = data_full.merge(
    elev_subset, left_on='Name', right_on='name', how='inner')
merged_df = merged_df.drop(columns=['name'])
# Prefilter outside the pipeline: break the 'Scenario' label at each '_' into
# its soil / landuse intensity / treatment parts
scenario_parts = merged_df['Scenario'].str.split('_', expand=True)
merged_df[['Soil', 'Landuse intensity', 'Treatment']] = scenario_parts
# Variant of the net CH4 emission with the degassing component removed
merged_df['ch4_net_nodegassing'] = merged_df['ch4_net'] - merged_df['ch4_degassing']

In [None]:
# Plot-only dataframe: a new frame (merged_df itself is left untouched) with
# integer codes from pd.factorize for the reservoir names and scenario labels
df_plot = merged_df.assign(
    Name_Code=pd.factorize(merged_df['Name'])[0],
    Scenario_Code=pd.factorize(merged_df['Scenario'])[0],
)

## 3. Visual analysis of GHG estimation results

### 3a. Plot distributions of emissions per area for different combinations of categorical variables
The plots show the sensitivity of the emissions model to the selection of categorical inputs such as **soil type**, **landuse intensity** and **treatment factor**.

In [None]:
# Apply the seaborn theme BEFORE the axes are created: the style parameters
# are read when an axes is built, so setting the theme afterwards would not
# restyle this figure.
sns.set_theme(style="whitegrid")
y_label_co2 = r'Net CO$_2$, gCO$_{2,eq}$ m$^{-2}$ yr$^{-1}$'
y_label_ch4 = r'Net CH$_4$, gCO$_{2,eq}$ m$^{-2}$ yr$^{-1}$'
fig, axes = plt.subplots(ncols=2, nrows=2, figsize=(11, 7))
fig.suptitle(
    r'Net CO$_2$ and CH$_4$ emissions for different soil types, landuse intensities and treatment factors', 
    fontsize=14)
# One panel per (gas, soil) pair, listed in axes.ravel() order: top row is
# CO2, bottom row is CH4; left column is mineral soil, right column organic.
# Each entry: (emission column, soil code, panel title, y label, show legend)
panels = [
    ("co2_net", "MIN", 'Mineral Soil', y_label_co2, False),
    ("co2_net", "ORG", 'Organic Soil', y_label_co2, False),
    ("ch4_net", "MIN", 'Mineral Soil', y_label_ch4, False),
    ("ch4_net", "ORG", 'Organic Soil', y_label_ch4, True),
]
for ax, (emission_col, soil, title, y_label, show_legend) in zip(axes.ravel(), panels):
    ax.set_title(title)
    sns.boxenplot(
        x="Landuse intensity", y=emission_col,
        hue="Treatment", palette=["m", "g", "r", "y"],
        # Filter df_plot with its own mask rather than indexing another frame
        data=df_plot[df_plot["Soil"] == soil],
        ax=ax)
    if show_legend:
        # A single legend, placed to the right of the last panel
        ax.legend(bbox_to_anchor=(1.4, 0.75), borderaxespad=0, title="Treatment", frameon=False)
    else:
        # Replace the automatic legend with an empty one to hide it
        ax.legend([], [], frameon=False)
    ax.set(xlabel=r'Landuse intensity', ylabel=y_label)
sns.despine(offset=15, trim=False)
fig.tight_layout()
fig.savefig(os.path.join("figures","ghg_visualisation","emission_boxplots.svg"), format="svg")

#### Quantify average percentage reduction of CO2 emissions in scenario Low Intensity, Mineral Soil due to implementation of further wastewater treatment technologies in the catchment (baseline is NO treatment)

In [None]:
# Restrict the data to the LOW landuse intensity / MIN soil scenarios
df_filtered = df_plot.query(' `Landuse intensity`=="LOW" & Soil=="MIN" ')


def _mean_co2_by_type(treatment):
    """Return the mean ``co2_net`` per reservoir ``type`` for one treatment level."""
    rows = df_filtered[df_filtered['Treatment'] == treatment]
    return rows.groupby(['type'])['co2_net'].mean()


gkk_no_treatment = _mean_co2_by_type("NO")
gkk_prim_treatment = _mean_co2_by_type("PRIM")
gkk_sec_treatment = _mean_co2_by_type("SEC")
gkk_ter_treatment = _mean_co2_by_type("TER")

In [None]:
# Percentage reduction of the mean net CO2 emission per reservoir type:
# 'PRIM' treatment relative to the 'NO' treatment baseline
(gkk_no_treatment - gkk_prim_treatment) / gkk_no_treatment * 100

In [None]:
# Percentage reduction of the mean net CO2 emission per reservoir type:
# 'SEC' treatment relative to the 'NO' treatment baseline
(gkk_no_treatment - gkk_sec_treatment) / gkk_no_treatment * 100

In [None]:
# Percentage reduction of the mean net CO2 emission per reservoir type:
# 'TER' treatment relative to the 'NO' treatment baseline
(gkk_no_treatment - gkk_ter_treatment) / gkk_no_treatment * 100

### 3b. Plot distributions of emissions from irrigation and non-irrigation (hydroelectric and multipurpose) reservoirs to see if there's a difference in emissions between those three categories of reservoirs

The data is plotted for LOW landuse intensity, Mineral soil type and Primary treatment, which are representative of the local conditions in Myanmar.

In [None]:
# Total net emission = net CO2 + net CH4
df_plot['total'] = df_plot['co2_net'] + df_plot['ch4_net']
# Keep only the LOW landuse intensity / MIN soil rows so that entries are not
# repeated, then narrow down to the 'PRIM' treatment level.
# NOTE: despite its name, df_irr_prim_treatment is not filtered by reservoir type.
df_filt = df_plot.query(' `Landuse intensity`=="LOW" & Soil=="MIN" ')
df_irr_prim_treatment = df_filt[df_filt['Treatment'] == "PRIM"]

In [None]:
# Set the theme before creating the axes; style parameters are read when the
# axes is built, so a later call would not affect this figure.
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(6, 5))
palette_colors = sns.color_palette("Set2") 
fig.suptitle(
    r'Distributions of net total GHG emissions across reservoir types', 
    fontsize=14)
# (value in the 'type' column, legend label, index into the Set2 palette),
# listed in drawing order
series = [
    ("hydroelectric", 'Hydroelectric', 0),
    ("multipurpose", 'Multipurpose', 4),
    ("irrigation", 'Irrigation', 2),
]
for res_type, label, colour_ix in series:
    subset = df_irr_prim_treatment[df_irr_prim_treatment['type'] == res_type]
    sns.histplot(data=subset['total'],
                 alpha=0.8,
                 edgecolor=".3",
                 log_scale=True,
                 linewidth=1,
                 label=label, fill=True, ax=ax, kde=True,
                 color=palette_colors[colour_ix], kde_kws={'cut': 5})

# Each histplot call above contributes one KDE line to the axes, in drawing
# order. Shade the area under each curve only after all histograms are drawn,
# so the shading is layered on top of the bars.
for kde_line, (res_type, label, colour_ix) in zip(ax.lines, series):
    kde_x, kde_y = kde_line.get_data()
    ax.fill_between(kde_x, 0, kde_y, color=palette_colors[colour_ix], alpha=0.32)
ax.legend(title='Reservoir Type', fontsize=13, frameon=False, fancybox=True, shadow=True)
ax.set_xlim(10**2, 10**5)
ax.xaxis.grid(False)
ax.yaxis.grid(False)
plt.xlabel(r'Total Emission, gCO$_{2,eq}$ m$^{-2}$ yr$^{-1}$', fontsize=14)
plt.ylabel('Number of solutions', fontsize=14)
plt.tick_params(axis='both', which='major', labelsize=14)
sns.despine(offset=15, trim=False)
fig.tight_layout()
fig.savefig(os.path.join(
    "figures","ghg_visualisation","emission_histograms_per_reservoir_type.svg"), format="svg")
plt.show()

### 3c. Quantify the difference in CH4 emissions for reservoirs with deep and shallow water uptakes

The data is plotted for Mineral soil type, Low landuse intensity and primary level of wastewater treatment in the catchment

#### <span style="color: red;">TODO: QUANTIFY THE LOSS OF VOLUME REQUIRED TO OPERATE THE RESERVOIRS WITHOUT TOUCHING THE HYPOLIMNION</span>

In [None]:
# Stack two copies of the results, one per intake type:
#   'deep'    keeps ch4_net as it is
#   'shallow' uses the no-degassing CH4 value in place of ch4_net
merged_df_deep_intake = (
    merged_df
    .drop(columns="ch4_net_nodegassing")
    .assign(Intake='deep'))
merged_df_shallow_intake = (
    merged_df
    .drop(columns="ch4_net")
    .rename(columns={"ch4_net_nodegassing": "ch4_net"})
    .assign(Intake='shallow'))
df_combined = pd.concat(
    [merged_df_deep_intake, merged_df_shallow_intake], ignore_index=True)
# Total net emission for either intake type
df_combined['total'] = df_combined['ch4_net'] + df_combined['co2_net']

In [None]:
# Keep one scenario only: MIN soil, LOW landuse intensity, PRIM treatment
df = df_combined.query('Soil == "MIN" & `Landuse intensity`=="LOW" & Treatment=="PRIM"')
fig, ax = plt.subplots(figsize=(6, 5))
fig.suptitle(
    r'Total net GHG emissions per reservoir and intake type', 
    fontsize=14)
# Horizontal boxen plot: one row per reservoir type, one colour per intake type
ax = sns.boxenplot(data=df, x="total", y="type", hue="Intake", ax=ax)
for axis in (ax.xaxis, ax.yaxis):
    axis.grid(False)
l1 = ax.legend(
    title='Intake Type', fontsize=13, frameon=False, fancybox=True, shadow=True, loc="center right")
ax.set_xlabel(r'Total Emission, gCO$_{2,eq}$ m$^{-2}$ yr$^{-1}$', fontsize=14)
# No y label: the tick labels already carry the reservoir type
ax.set_ylabel(None)
ax.tick_params(axis='both', which='major', labelsize=13)
sns.despine(offset=15, trim=False)
ax.set_xlim(0, 8_000)
fig.tight_layout()
fig.savefig(os.path.join(
    "figures","ghg_visualisation","total_net_emission_per_res_and_intake_type.svg"), format="svg")

### 3d. Plot the distribution of percentage reductions of total emission when shallow vs deep intakes are employed
#### <span style="color: red;">TODO: Plot that versus loss of volume AND find out which reservoirs have the highest potential for emission reduction</span>

In [None]:
# Separate the two intake variants. reset_index() renumbers the rows of each
# subset from 0, so the arithmetic below pairs rows by their position.
deep = df[df['Intake'] == "deep"].reset_index()
shallow = df[df['Intake'] == "shallow"].reset_index()
# Percentage drop in total emission when going from deep to shallow intake.
# NOTE(review): assumes both subsets hold the same reservoirs in the same
# order - verify if df_combined is ever built differently.
deep['reduction'] = (deep['total']-shallow['total'])/deep['total'] * 100

In [None]:
# One hexbin panel per reservoir type: % reduction (x) against the
# 'coordinates_0' column (y)
with sns.axes_style("white"):
    g = sns.FacetGrid(deep, hue="type", col="type", height=4)
g.fig.suptitle(
    r'Potential reduction in GHG emissions by employing shallow water uptakes', 
    fontsize=14)
g.map(hexbin, "reduction", 'coordinates_0', extent=[0, 100, 10, 30])
g.set(ylabel="Latitude, deg")
g.set(xlabel="% reduction in total GHG emissions")
# Title every facet from the reservoir type it actually displays. Facet order
# follows the order of the 'type' values in the data, so assigning titles by
# position could mislabel the panels.
facet_titles = {
    "irrigation": "Irrigation Reservoirs",
    "hydroelectric": "Hydroelectric Reservoirs",
    "multipurpose": "Multipurpose Reservoirs",
}
for res_type, facet_ax in g.axes_dict.items():
    fallback_title = f"{str(res_type).capitalize()} Reservoirs"
    facet_ax.set(title=facet_titles.get(res_type, fallback_title))
g.fig.tight_layout()
g.fig.savefig(os.path.join(
    "figures","ghg_visualisation","red_by_shallow_water_uptakes.svg"), format="svg")

In [None]:
# One hexbin panel per reservoir type: total emission (x) against the
# 'coordinates_0' column (y)
with sns.axes_style("white"):
    g = sns.FacetGrid(deep, hue="type", col="type", height=4)
g.fig.suptitle(
    r'Distribution of total net GHG emissions by reservoir type and latitude', 
    fontsize=14)
g.map(hexbin, "total", 'coordinates_0', extent=[0, 6_000, 10, 30])
g.set(ylabel="Latitude, deg")
g.set(xlabel=r'Total Emission, gCO$_{2,eq}$ m$^{-2}$ yr$^{-1}$')
# Title every facet from the reservoir type it actually displays. Facet order
# follows the order of the 'type' values in the data, so assigning titles by
# position could mislabel the panels.
facet_titles = {
    "irrigation": "Irrigation Reservoirs",
    "hydroelectric": "Hydroelectric Reservoirs",
    "multipurpose": "Multipurpose Reservoirs",
}
for res_type, facet_ax in g.axes_dict.items():
    fallback_title = f"{str(res_type).capitalize()} Reservoirs"
    facet_ax.set(title=facet_titles.get(res_type, fallback_title))
g.fig.savefig(os.path.join(
    "figures","ghg_visualisation","distribution_of_ghg_emissions_res_type_lat.svg"), format="svg")

### 3e. Plot distributions of reductions on histograms with kde plots on top of them

In [None]:
# Set the theme before creating the axes; style parameters are read when the
# axes is built, so a later call would not affect this figure.
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(6, 5))
palette_colors = sns.color_palette("Set2") 
fig.suptitle(
    r'Reductions in total GHG emissions via shallow water uptakes', 
    fontsize=14)
# (value in the 'type' column, legend label, index into the Set2 palette),
# listed in drawing order
series = [
    ("hydroelectric", 'Hydroelectric', 0),
    ("multipurpose", 'Multipurpose', 4),
    ("irrigation", 'Irrigation', 2),
]
for res_type, label, colour_ix in series:
    subset = deep[deep['type'] == res_type]
    # sns.distplot is deprecated; histplot with stat="density" and kde=True is
    # its replacement. alpha/edgecolor reproduce the look of distplot's bars.
    sns.histplot(data=subset['reduction'],
                 stat="density",
                 alpha=0.4,
                 edgecolor=(1, 1, 1, 0.4),
                 label=label,
                 ax=ax, kde=True,
                 color=palette_colors[colour_ix], kde_kws={'cut': 5})

# Each histplot call above contributes one KDE line to the axes, in drawing
# order. Shade the area under each curve only after all histograms are drawn,
# so the shading is layered on top of the bars.
for kde_line, (res_type, label, colour_ix) in zip(ax.lines, series):
    kde_x, kde_y = kde_line.get_data()
    ax.fill_between(kde_x, 0, kde_y, color=palette_colors[colour_ix], alpha=0.32)
ax.legend(title='Reservoir Type', fontsize=13, frameon=False, fancybox=True, shadow=True,
          loc="upper left")
ax.set_xlim(0, 100)
ax.xaxis.grid(False)
ax.yaxis.grid(False)
plt.xlabel(r'Reduction in total emission %', fontsize=14)
plt.ylabel('Density', fontsize=14)
plt.tick_params(axis='both', which='major', labelsize=14)
sns.despine(offset=15, trim=False)
fig.tight_layout()
# Save before showing, as the other figure cells in this notebook do
fig.savefig(os.path.join(
    "figures","ghg_visualisation","reductions_gas_emissions.svg"), format="svg")
plt.show()

In [None]:
# Print describe() statistics of the percentage reduction for each reservoir type
separator = "-----------------------------------------------------------"
for res_type in ("hydroelectric", "irrigation", "multipurpose"):
    print(separator)
    print(f"GHG emission reduction in {res_type} reservoirs")
    print(separator)
    print(deep.query(f'type=="{res_type}"')['reduction'].describe())

### 3f. Plot all emissions across all reservoirs dividing the results into HP and non-HP domain

#### <span style="color: red;">TODO: Plot the values in sorted order and create maps</span>

In [None]:
def plot_emission_split(
        df: pd.DataFrame, col_indices, file_name: str | None = None,
        xlimits = ((1000,7000), (100, 800), (1000,7000)),
        height=20, aspect=.2):
    """Draw a strip plot of emissions per reservoir name, one panel per emission column.

    Each panel plots one of the selected columns on the x axis against the
    ``Name`` column on the y axis, with points coloured by ``Treatment``.

    Args:
        df: Data to plot; must contain the ``Name`` and ``Treatment`` columns.
        col_indices: Positions of the columns of ``df`` to use as x variables.
            The panel titles assume the order total, CO2 net, CH4 net.
        file_name: Path of the SVG file to write. When ``None`` the figure is
            drawn but not saved.
        xlimits: One ``(min, max)`` pair of x-axis limits per panel, in panel
            order. Panels without a matching pair keep their automatic limits.
        height: Height of each panel, passed to ``sns.PairGrid``.
        aspect: Aspect ratio of each panel, passed to ``sns.PairGrid``.
    """
    sns.set_theme(style="whitegrid")
    g = sns.PairGrid(df,
                     x_vars=df.columns[col_indices], y_vars=["Name"], hue='Treatment',
                     height=height, aspect=aspect)
    g.map(sns.stripplot, size=10, orient="h", jitter=True, alpha=0.8,
          linewidth=1, edgecolor="w", palette="flare_r") #palette=["m", "g", "r", "y"])
    # Use the same x label on all columns and drop the y label
    g.set(xlabel=r"Emission, gCO$_{2,eq}$ m$^{-2}$ yr$^{-1}$", ylabel="") #, xscale='log')
    g = g.add_legend()
    # Use semantically meaningful titles for the columns
    titles = [
        "Total emission", 
        "CO$_{2}$ net emission", 
        "CH$_{4}$ net emission"]
    # Per-panel x limits; zip() stops at the shorter sequence, so plotting
    # fewer than three columns no longer indexes past the last axes
    for ax, limits in zip(g.axes.flat, xlimits):
        ax.set_xlim(*limits)
    for ax, title in zip(g.axes.flat, titles):
        # Set a different title for each axes
        ax.set(title=title)
        # Make the grid horizontal instead of vertical
        ax.xaxis.grid(False)
        ax.yaxis.grid(True)

    sns.despine(left=True, bottom=True)
    # file_name defaults to None; only write the file when a path was given
    if file_name is not None:
        plt.savefig(file_name, format="svg")

In [None]:
# Total net emission = net CO2 + net CH4
df_plot['total'] = df_plot['co2_net'] + df_plot['ch4_net']
# Keep only the LOW landuse intensity / MIN soil rows so that entries are not repeated
df_filt = df_plot.query(' `Landuse intensity`=="LOW" & Soil=="MIN" ')
# Column positions of the three emission measures passed to the plotting function
col_indices = df_plot.columns.get_indexer(['total', 'co2_net', 'ch4_net'])
# Irrigation reservoirs, highest total emission first
df_1 = df_filt[df_filt['type'] == "irrigation"]
df_sorted = df_1.sort_values("total", ascending=False)

# First figure: the rows up to (but excluding) position len // 2 + 4
first_part_end = len(df_sorted) // 2 + 4
plot_emission_split(
    df_sorted.iloc[:first_part_end, :], 
    col_indices, 
    os.path.join("figures","ghg_visualisation","tot_irr_plot_irrig_1.svg"))

In [None]:
# Second figure for the irrigation reservoirs: the remaining, lower-emission rows.
# The first figure plots rows [0, len/2 + 4), so this slice starts at
# len/2 + 4; starting at len/2 + 5 left one row out of both figures.
second_part_start = len(df_sorted) // 2 + 4
plot_emission_split(
    df_sorted.iloc[second_part_start:, :], 
    col_indices, 
    os.path.join("figures","ghg_visualisation","tot_irr_plot_irrig_2.svg"),
    xlimits = ((0,2500), (0, 700), (0,2500)))

In [None]:
# Hydroelectric and multipurpose reservoirs share one figure, highest total
# emission first
df_2 = df_filt[df_filt['type'].isin(["hydroelectric", "multipurpose"])]
df_sorted = df_2.sort_values("total", ascending=False)
hp_multi_path = os.path.join("figures","ghg_visualisation","tot_irr_plot_hp_multi_1.svg")
plot_emission_split(
    df_sorted, 
    col_indices, 
    hp_multi_path,
    xlimits = ((0,4000), (0, 700), (0,4000)),
    height = 20, aspect=.2)

### END