In [1]:
import os

import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import datetime as dt
import math
from datetime import datetime

# Introduction

Make river input scenarios for OF800

Questions (maybe for Miljødirektoratet) & future improvement needs:
* Currently don't do any scenarios for Sweden. Should we do something simple there?
* Andre used the wrong area-scaling of discharge. Therefore baseline inputs are also wrong.

# User input

In [2]:
# User input
# Version number
v = 2  # For file naming

# Filepath to baseline daily river data
fpath_baseline_data = r"/home/jovyan/shared/common/oslofjord_modelling/MARTINI800v10_river_inputs/martini_rivers_v9_1990_2022_stage1data.nc"

# Filepath to river metadata csvs
real_riv_metadata = r"../data/real_river_metadata.csv"
riv_no_mapping_fpath = r"../data/oslofjord_location_ids.csv"

# TEOTIL results (annual source apportionment)
# Baseline data
teotil_res_csv_fpath = (
    r"/home/jovyan/shared/common/teotil3/evaluation/teo3_results_nve2024_2013-2023_agri-annual-loss.csv"
)
# WWTW scenario data
pc_reduction = 80  # 80 or 85% (80% was the most up-to-date estimate of what would be in the new WWTD in April 2024)
teotil_scen_res_folder = f"/home/jovyan/shared/common/oslofjord_modelling/wastewater_scenarios/scen_totn_{pc_reduction}pct_effect"
teotil_scen_res_fname = "teo3_scenario_results_nve2023_2013-2022.csv"
teotil_scen_res_fpath = os.path.join(teotil_scen_res_folder, teotil_scen_res_fname)

# Start and end year to summarise TEOTIL data over
start_year = 2013  # 2016
end_year = 2022

# Folders for output files
netcdf_outfolder = r"/home/jovyan/shared/common/oslofjord_modelling/MARTINI800v10_river_inputs/river_scenarios"
csv_exploration_folder = r"../output/csvs"
fig_folder = r"../output/plots"

# REAL river numbers to use
# full OF800 domain: model rivs 4-29 incl. Drop Sweden, becomes 7 to 29 (incl.)
oslofjord_riv_nos = range(6, 24)  # Whole OF domain, excluding 2 Swedish rivers
print(f"Generating scenarios for real rivers:\n{list(oslofjord_riv_nos)}")

# Species to alter in the scenarios (teotil3 species; mapping to of800 species done below)
par_list = ['din', 'tdp']

Generating scenarios for real rivers:
[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]


# Set up & read in river metadata

In [3]:
var_mapping_teotil3_to_of800 = {
    'din': ['river_NH4N', 'river_NO3NO2N'],  # sum
    'ton': ['river_DON', 'river_PON'],  # sum
    'totn': ['river_totn'],  # sum
    'totp': ['river_totp'],  # sum
    'tdp': ['river_SRP', 'river_DOP'],  # sum
    'tpp': ['river_POP', 'river_TIP', 'river_SRP'],  # Assume river tpp = pop + tip - srp
    'toc': ['river_DOC', 'river_POC'],  # sum
    'ss': ['river_SPM']
}

# Add derived chemvars to the list
derived_chemvars = ['din']

chem_var_li = []
for par in par_list:
    chem_var_li.extend(var_mapping_teotil3_to_of800[par])
# chem_var_li.extend(derived_chemvars)

In [4]:
# Calculate start date (inclusive), end date (day after last day)
start_date = dt.datetime(start_year, 1, 1)
end_date = dt.datetime(end_year, 12, 31)
end_date += pd.Timedelta(days=1)

# River chemistry metadata
river_meta_df = pd.read_csv(real_riv_metadata, index_col=0, dtype={'Vassom':str})
# Limit to just Oslofjord rivers
river_meta_df = river_meta_df[river_meta_df.index.isin(oslofjord_riv_nos)]
# Add 'total' row for use later
river_meta_df.loc['Total', ['river_name', 'Regine', 'Regine_to_sea', 'Vassom']] = 'Total'
river_meta_df.query('real_river in @oslofjord_riv_nos')

Unnamed: 0_level_0,river_name,Outflow_lat,Outflow_lon,Regine,Regine_to_sea,Vassom,Vassom_area_land,Vassom_area_tot,Andre_MCA_area,Andre_area_q,Overestimate (%),Comment
real_river,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
6,Tista,59.119,11.37,001.A1,001.A1,1.0,2495.0,2507.0,2507.0,1584.0,0.0,
7,Glomma,59.206,10.953,002.A51,002.A11,2.0,42446.0,43116.0,43116.0,41967.0,2.0,Monitoring point upstream of Sarpsborg RA. So ...
8,Mosseelva,59.439,10.662,003.A1,003.A1,3.0,854.0,1052.0,1054.0,694.0,23.0,
9,Hølenelva,59.523,10.69,004.A0,004.A0,4.0,204.0,227.0,,,11.0,
10,Årungen,59.72,10.728,005.3A,005.3A,5.0,280.0,368.0,144.0,85.0,31.0,
11,Akerselva,59.908,10.756,006.A10,006.A10,6.0,392.0,415.0,415.0,307.0,6.0,
12,Lysakerelva,59.914,10.64,007.A0,007.A0,7.0,202.0,211.0,211.0,177.0,4.0,
13,Sandvikselva,59.89,10.523,008.A11,008.A11,8.0,279.0,311.0,311.0,226.0,11.0,
14,Åros,59.704,10.519,009.A0,009.A0,9.0,215.0,253.0,,,18.0,
15,Tofteelva,59.547,10.568,010.2Z,010.2Z,10.0,114.0,191.0,,,68.0,One model river with 16


# TEOTIL source apportionment

## Read in data

TEOTIL columns:
* 'year',
* 'regine',
* 'regine_down',
* 'accum_agriculture-background_din_kg',
* 'accum_agriculture-background_ss_kg',
* 'accum_agriculture-background_tdp_kg',
* 'accum_agriculture-background_toc_kg',
* 'accum_agriculture-background_ton_kg',
* 'accum_agriculture-background_totn_kg',
* 'accum_agriculture-background_totp_kg',
* 'accum_agriculture-background_tpp_kg',
* ... repeat for all the other sources, e.g. instead of 'agriculture-background', have 'urban', 'wood', etc.
* ... repeat with local_ instead of accum_ for local inputs to the regine, rather than accumulated upstream inputs. Here, we're interested in accumulated upstream inpust.

i.e. param choices are totn, din, ton, ss, totp, tdp, tpp, toc

In [5]:
def extract_source(col_name):
    """
    Function to rename teotil columns
    """
    parts = col_name.split('_')
    return parts[1]


def read_teotil_data(teotil_data_fpath, par):
    """
    Function to read raw TEOTIL data, pick the columns of interest (just accumulated inputs
    from all upstream areas), rename columns, truncate to the period of interest.

    Do this for areas upstream of a given regine of interest
    """
    df = pd.read_csv(teotil_data_fpath)

    # Just pick out accumulated inputs (from all upstream areas) for the outflow reginer of interest
    # Also just pick columns for the single parameter of interest
    cols = ["regine", "year"] + [
        col for col in df.columns if f"_{par}_" in col and col.startswith("accum")
    ]
    df = df.loc[df['regine'].isin(river_meta_df['Regine'])][cols]

    # Truncate to start of model period onwards (currently to 2022)
    df = df.loc[df['year'] >= start_year]

    # Rename columns
    cols_to_exclude = ['regine', 'year']
    new_col_names = {col: extract_source(col) for col in df.columns if col not in cols_to_exclude}
    new_col_names = cols_to_exclude + list(new_col_names.values())
    col_dict = dict(zip(df.columns, new_col_names))
    df = df.rename(columns=col_dict)

    return df


def teotil_average_over_years(res_df):
    """
    Average over all years to get a single value per main catchment area (or real river).
    Replace 'regine' index with river numbers
    """

    # Average over the year column per regine
    teotil_av_df = res_df.groupby('regine').mean().drop('year', axis=1)

    # Replace 'regine' index with river numbers, for easier matching to the river data
    mapping_dict = river_meta_df.reset_index().set_index('Regine')['real_river'].to_dict()
    teotil_av_df.index = teotil_av_df.index.map(mapping_dict)
    teotil_av_df.index.name = 'real_river'

    return teotil_av_df

In [6]:
# Read baseline TEOTIL data (units kg/yr) and average over time

bsl_teotil_av_df_dict = {}  # key: par, returns df of time-averaged loads with real_river index, one column per source

for par in par_list:
    tmp_bsl_teotil_df = read_teotil_data(teotil_res_csv_fpath, par)
    tmp_bsl_teotil_av_df = teotil_average_over_years(tmp_bsl_teotil_df)
    bsl_teotil_av_df_dict[par] = tmp_bsl_teotil_av_df

tmp_bsl_teotil_av_df.tail()

Unnamed: 0_level_0,agriculture-background,agriculture,aquaculture,industry,large-wastewater,spredt,upland,urban,wood
real_river,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
19,132.379457,1719.620981,0.0,2.490909,0.0,699.577193,3.346944,451.203821,168.094473
20,560.435115,6172.8668,0.0,0.0,23.896295,1379.400918,4.133556,905.739596,245.095423
21,556.005103,6426.046169,0.0,1.818182,217.463828,3012.089593,1347.848205,2303.326467,3249.28574
22,583.663558,7873.287173,0.0,0.0,391.950108,2381.770284,3039.942105,4292.360247,4970.509126
23,49.88955,480.172549,0.0,137.256431,9.193679,65.016013,55.796393,324.680945,890.629429


## Define how TEOTIL sources are combined into different functional groups

In [8]:
# All sources considered  by TEOTIL
all_source_li = [
    "agriculture-background",  # Estimated natural runoff from agricultural land (i.e. non-human)
    "agriculture",
    "aquaculture",
    "industry",
    "lake",  # Direct deposition to lakes
    "large-wastewater",
    "spredt",  # Small works not connected to the main sewage system. Excludes watertight septic tanks
    "upland",
    "urban",  # Note: probably too big
    "wood",
]

point_source_li = ["aquaculture", "industry", "large-wastewater"]

diffuse_source_li = [
    "agriculture-background",
    "agriculture",
    "lake",
    "spredt",
    "upland",
    "urban",
    "wood",
]

human_diffuse_source_li = ['agriculture', 'spredt', 'urban']

human_source_li = [
    "agriculture",
    "aquaculture",
    "industry",
    "large-wastewater",
    "spredt",  # Small works not connected to the main sewage system. Excludes watertight septic tanks
    "urban",  # Note: probably too big
    # "lake"  # arguably mostly human, but not something local management can affect
]

natural_source_li = [
    "agriculture-background",  # Estimated natural runoff from agricultural land (i.e. non-human)
    "lake",  # Direct deposition to lakes (not really natural)
    "upland",
    "wood",  # Impacted by humans...
]

## TEOTIL proportion of load input per source per model river

Do this for all sources individually, and then split into point vs diffuse

In [19]:
f_sector_df_dict = {}

for par in par_list:
    # Total accumulated input per regine from all sources
    bsl_teotil_av_df = bsl_teotil_av_df_dict[par].copy()
    teotil_total_s = bsl_teotil_av_df.sum(axis=1)

    # Proportion per regine and source (checked that rows sum to 1, they do)
    tmp_f_sector_df = bsl_teotil_av_df.divide(teotil_total_s, axis=0)
    # print(f_sector_df.sum(axis=1).tail())

    # Remove any rows where real_river=NaN, i.e. Sweden (if present)
    tmp_f_sector_df = tmp_f_sector_df.reset_index().dropna(subset=['real_river']).set_index('real_river')

    # Select just the rivers of interest in the current domain
    tmp_f_sector_df = tmp_f_sector_df[tmp_f_sector_df.index.isin(oslofjord_riv_nos)]

    # Also calculate total flux over all rivers
    # Add as another row to f_sector_df with index='Total'
    total_flux = bsl_teotil_av_df.sum().sum()
    tmp_f_sector_df.loc['Total'] = bsl_teotil_av_df.sum(axis=0) / total_flux

    f_sector_df_dict[par] = tmp_f_sector_df

    print(par)
    print((tmp_f_sector_df.loc['Total', :] * 100).round(1))
    print("--------------------")

# tmp_f_sector_df.tail()

din
agriculture-background     6.8
agriculture               61.2
aquaculture                0.0
industry                   0.1
lake                       4.3
large-wastewater          11.9
spredt                     2.9
upland                     2.2
urban                      3.7
wood                       6.8
Name: Total, dtype: float64
--------------------
tdp
agriculture-background     3.8
agriculture               43.7
aquaculture                0.0
industry                   1.5
large-wastewater           2.3
spredt                    11.4
upland                     8.3
urban                     15.5
wood                      13.4
Name: Total, dtype: float64
--------------------


In [32]:
# # This isn't used in the rest of the notebook, but out of interest,
# # look at proportion of human vs natural, and point vs diffuse

# # Sum up to estimate point, diffuse, human sums
# f_groupsector_df = pd.DataFrame(index=f_sector_df.index)
# f_groupsector_df['point'] = f_sector_df[point_source_li].sum(axis=1)
# f_groupsector_df['diffuse'] = f_sector_df[diffuse_source_li].sum(axis=1)
# f_groupsector_df['human'] = f_sector_df[human_source_li].sum(axis=1)
# f_groupsector_df['natural'] = f_sector_df[natural_source_li].sum(axis=1)

# # Sum up diffuse and point inputs in separate dfs
# f_sector_df.drop('Total', axis=0, inplace=True)  # just in case
# f_groupsector_df.drop('Total', axis=0, inplace=True)  # just in case

# f_sector_diffuse_df = pd.DataFrame(index=f_sector_df.index)
# for sector in diffuse_source_li:
#     f_sector_diffuse_df[sector] = f_sector_df[sector] / f_groupsector_df['diffuse']

# f_sector_point_df = pd.DataFrame(index=f_sector_df.index)
# for sector in point_source_li:
#     f_sector_point_df[sector] = f_sector_df[sector] / f_groupsector_df['point']

# print(f_sector_diffuse_df.tail())
# print(f_sector_point_df.tail())

# f_groupsector_df.tail()

# Read in daily river data netcdf & calculate daily loads

## Read in and tidy

**TO DO:** units check. if not micrograms/l for concentrations, convert to this

In [20]:
riv_var_li = chem_var_li.copy() + ['river_transport']

ds = xr.open_dataset(fpath_baseline_data)

# Select just the variables of interest
ds = ds[riv_var_li]
ds

In [21]:
# Convert to dataframe
# N.B. loose units info then. m3/s for Q, ug/l for rest
conc_df = ds.to_dataframe()
ds.close()

conc_df = conc_df.reset_index()  # Drop multiindex

# # Add river name to df
# conc_df['river_name'] = conc_df['real_river'].map(river_meta_df['river_name'])

# Select just the Oslofjord rivers
conc_df = conc_df[conc_df['real_river'].isin(oslofjord_riv_nos)]

# Calculate DIN (and TON?)
conc_df['din'] = conc_df['river_NH4N'] + conc_df['river_NO3NO2N']
# Assume particulate organic N = 0, and that this is more robust than TON = PON+DON
# conc_df['ton'] = conc_df['river_TOTN'] - conc_df['din']

# Convert all numeric columns to float64 (chem cols were float32)
conc_df[riv_var_li] = conc_df[riv_var_li].astype(float)

# Round concentrations to 3 d.p. (N.B. only appropriate for ug/l)
conc_df[chem_var_li] = conc_df[chem_var_li].round(3)

# Truncate to start and end date
conc_df = conc_df.query('@start_date <= river_time <= @end_date')

conc_df.tail()

Unnamed: 0,river_time,real_river,river_NH4N,river_NO3NO2N,river_SRP,river_DOP,river_transport,din
470046,2022-12-31 12:00:00,19.0,19.452,113.672,4.435,14.105,16.971445,133.12413
470047,2022-12-31 12:00:00,20.0,274.545,1641.914,15.215,17.364,39.055298,1916.458252
470048,2022-12-31 12:00:00,21.0,23.371,245.374,4.409,1.765,114.3983,268.745117
470049,2022-12-31 12:00:00,22.0,14.959,186.198,1.735,0.0,152.18779,201.156967
470050,2022-12-31 12:00:00,23.0,10.357,161.186,1.956,2.663,56.865475,171.542664


In [22]:
# Explore for specific rivers/times
temp = conc_df.loc[conc_df['real_river'] == 14, ['river_time', 'river_NO3NO2N', 'river_transport']]
temp

Unnamed: 0,river_time,river_NO3NO2N,river_transport
327652,2013-01-01 12:00:00,1028.438,4.545937
327691,2013-01-02 12:00:00,1026.476,3.459199
327730,2013-01-03 12:00:00,1024.265,3.295671
327769,2013-01-04 12:00:00,1021.918,3.516406
327808,2013-01-05 12:00:00,1019.429,3.357926
...,...,...,...
469885,2022-12-27 12:00:00,1267.657,28.685225
469924,2022-12-28 12:00:00,1271.730,28.685225
469963,2022-12-29 12:00:00,1275.620,28.685225
470002,2022-12-30 12:00:00,1279.250,28.685225


## Calculate daily & annual loads from the river data

In [28]:
chem_var_li

['river_NH4N', 'river_NO3NO2N', 'river_SRP', 'river_DOP']

In [23]:
# Daily loads (kg/day)

# Make empty dataframe with just time & location info
daily_load_df = conc_df[['river_time', 'real_river', 'river_transport']].copy()

# Calculate load
for var in chem_var_li:
    # Units ug/l * m3/s * l/m3 * kg/ug * s/d = kg/d
    daily_load_df[var] = conc_df[var] * conc_df['river_transport'] * 10**3 * 10**-9 * 86400

chem_cols = daily_load_df.columns.drop(['river_time', 'real_river'])

daily_load_df.head()

Unnamed: 0,river_time,real_river,river_transport,river_NH4N,river_NO3NO2N,river_SRP,river_DOP
327644,2013-01-01 12:00:00,6.0,107.809326,99.304292,4136.529999,61.765947,74.555065
327645,2013-01-01 12:00:00,7.0,324.078766,692.954032,10022.521099,152.938214,67.984984
327646,2013-01-01 12:00:00,8.0,22.987099,199.130873,1214.673897,14.166747,55.131743
327647,2013-01-01 12:00:00,9.0,4.256338,68.907812,444.08025,6.296207,11.237632
327648,2013-01-01 12:00:00,10.0,5.223431,48.569385,674.590934,15.410242,32.430286


In [24]:
# Annual loads (kg/year)
annual_load_df = daily_load_df.copy().set_index('river_time')
annual_load_df = annual_load_df.groupby('real_river').resample('YE').sum()

# Tidy
annual_load_df.drop('real_river', axis=1, inplace=True)
annual_load_df.reset_index(inplace=True)

# # Convert to tonnes (10**3 kg) per year
# annual_load_df[chem_cols] = annual_load_df[chem_cols]/1000

annual_load_df['year'] = annual_load_df['river_time'].dt.year
annual_load_df.drop('river_time', axis=1, inplace=True)

annual_load_df

  annual_load_df = annual_load_df.groupby('real_river').resample('YE').sum()


Unnamed: 0,real_river,river_transport,river_NH4N,river_NO3NO2N,river_SRP,river_DOP,year
0,6.0,11638.923275,12148.756964,469662.678070,7494.151417,10415.829493,2013
1,6.0,17362.959624,24691.520987,717953.789318,10876.390786,14988.708546,2014
2,6.0,15944.471274,12583.748320,660110.994492,10766.769241,16454.385798,2015
3,6.0,9828.959388,4797.933241,428991.766840,8368.542909,7715.282000,2016
4,6.0,11269.550617,13040.528094,550383.871192,8713.595742,10699.462994,2017
...,...,...,...,...,...,...,...
175,23.0,15082.192174,10391.419472,171903.995662,2352.037634,3434.938267,2018
176,23.0,22771.725836,15494.641631,259391.318216,3676.820639,5246.290807,2019
177,23.0,23233.821154,17605.544572,266397.212112,3868.355357,5544.571538,2020
178,23.0,14801.839863,9994.525612,175106.990883,2456.633355,3551.098210,2021


# Split river data annual loads by sector
Using proportions from TEOTIL averaged over the period 2013-2022 (at the moment)

In [33]:
# par_of800 = 'river_NO3NO2N'
# par_teotil = 'din'

annual_groupsector_df_dict = {}

# Loop through each TEOTIL parameter
for par_teotil in par_list:
    # Loop through each corresponding OF800 variable mapped to the current TEOTIL parameter
    for par_of800 in var_mapping_teotil3_to_of800[par_teotil]:

        # Extract annual load data for the current OF800 variable
        annual_load_df_var = annual_load_df[['real_river', 'year', par_of800]].copy()
        annual_load_df_var.set_index('real_river', inplace=True)

        # Get the sector fraction dataframe for the current TEOTIL parameter
        f_sector_df = f_sector_df_dict[par_teotil].copy()

        # Remove the 'Total' row, which is not needed for calculations
        f_sector_df = f_sector_df.drop(['Total'], axis=0)

        # Join annual load data for the single of800 variable with sector fractions for
        # the corresponding teotil3 variable (result: df with index real_river, columns
        # year, par_of800, and one per sector
        merged_df = annual_load_df_var.join(f_sector_df, how='left')

        # Drop any rivers that are not present in TEOTIL (i.e., rows with any missing values)
        merged_df.dropna(how='any', inplace=True)

        # Initialize a new dataframe to store annual loads per sector per of800 variable
        annual_sector_df = merged_df[['year']].copy()

        # Calculate sector-specific loads by multiplying total load with sector fractions
        for sector in f_sector_df.columns:
            annual_sector_df[sector] = merged_df[par_of800] * merged_df[sector]

        # Reset index to make 'real_river' a column & drop years outside the desired date range
        annual_sector_df.reset_index(inplace=True)
        annual_sector_df = annual_sector_df.query('@start_year <= year <= @end_year')

        # Sum up to estimate point, diffuse, human sums
        annual_groupsector_df = annual_sector_df[['real_river', 'year']].copy()
        annual_groupsector_df['point'] = annual_sector_df[point_source_li].sum(axis=1)
        annual_groupsector_df['diffuse'] = annual_sector_df[diffuse_source_li].sum(axis=1)
        annual_groupsector_df['human'] = annual_sector_df[human_source_li].sum(axis=1)
        annual_groupsector_df['natural'] = annual_sector_df[natural_source_li].sum(axis=1)
        annual_groupsector_df['total'] = annual_groupsector_df['point'] + annual_groupsector_df['diffuse']

        annual_groupsector_df_dict[par_of800] = annual_groupsector_df

annual_groupsector_df.round(1)

Unnamed: 0,real_river,year,agriculture-background,agriculture,aquaculture,industry,large-wastewater,spredt,upland,urban,wood
0,6.0,2013,496.818261,3407.268180,0.0,3822.052500,52.694068,748.354240,68.526917,1028.878019,791.237308
1,6.0,2014,714.937214,4903.166831,0.0,5500.054605,75.828433,1076.905454,98.612404,1480.588057,1138.615547
2,6.0,2015,784.847654,5382.625089,0.0,6037.879789,83.243349,1182.211113,108.255260,1625.367991,1249.955553
3,6.0,2016,368.006503,2523.854185,0.0,2831.095966,39.031898,554.325894,50.759711,762.117320,586.090523
4,6.0,2017,510.347122,3500.051516,0.0,3926.130829,54.128981,768.732677,70.392974,1056.895401,812.783494
...,...,...,...,...,...,...,...,...,...,...,...
175,23.0,2018,85.145853,819.504318,0.0,234.253786,15.690733,110.961994,95.226986,554.128796,1520.025801
176,23.0,2019,130.045978,1251.655091,0.0,357.783281,23.964958,169.475793,145.443214,846.338589,2321.583902
177,23.0,2020,137.439813,1322.818626,0.0,378.125246,25.327498,179.111432,153.712468,894.457632,2453.578823
178,23.0,2021,88.025246,847.217647,0.0,242.175590,16.221350,114.714416,98.447295,572.867872,1571.428794


# Estimate daily loads from point sources vs diffuse sources
Key assumptions: constant daily load from point sources, irrespective of discharge; input from diffuse sources increases with discharge in an undefined way. So first estimate point source contribution to load, then estimate diffuse input by difference:
- Total daily load (already calculated above), L_tot =C_t Q_t
- Daily load from point sources, L(pt,daily) = L_pt,whole-period / number of days in the model period
- Daily load from diffuse sources, L(diffuse, daily) = L(tot, daily) - L(pt, daily)
- Plots to check looks sensible.


In [None]:
# Estimate point sources as a constant daily input over the whole period

# Sum up over the whole model period (start of start year to end of end year)
annual_point_df_sum = (
    annual_groupsector_df[["real_river", "year", "point"]].groupby("real_river").sum()
)
annual_point_df_sum.drop("year", axis=1, inplace=True)

# Divide by the number of days in the whole model period
tot_days = end_date - start_date
tot_days = tot_days.days

daily_point_df = annual_point_df_sum / tot_days
daily_point_df.head()

In [None]:
# Get daily load data for just the TEOTIL chemical variable of interest
daily_load_df_var = daily_load_df[['real_river', 'river_time', par]].copy()

# Reformat & rename columns
daily_load_df_var.columns = ['real_river', 'date', 'total']
# Drop 12:00 time info from the dates, for compatibility with other dfs
daily_load_df_var['date'] = daily_load_df_var['date'].dt.normalize()
daily_load_df_var.set_index(['real_river', 'date'], inplace=True)
# print(daily_load_df_var.head())

# Join the daily load df with the estimated daily point inputs
calc_load_df = daily_load_df_var.join(daily_point_df, on='real_river', how='outer')

# Estimate diffuse inputs by subtracting point from total
calc_load_df['diffuse_raw'] = calc_load_df['total'] - calc_load_df['point']

calc_load_df

## Plot uncorrected

In [None]:
real_rivers = calc_load_df.index.get_level_values('real_river').unique()

n_rows = int(np.ceil(len(real_rivers) / 3))
fig, axs = plt.subplots(n_rows, 3, figsize=(15, n_rows*3))
axs = axs.flatten()

# For each real_river
for i, real_river in enumerate(real_rivers):
    name = river_meta_df.loc[real_river, 'river_name']
    df = calc_load_df.loc[real_river]
    df.plot(ax=axs[i])
    axs[i].axhline(0, color='r')
    axs[i].set_xlabel('')
    axs[i].set_ylabel(r'Daily %s load (kg)' % par)
    axs[i].set_title(f'{name} (real river {real_river})')
    axs[i].legend()

plt.tight_layout()
plt.show()

**Comments**

**This is extremely influenced by the choice of whether to summarise TEOTIL inputs over the whole vassdragsområde or just the downstream regine associated with the main river**. The whole vassdragsområde includes point source inputs directly to the sea (e.g. VEAS), so of course this approach won't work for the whole vassdragsområde, unless these direct land-sea fluxes are first subtracted from the TEOTIL point sources. Perhaps something to look into for the future. For now, just go with the downstream REGINE as the TEOTIL unit.

Then, point source inputs are minor. The river with the highest point source inputs is in fact Glomma, at 20%. The approach works well everywhere including there, i.e. after subtracting daily point inputs during low flows, we're not going particularly negative.

There are however small negative values that need to be removed. These are small, so it's not worth fiddling too much... Approach: sum up the absolute of the negative over the whole period, divide by the days in the period, and then subtract from the diffuse load for days where this is > (total - point) inputs

## Redistribute negative mass

In [None]:
def remove_negatives(row):
    if row['point'] > row['total']:
        row['leftover'] = row['point'] - row['total']
        row['point'] = row['total']
        row['diffuse'] = 0
    else:
        row['point'] = row['point']
        row['diffuse'] = row['diffuse']
        row['leftover'] = 0
    return row

def redistribute_leftover(df):
    leftover_mass = df['leftover'].sum()
    n_days = len(df[df['diffuse'] > 0])
    mask = df['diffuse'] > 0
    df.loc[mask, 'point'] = df.loc[mask, 'point'] + leftover_mass / n_days
    df.loc[mask, 'diffuse'] = df.loc[mask, 'total'] - df.loc[mask, 'point']
    df['leftover'] = 0
    return df



In [None]:
calc_load_df = daily_load_df_var.join(daily_point_df, on='real_river', how='outer')

riv_nos = calc_load_df.index.get_level_values(0).unique().tolist()

daily_load_dict = {}  # Key: river number, returns df with datetime index, one col per source
                      # or combined sources

# Apply to real data
for riv_no in riv_nos:
    df = calc_load_df.loc[riv_no].copy()

    df['diffuse_raw'] = df['total'] - df['point']
    df['diffuse'] = df['diffuse_raw']
    df['leftover'] = 0
    df['point_raw'] = df['point']

    # print(f'Start {riv_no}')
    # print(df.round(1))

    counter = 0
    max_iterations = 20
    while len(df.loc[df['diffuse'] < 0]) > 0:
        # print('iteration %s' % counter)
        if counter >= max_iterations:
            print(f"Maximum number of iterations reached: {max_iterations}")
            break
        df = df.apply(remove_negatives, axis=1)
        df = redistribute_leftover(df)
        counter += 1

    print(f"River {riv_no}: {counter} iterations to redistribute mass")

    # Check original point source inputs and final ones are equal
    if df['point_raw'].sum().round(1) != df['point'].sum().round(1):
        print("New point source doesn't add up")

    df = df[['total', 'diffuse', 'point']]
    daily_load_dict[riv_no] = df

## Plot result

In [None]:
n_rows = int(np.ceil(len(real_rivers) / 3))
fig, axs = plt.subplots(n_rows, 3, figsize=(15, n_rows * 3))
axs = axs.flatten()
color_li = ["k", "b", "r"]

# For each real_river
for i, real_river in enumerate(real_rivers):
    name = river_meta_df.loc[real_river, "river_name"]
    df = daily_load_dict[real_river] / 1000
    df.plot(ax=axs[i], color=color_li, linewidth=0.5)
    axs[i].axhline(0, color="0.7", linewidth=0.5)
    axs[i].set_xlabel("")
    axs[i].set_ylabel(r"%s load (T/day)" % par)
    axs[i].set_title(f"{name} (real river {real_river})")
    axs[i].legend()

plt.tight_layout()
fpath = os.path.join(fig_folder, f"daily_riv-only_pt-vs-diffuse_ts_v{v}.png")
plt.savefig(fpath)
plt.show()

# Daily loads from all sectors

Do this based on:
1) The proportion of point source inputs which are large sewage vs industry
2) The proportion of diffuse source inputs which are agriculture, urban and spredt (the rest are "natural")

Then estimate daily loads as this proportion multiplied by the total daily load from point of diffuse sources.

In [None]:
for riv_no in riv_nos:
    # Alter df inplace in the dictionary (unless make a copy here)
    df = daily_load_dict[riv_no]

    for source in human_diffuse_source_li:
        df[source] = f_sector_diffuse_df.loc[riv_no, source] * df["diffuse"]

    for source in point_source_li:
        df[source] = f_sector_point_df.loc[riv_no, source] * df["point"]
        # f_sector_point_df has NaNs where the total point load was 0. Results in
        # NaNs for the daily load per sector. Replace with 0
        df[source]= df[source].fillna(0)

    df["background"] = df["total"] - df[human_diffuse_source_li + point_source_li].sum(axis=1)

    # Rename 'spredt'
    df.rename(columns={"spredt": "small-wastewater"}, inplace=True)

If none of the rivers have any aquaculture inputs (they probably shouldn't), then drop this column altogether

In [None]:
rivs_with_aquaculture = []
for riv_no in riv_nos:
    # Alter df inplace in the dictionary (unless make a copy here)
    df = daily_load_dict[riv_no]
    if df['aquaculture'].eq(0).all():
        # print(f'{riv_no}: all aquaculture inputs = 0')
        pass
    else:
        rivs_with_aquaculture.append(riv_no)

if len(rivs_with_aquaculture) == 0:
    print('Dropping aquaculture column')
    for riv_no in riv_nos:
        df = daily_load_dict[riv_no]
        df.drop('aquaculture', axis=1, inplace=True)
else:
    print(f'Rivers with aquaculture inputs: {rivs_with_aquaculture}')

In [None]:
# Quick look at output e.g. for Glomma
daily_load_dict[7].head()

## Plot by real river

**To do:**
- Turn into stacked line chart?
- Use this function for plots above (& move function up!)

In [None]:
cols_to_plot = ['total', 'large-wastewater', 'agriculture', 'urban', 'industry', 'small-wastewater', 'background']


def plot_timeseries_allrivs(real_rivers_li, daily_load_dict, cols_to_plot, fpath):
    n_rows = int(np.ceil(len(real_rivers_li) / 3))
    fig, axs = plt.subplots(n_rows, 3, figsize=(15, n_rows*3), sharex=False, sharey=False)
    axs = axs.flatten()
    # color_li = ['k', 'r', '']

    # For each real_river
    for i, real_river in enumerate(real_rivers_li):
        name = river_meta_df.loc[real_river, 'river_name']
        df = daily_load_dict[real_river].copy()/1000  # to T/yr
        df[cols_to_plot].plot(ax=axs[i], linewidth=0.5, legend=False)
        axs[i].set_xlabel('')
        axs[i].set_ylabel(r'DIN load (T/day)')
        axs[i].set_title(f'{name} (real river {real_river})')

    handles, labels = axs[-1].get_legend_handles_labels()
    fig.legend(handles, labels, loc='center right', bbox_to_anchor=(1.12, 0.5))

    plt.tight_layout()
    # fig.autofmt_xdate()

    plt.savefig(fpath)

    plt.show()

fpath = os.path.join(fig_folder, f"daily_riv-only_sector-inputs_ts_v{v}.png")
plot_timeseries_allrivs(real_rivers, daily_load_dict, cols_to_plot, fpath)

# Generate scenarios

Both WWTW and agricultural scenarios are now provided by TEOTIL (in contrast to work in 2024). There are two scenarios, one is more moderate, one more optimistic.

So the overall method is to:

1) Read scenario TEOTIL data, map from regine to real rivers, and average over time
2) Per real river, first check for each sector whether the load has changed in the scenario. Then if it has, calculate factor reductions for that sector compared to baseline
3) 

In [None]:
# Read scenario teotil data and average over time
scen_teotil_df = read_teotil_data(teotil_scen_res_fpath)
scen_teotil_av_df = teotil_average_over_years(scen_teotil_df)

# Calculate factor reductions from  this scenario per real river
col = 'large-wastewater'
f_wwtw_scenario_df = scen_teotil_av_df[col] / bsl_teotil_av_df[col]

# Have some NaNs (when baseline large wwtw inputs = 0). Replace any NaNs with 0
# (i.e. no change)
f_wwtw_scenario_df.fillna(0, inplace=True)

f_wwtw_scenario_df

The new WWTW inputs are around 23-30% of the original, i.e. a 70-80'ish percent reduction

In [None]:
wwtw_scen_load_dict = {}  # key: real riv no

for riv_no in daily_load_dict.keys():
    # Copy baseline data
    scen_load_df = daily_load_dict[riv_no].copy()[['agriculture', 'small-wastewater', 'urban', 'industry', 'background', 'large-wastewater']]

    # Reduce large wastewater by a set proportion per real river
    scen_load_df['large-wastewater'] = scen_load_df['large-wastewater'] * f_wwtw_scenario_df.loc[riv_no]

    # Add new total column
    scen_load_df['total'] = scen_load_df.sum(axis=1)

    wwtw_scen_load_dict[riv_no] = scen_load_df

# Quick check of output
print(f_wwtw_scenario_df.loc[7])
print(daily_load_dict[7]['large-wastewater'].head(2))
print(wwtw_scen_load_dict[7]['large-wastewater'].head(2))

## Other sectors: user-supplied % reduction

In [None]:
# Factor giving proportion of the original load per sector
# (i.e. new load = old load * factor)

# # Read in scenarios info?
# scen_setup_df = pd.read_csv(scenario_setup_fpath, index_col=0)

# Or input directly
scenario = 'RA80-J10'
scenario_dict = {
    "agriculture": 0.9,
    "industry": 1,
    "large-wastewater": 1,  # Don't change; taken care of in previous section
    "small-wastewater": 1,
    "urban": 1,
    "background": 1
}
scen_setup_df = pd.DataFrame(list(scenario_dict.items()), columns=['Sector', scenario])
scen_setup_df.set_index('Sector', inplace=True)

scen_setup_df

In [None]:
main_results_dict = {}  # key: (scenario, type), where type is 'loads', 'factor', 'conc'
                        # Returns loads_dict, factor_dict, conc_dict (key: real riv no)

for scenario in scen_setup_df.columns:

    scenario_loads_dict = {}
    scenario_factor_dict = {}
    scenario_conc_dict = {}  # Key: river

    for riv_no in daily_load_dict.keys():

        baseline_df = daily_load_dict[riv_no].copy()

        # Calculate loads per sector for the scenario
        scen_load_df = pd.DataFrame(index=baseline_df.index)

        for source in scen_setup_df.index:
            scen_load_df[source] = baseline_df[source] * scen_setup_df.loc[source, scenario]

        # Replace large-wwtw column with wwtw scenario already calculated above
        scen_load_df['large-wastewater'] = wwtw_scen_load_dict[riv_no]['large-wastewater']

        # Calculate new total load
        scen_load_df['total'] = scen_load_df.sum(axis=1)
        scenario_loads_dict[riv_no] = scen_load_df

        # ---------------------------------------------------------
        # Factor to multiply concentration by to get new concentration
        factor_s = scen_load_df['total'] / baseline_df['total']
        # Replace any NaNs with 1 (i.e. concentration is unchanged)
        factor_s.fillna(1, inplace=True)
        factor_s.name = 'factor'
        scenario_factor_dict[riv_no] = factor_s

        # ---------------------------------------------------------
        # Calculate new daily concentration: C_scenario = L_scenario / Q
        # or (equivalent): C_scenario = C_baseline * (L_scenario/L_baseline)
        temp_load_df = scen_load_df[["total"]].copy()
        temp_load_df.columns = [f"{par}_load"]

        # Adjust the time component of the datetime index to be centered on midday
        # to match chem_df
        temp_load_df.index = temp_load_df.index + pd.tseries.offsets.DateOffset(
            hours=12
        )
        factor_s = factor_s.copy()  # To not alter dictionary item inplace
        factor_s.index = factor_s.index + pd.tseries.offsets.DateOffset(hours=12)

        # Join new total load to conc_df, and the factor
        df = conc_df.loc[conc_df["real_river"] == riv_no].copy()
        df.set_index("river_time", inplace=True)
        df = pd.merge(
            df,
            temp_load_df[[f"{par}_load"]],
            left_index=True,
            right_index=True,
            how="left",
        )
        df = pd.merge(df, factor_s, left_index=True, right_index=True, how="left")

        # Calculate new value for par (two methods to validate; they produce the same result)
        # df[f"{par}_new"] = df['din_load'] / df['river_transport'] * 1/86400 * 10**9 * 10**-3
        df[f"{par}"] = df[f"{par}"] * df["factor"]

        if par == 'din':
            # Assume NH4 and NO3 are reduced by the same amount as din overall
            df['river_NH4N'] = df['river_NH4N'] * df["factor"]
            df['river_NO3NO2N'] = df['river_NO3NO2N'] * df["factor"]

            # Calculate new TOTN
            df['river_TOTN'] = df['din'] + df['river_DON'] + df['river_PON']

            # Drop cols used in calculation
            df.drop(derived_chemvar_dict[species] + [f"{par}_load", 'factor'], axis=1, inplace=True)

        # Bit of QC
        # Check for NaNs
        if df.isna().sum().sum() > 0:
            print(f"{riv_no} has NaNs. Needs fixing!")

        scenario_conc_dict[riv_no] = df

    main_results_dict[(scenario, 'loads')] = scenario_loads_dict
    main_results_dict[(scenario, 'factor')] = scenario_factor_dict
    main_results_dict[(scenario, 'conc')] = scenario_conc_dict

## Quick check of output

All looks ok for Glomma

In [None]:
riv = 7
scen = 'RA80-J10'
st = '2017-07-01'
end = '2017-07-10'
par = 'river_NO3NO2N'

scenario_loads_dict = main_results_dict[(scen, 'loads')]
scenario_factor_dict = main_results_dict[(scen, 'factor')]
scenario_conc_dict = main_results_dict[(scen, 'conc')]

# Baseline loads per sector + total
bsl_load = daily_load_dict[riv].copy().loc[st: end, 'total']
daily_load_dict[riv].copy().loc[st: end]

In [None]:
# Scenario loads
scen_load = scenario_loads_dict[riv].copy().loc[st: end, 'total']
scenario_loads_dict[riv].copy().loc[st: end]

In [None]:
# Factor for calculating concs
scenario_factor_dict[riv].loc[st: end]

In [None]:
scen_load/bsl_load

In [None]:
# Original conc
bsl_conc = conc_df.loc[conc_df["real_river"] == riv].set_index('river_time').loc[st: end, par]
conc_df.loc[conc_df["real_river"] == riv].set_index('river_time').loc[st: end]

In [None]:
# New concs
scen_conc = scenario_conc_dict[riv].loc[st: end, par]
scenario_conc_dict[riv].loc[st: end]

In [None]:
scen_load/bsl_load

In [None]:
scen_conc/bsl_conc

## Generate netcdf files

Read in Phil's original file, truncate to the period I've updated (i.e. the date which TEOTIL covers), and then update the concentration columns for the variables that have been updated through the scenario.

In [None]:
ds_scen_dict = {}  # key: scenario name

for scenario in scen_setup_df.columns:

    # scenario_loads_dict = main_results_dict[(scenario, 'loads')]
    # scenario_factor_dict = main_results_dict[(scenario, 'factor')]
    scenario_conc_dict = main_results_dict[(scenario, 'conc')]

    update_cols_li = ["river_NH4N", "river_NO3NO2N", "river_TOTN"]

    # Read in original river file & truncate to start and end date of scenarios
    ds_scen = xr.open_dataset(fpath_baseline_data)
    ds_scen = ds_scen.sel(river_time=slice(start_date, end_date))
    print(ds_scen.river_time.data[-1])  # Check not chopped end: ok

    # Update the relevant chemistry values
    for real_river, df in scenario_conc_dict.items():
        df = df.copy().reset_index()  # To not modify it inplace
        df_ds = xr.Dataset.from_dataframe(df)
        for col in update_cols_li:
            ds_scen[col].loc[
                {"real_river": real_river, "river_time": df_ds["river_time"]}
            ] = df_ds[col]

    # Update the 'History' attribute of the ds_scen dataset
    current_date = dt.datetime.now().date().strftime("%Y-%m-%d")
    existing_history = ds_scen.attrs["history"]
    new_history_str = (
        f"; Update {current_date}, Leah JB (NIVA, ljb@nivano): "
        "This is a daily concentration reduction scenario. v2 extends to the full of800 domain"
        "(i.e. includes 3 rivers not included in v1)."
        "See 'make_scenarios.ipynb' in GitHub repository https://github.com/oslofjord-load-reductions/terrestrial-load-scenarios"
    )
    new_history = existing_history + new_history_str
    ds_scen.attrs['history'] = new_history

    # Save
    netcdf_fpath = os.path.join(netcdf_outfolder, f"river_din_reduction_{scenario}_v{v}.nc")
    ds_scen.to_netcdf(netcdf_fpath)
    print(f"Scenario saved: {netcdf_fpath}")
    ds_scen_dict[scenario] = ds_scen
    ds_scen.close()

# Quick plot of results

The factor difference between load and concentration should be the same on the plots below. Because the load range is much larger than the concentration range, the scale is much more 'squished', so it doesn't look like it by eye. But if you zoom in (commented-out code below), then it is correct.

In [None]:
ds_bsl = xr.open_dataset(fpath_baseline_data)
ds_bsl = ds_bsl.sel(river_time=slice(start_date, end_date))

var = "river_NO3NO2N"

for riv_no in riv_nos:

    riv_name = river_meta_df.loc[riv_no, "river_name"]

    # Plot the baseline
    fig, axs = plt.subplots(2, 1, figsize=(15, 6), sharex=True, sharey=False)
    axs = axs.flatten()

    # Loads
    plot_bsl_load_df = daily_load_dict[riv_no][["total"]].copy() / 1000  #.loc[st_dt:end_df_dt, :] / 1000
    axs[0].plot(plot_bsl_load_df.index.values, plot_bsl_load_df["total"], label="baseline")

    # Concentrations
    plot_bsl_ds = ds_bsl.sel(real_river=riv_no)
    axs[1].plot(plot_bsl_ds["river_time"].values, plot_bsl_ds[var].values, label="baseline")

    # Plot scenarios
    for scenario in scen_setup_df.columns:

        # Get concentration data
        plot_conc_df = main_results_dict[(scenario, 'conc')][riv_no].copy()
        plot_scen_ds = ds_scen_dict[scenario].copy().sel(real_river=riv_no)

        # Get loads data
        plot_scen_load_df = (
            main_results_dict[(scenario, 'loads')][riv_no][["total"]]
            .copy() / 1000
        )

        # Plot loads
        axs[0].plot(plot_scen_load_df.index.values, plot_scen_load_df["total"], label='scenario')
        axs[0].set_ylabel("NO3-N load (T/d)")
        axs[0].set_ylim(ymin=0)

        # Plot concs
        axs[1].plot(plot_scen_ds["river_time"].values, plot_scen_ds[var].values, label='scenario')
        # axs[1].plot(plot_conc_df.index, plot_conc_df[var].values, label='df_scenario')

        # --------------------------------------------------

    # # Zoom in
    # plot_start_date = datetime.strptime(st, '%Y-%m-%d')
    # plot_end_date = datetime.strptime(end, '%Y-%m-%d')
    # axs[0].set_xlim(xmin=plot_start_date, xmax=plot_end_date)
    # axs[1].set_xlim(xmin=plot_start_date, xmax=plot_end_date)
    # axs[0].set_ylim(ymax=25)
    # # axs[1].set_ylim(ymax=300)
    # axs[0].yaxis.grid(True, which='major')
    # axs[1].yaxis.grid(True, which='major')

    # Tidy up & save
    axs[1].set_ylabel("NO3-N conc (ug/l)")
    axs[0].legend()
    plt.suptitle(f'{riv_no}: {riv_name}')
    plt.tight_layout()

    fpath = os.path.join(fig_folder, "scenario_tseries", f"scenario_ts_{riv_name}_v{v}.png")
    plt.savefig(fpath)

    plt.show()

## Comments

* Scenario concentrations can have unrealistic step changes in them. This is especially bad for Tista, where the baseline concentration is smooth enough, but the baseline load is already very steppy, so then we get very steppy scenarios. Even more so as in Tista there was a lot of redistribution of negative mass, i.e. times when the scenario reduction factor = 1, so then the concentration hops up to the baseline.

# Summary of effect of scenarios

Rough back-of-envelope estimate of % reduction is about 15% (from about 75% reduction of sewage inputs, 10% of agricultural inputs, and given 63% of inputs are agricultural, 12% are WWTW)

In [None]:
((0.63 * 0.1) + (0.12 * 0.75) + (0.25 * 0)) / (0.63 + 0.12 + 0.25)

In [None]:
multi_index_df = pd.concat([df['total'] for df in daily_load_dict.values()], keys=daily_load_dict.keys())
multi_index_df.head()

In [None]:
def process_dict(loads_dict, input_column_name, output_col_name):
    # Concatenate dataframes with keys as the first level of the index
    # Keep only the desired column from each dataframe
    multi_index_df = pd.concat([df[input_column_name] for df in loads_dict.values()], keys=loads_dict.keys())

    # Group by the first level of the index (riv_number), resample to annual frequency, sum within each year,
    # then calculate the mean over years
    yr_sum_s = multi_index_df.groupby(level=0).resample('YE', level=1).sum()
    yr_meansum_s = yr_sum_s.groupby(level=0).mean()

    # Reset the index and rename the columns
    yr_meansum_s.name = output_col_name

    return yr_meansum_s


# Combine into one df
def combine_processed(baseline_s, scenario_s, column_name):
    # Merge the two dataframes on the index (river_number)
    result_df = pd.merge(baseline_s, scenario_s, left_index=True, right_index=True)

    total_row = result_df.sum()
    total_row.name = column_name
    result_df.loc[column_name, :] = total_row

    # Convert from kg to T and round
    result_df = (result_df / 1000).round(2)

    result_df['f'] = result_df['scenario'] / result_df['baseline']
    result_df['1-f'] = 1 - result_df['f']

    # Print the resulting dataframe
    print(result_df)
    return result_df

In [None]:
# Total load reduction for oslofjord
baseline_s = process_dict(daily_load_dict, 'total', 'baseline')
scenario_s = process_dict(scenario_loads_dict, 'total', 'scenario')
result_df = combine_processed(baseline_s, scenario_s, 'total')

The total load reduction for the Oslofjord is 14%, very close to the 15% expected from the back-of-the-envelope calculation. Phew! Glomma: 16% reduction; Drammenselva: 15% reduction. Rest: less, sometimes a lot less.

To do:
* Look at seasonality
* In develop_make_scenarios, look at change in sewage inputs (or separate notebook?).

In [None]:
# Agricltural load reduction for oslofjord
baseline_s = process_dict(daily_load_dict, 'agriculture', 'baseline')
scenario_s = process_dict(scenario_loads_dict, 'agriculture', 'scenario')
result_df = combine_processed(baseline_s, scenario_s, 'agriculture')

fpath = os.path.join(csv_exploration_folder, f"v{v}", "change_agricultural_loads.csv")
result_df.to_csv(fpath)

In [None]:
# Large WWTW
# Agricltural load reduction for oslofjord
sector = 'large-wastewater'
baseline_s = process_dict(daily_load_dict, sector, 'baseline')
scenario_s = process_dict(scenario_loads_dict, sector, 'scenario')
result_df = combine_processed(baseline_s, scenario_s, sector)

fpath = os.path.join(csv_exploration_folder, f"v{v}", "change_river-wwtw_loads.csv")
result_df.to_csv(fpath)