# Converter from pommesinvest to AMIRIS
Convert pommesinvest results to AMIRIS input data format

> **IMPORTANT**
>
> * Run `investment_results_inspection.ipynb` once beforehand.
> * Run `exogenous_plant_analyses.ipynb` once beforehand.
> * Ensure that the necessary model input and results data are available at the respective locations.

## Package imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from pommesevaluation.amiris_converter import (
    convert_annual_data_to_fame_time, 
    convert_time_series_index_to_fame_time,
    resample_to_hourly_frequency,
    extract_net_operation,
)

## Notebook and workflow settings

In [None]:
# --- Simulation settings ---------------------------------------------------
time_frame_in_years = 26
freq = "1H"
# Factor handed to the hourly resampling for each supported frequency
multiplier = {"1H": 1, "4H": 4, "8H": 8}
fuel_cost_pathway = "NZE"
fuel_price_shock = "high"
emissions_cost_pathway = "long-term"

# --- Locations of the data to be converted ---------------------------------
path_model_inputs = "./model_inputs/pommesinvest/"
path_model_results = "./model_results/pommesinvest/"
path_processed_outputs = "./data_out/"

# --- File names that do not depend on the demand response scenario ---------
filename_investment = "results_for_amiris_total_dr_scenario"
filename_backup = "installed_capacity_backup_generation.csv"
filename_storage = "installed_storage_capacity.csv"
file_name_res_capacities = "sources_renewables_investment_model.csv"
file_name_res_generation = "sources_renewables_ts_hourly.csv"

# Demand response scenarios (keys) and the flex options scenario (values)
# that corresponds to each of them
dr_scenarios = {"none": "50", "5": "95", "50": "50", "95": "5"}

# --- Demand files: the "none" scenario uses the plain demand files ---------
file_names_demand = {
    scen: (
        "sinks_demand_el.csv"
        if scen == "none"
        else f"sinks_demand_el_excl_demand_response_{scen}.csv"
    )
    for scen in dr_scenarios
}
file_names_demand_ts = {
    scen: (
        "sinks_demand_el_ts_hourly.csv"
        if scen == "none"
        else f"sinks_demand_el_excl_demand_response_ts_{scen}_hourly.csv"
    )
    for scen in dr_scenarios
}


def _dispatch_results_file_name(dr_label):
    """Return the dispatch results file name for the given demand response label."""
    return (
        f"investment_LP_start-2020-01-01_{time_frame_in_years}"
        f"-years_simple_freq_{freq}_{dr_label}_"
        f"fuel_price-{fuel_cost_pathway}_{fuel_price_shock}_"
        f"co2_price-{emissions_cost_pathway}_production.csv"
    )


# --- Dispatch results: the "none" scenario maps to the "no_dr_50" run ------
file_names_dispatch_results = {
    scen: _dispatch_results_file_name(
        "no_dr_50" if scen == "none" else f"with_dr_{scen}"
    )
    for scen in dr_scenarios
}
file_name_demand_response_eligibility = "demand_response_clusters_eligibility.csv"

# --- Prices and costs ------------------------------------------------------
file_name_co2_prices = f"costs_emissions_{emissions_cost_pathway}_nominal_indexed_ts.csv"
file_name_fuel_prices = f"costs_fuel_{fuel_cost_pathway}_{fuel_price_shock}_nominal_indexed_ts.csv"
# Cost files are named after the flex options scenario, not the dr scenario
file_names_opex = {
    scen: f"variable_costs_{dr_scenarios[scen]}%_nominal.csv"
    for scen in dr_scenarios
}
file_names_fixed_costs = {
    scen: f"fixed_costs_{dr_scenarios[scen]}%_nominal.csv"
    for scen in dr_scenarios
}
file_names_investment_expenses = {
    scen: f"investment_expenses_{dr_scenarios[scen]}%_nominal.csv"
    for scen in dr_scenarios
}

# --- Conventional power plants ---------------------------------------------
file_name_investment_options = "transformers_investment_options.csv"
file_name_availabilities = "transformers_availability_ts_hourly.csv"

file_name_transformers = "transformers_exogenous.csv"
file_name_transformers_max = "transformers_exogenous_max_ts.csv"

# Entries to leave out of the converted investment results
exclude_from_investment = ["PHS_inflow", "battery_inflow"]

# Demand response cluster that the AMIRIS analyses focus on
demand_response_focus_cluster = "ind_cluster_shift_only"

# Baseline load files; unlike the names above these already include the input path
file_names_baseline_load = {
    scen: path_model_inputs + f"sinks_demand_response_el_ts_{scen}.csv"
    for scen in dr_scenarios
}

# Read in and convert
## Obtain demand response clusters before actual results extraction

In [None]:
# Load the demand response cluster eligibility table (first column becomes the index)
demand_response_clusters = pd.read_csv(
    path_model_inputs + file_name_demand_response_eligibility,
    index_col=0,
)
demand_response_cluster_index = demand_response_clusters.index

# Exclude every demand response cluster from the investments, then re-admit
# the focus cluster (raises ValueError if it is not among the clusters)
exclude_from_investment += list(demand_response_cluster_index)
exclude_from_investment.remove(demand_response_focus_cluster)

## Transformers & renewables
* Different for all scenarios: Investment results
    * transformers + demand response focus cluster only
* Same across all scenarios:
    * Backup generation (exogenous transformers)
    * Renewable capacities & generation

In [None]:
# Convert the investment results of each demand response scenario
for dr_scenario in dr_scenarios:
    investment_results_file = (
        f"{path_processed_outputs}{filename_investment}_{dr_scenario}.csv"
    )
    # The file has a two-level column header
    invest_results_all = pd.read_csv(
        investment_results_file, index_col=0, header=[0, 1]
    )

    # Keep only columns whose first header level is not excluded
    columns_to_keep = [
        col
        for col in invest_results_all.columns
        if col[0] not in exclude_from_investment
    ]
    invest_results = invest_results_all[columns_to_keep]

    _ = convert_annual_data_to_fame_time(
        invest_results,
        save=True,
        path=f"{path_processed_outputs}/amiris/{dr_scenario}/",
        filename="installed_capacity_ts",
        rounding_precision=2,
    )

In [None]:
# Backup capacities and renewables are shared by all scenarios
all_scenarios_path = f"{path_processed_outputs}/amiris/all_scenarios/"

# Backup generation capacities (annual data)
backup_capacities = pd.read_csv(path_processed_outputs + filename_backup, index_col=0)
_ = convert_annual_data_to_fame_time(
    backup_capacities,
    save=True,
    path=all_scenarios_path,
    filename="exogenous_installed_capacity_ts",
    rounding_precision=2,
)

# Renewable generation (time series data)
res_generation = pd.read_csv(path_model_inputs + file_name_res_generation, index_col=0)
_ = convert_time_series_index_to_fame_time(
    res_generation,
    save=True,
    path=all_scenarios_path,
    filename="res_generation_ts",
    rounding_precision=4,
)

# Renewable capacities: keep rows with country "DE" and label each row by the
# third "_"-separated token of its original index entry
res_capacities_2020 = pd.read_csv(path_model_inputs + file_name_res_capacities, index_col=0)
is_german = res_capacities_2020["country"] == "DE"
res_capacities_2020 = res_capacities_2020.loc[is_german]
third_index_token = res_capacities_2020.index.str.split("_", expand=True).get_level_values(2)
res_capacities_2020 = res_capacities_2020["capacity"].round(2)
res_capacities_2020.index = third_index_token
res_capacities_2020.to_csv(all_scenarios_path + "installed_renewable_capacities_2020.csv")

## "Net" Demand
AMIRIS has a shortcoming when it comes to modelling competing demand-side flexibility options. Therefore, all flexibility options except the one demand response cluster in focus are not modelled explicitly. Instead, their dispatch is accounted for by calculating a net demand, i.e. a
* demand after
    * imports and exports, (add net exports, i.e. additional demand)
    * storages, (subtract net outflow)
    * demand response (except for the focus cluster)
    * electrolyzer operation

In [None]:
# Build the "net" demand per demand response scenario: start from the original
# demand and adjust it step by step with the dispatch results of imports/exports,
# storages, non-focus demand response clusters and electrolyzers.
# Both dictionaries are keyed by demand response scenario.
original_demand = {}
demand_after_flexibility = {}

for dr_scenario in dr_scenarios:
    # Original demand
    sinks_demand_el = pd.read_csv(f"{path_model_inputs}{file_names_demand[dr_scenario]}", index_col=0)
    max_demand = sinks_demand_el.at["DE_sink_el_load", "maximum"]
    sinks_demand_el_ts = pd.read_csv(f"{path_model_inputs}{file_names_demand_ts[dr_scenario]}", index_col=0)
    # Scale the load time series by the "maximum" value
    # (presumably the time series is a normalized profile - TODO confirm)
    original_demand[dr_scenario] = sinks_demand_el_ts["DE_sink_el_load"] * max_demand
    
    # Work on a copy with a DatetimeIndex; the stored original keeps the index as read
    demand_after_flex = original_demand[dr_scenario].copy()
    demand_after_flex.index = pd.to_datetime(demand_after_flex.index)
    
    # Read in dispatch results
    dispatch_results = pd.read_csv(
        f"{path_model_results}{file_names_dispatch_results[dr_scenario]}", index_col=0
    )
    
    # Filter imports and exports and add net exports (exports - imports) to demand
    net_exports = extract_net_operation(
        dispatch_results,
        column_str="link_",
        outflow_column_str="DE_link_",
        inflow_column_str="_link_DE",
    )
    
    # Resample (different frequency) and adjust demand after flex
    net_exports = resample_to_hourly_frequency(net_exports, multiplier[freq])    
    demand_after_flex += net_exports
    
    # Filter electrical storage results and subtract net storage (outflow - inflow) from demand
    net_storage_outflow = extract_net_operation(
        dispatch_results,
        column_str="DE_storage_el",
        outflow_column_str=", 'DE_bus_el')",
        inflow_column_str="('DE_bus_el',",
    )
    
    # Resample (different frequency) and adjust demand after flex
    net_storage_outflow = resample_to_hourly_frequency(net_storage_outflow, multiplier[freq])    
    demand_after_flex -= net_storage_outflow
    
    # Filter demand response results except for the focus cluster and add net demand
    # A column is selected once for every non-focus cluster name it contains,
    # so the same column may show up more than once in the selection
    demand_response_results = dispatch_results[
        [
            col for col in dispatch_results.columns
            for cluster in demand_response_clusters.index
            if cluster in col and ", 'flow')" in col
            and cluster != demand_response_focus_cluster
        ]
    ]

    # Drop duplicated columns (keep the first occurrence of each column label)
    demand_response_results = demand_response_results.loc[:, ~demand_response_results.columns.duplicated()]
    try:
        demand_response_results = resample_to_hourly_frequency(demand_response_results, multiplier[freq])
        demand_after_flex += demand_response_results.sum(axis=1)
    # Skip in case no demand response is modelled
    # NOTE(review): presumably the helper raises ValueError for an empty selection - confirm
    except ValueError:
        pass
    
    # Filter electrolyzer dispatch and add electrolyzer electricity consumption to demand
    # NOTE(review): presumably extract_net_operation yields outflow minus inflow, so with
    # no matching outflow column the result is the negative consumption and .mul(-1)
    # turns it into a positive value - confirm against the helper
    electrolyzer_dispatch = extract_net_operation(
        dispatch_results,
        column_str="electrolyzer",
        outflow_column_str="NO_OUTFLOW",  # outflow goes to hydrogen bus
        inflow_column_str="('DE_bus_el',",
    ).mul(-1)
    
    # Resample (different frequency) and adjust demand after flex
    electrolyzer_dispatch = resample_to_hourly_frequency(electrolyzer_dispatch, multiplier[freq])    
    demand_after_flex += electrolyzer_dispatch
    
    # Increase load by baseline demand of load shifting focus cluster
    if dr_scenario != "none":
        max_capacity_focus_cluster = pd.read_csv((
                f"{path_model_inputs}{demand_response_focus_cluster}_"
                f"potential_parameters_{dr_scenario}%.csv"
            ), 
            index_col = 0
        )[["max_cap"]]
        # Turn the index labels into timestamp strings for the first hour of each year
        max_capacity_focus_cluster.index = max_capacity_focus_cluster.index.astype(str) + "-01-01 00:00:00"
        max_capacity_focus_cluster = resample_to_hourly_frequency(max_capacity_focus_cluster, multiplier[freq])
        # Back to string labels; presumably so that the index matches the index of the
        # baseline load profile read below - TODO confirm
        max_capacity_focus_cluster.index = max_capacity_focus_cluster.index.astype(str)
        
        baseline_load_profile_focus_cluster = pd.read_csv(
            file_names_baseline_load[dr_scenario], 
            index_col=0
        )[demand_response_focus_cluster]
        # Index-aligned multiplication of the profile with the maximum capacity
        baseline_load_profile_focus_cluster = (
            baseline_load_profile_focus_cluster * max_capacity_focus_cluster["max_cap"]
        )
        
        # Added positionally via .values, i.e. without index alignment
        demand_after_flex += baseline_load_profile_focus_cluster.values
        
    # Store demand after flexibility; adjust to FAME time and save file
    # The stored copy is rounded here; the unrounded series is handed to the
    # converter together with rounding_precision=2
    demand_after_flexibility[dr_scenario] = demand_after_flex.round(2)

    _ = convert_time_series_index_to_fame_time(
        demand_after_flex,
        save=True,
        path=f"{path_processed_outputs}/amiris/{dr_scenario}/",
        filename="demand_after_flex_ts",
        rounding_precision=2,
    )

In [None]:
# Plot the demand after flexibility of every scenario in its own subplot.
n_scenarios = len(demand_after_flexibility)

# squeeze=False always yields a 2D array of axes; without it a single scenario
# would return a bare Axes object that cannot be indexed.
fig, axs = plt.subplots(n_scenarios, figsize=(15, 5 * n_scenarios), squeeze=False)
axs = axs[:, 0]  # one column only, so work with a 1D array of axes

for ax, (key, value) in zip(axs, demand_after_flexibility.items()):
    value.plot(ax=ax)
    # Label each subplot with its scenario so the panels can be told apart
    ax.set_title(f"Demand response scenario: {key}")
    # Same y-range on all subplots to keep them comparable
    ax.set_ylim(0, 200000)
plt.show()

## Prices and costs
Extract prices and costs from input data and convert to FAME format:
* Same across all scenarios:
    * CO2 price
    * Fuel prices
* Different per scenario:
    * OPEX
    * fixed costs

In [None]:
# CO2 price: index cut to its first four characters, the "DE_source_hardcoal"
# column is kept and relabelled as "nominal_value"
co2_price = pd.read_csv(path_model_inputs + file_name_co2_prices, index_col=0)
co2_price.index = co2_price.index.str[:4]
co2_price = co2_price[["DE_source_hardcoal"]]
co2_price.columns = ["nominal_value"]

# Fuel prices: index cut the same way, only columns containing "DE_" are kept
fuel_prices = pd.read_csv(path_model_inputs + file_name_fuel_prices, index_col=0)
fuel_prices.index = fuel_prices.index.str[:4]
german_fuel_columns = [col for col in fuel_prices.columns if "DE_" in col]
fuel_prices = fuel_prices[german_fuel_columns]

# Both data sets are valid for all scenarios and converted identically
for price_data, price_file_name in (
    (co2_price, "emissions_costs"),
    (fuel_prices, "fuel_prices"),
):
    _ = convert_annual_data_to_fame_time(
        price_data,
        save=True,
        path=f"{path_processed_outputs}/amiris/all_scenarios/",
        filename=price_file_name,
        rounding_precision=3,
    )

In [None]:
# OPEX and fixed costs per scenario
for dr_scenario in dr_scenarios:
    scenario_output_path = f"{path_processed_outputs}/amiris/{dr_scenario}/"

    # OPEX: cut the index to its first four characters and convert
    opex = pd.read_csv(path_model_inputs + file_names_opex[dr_scenario], index_col=0)
    opex.index = opex.index.str[:4]
    _ = convert_annual_data_to_fame_time(
        opex,
        save=True,
        path=scenario_output_path,
        filename="opex",
        rounding_precision=3,
    )

    # Fixed costs are written out unconverted (";"-separated). According to the
    # original note they are given as a percentage of investment per year and
    # would have to be calculated first if they are used at all.
    fixed_costs = pd.read_csv(
        path_model_inputs + file_names_fixed_costs[dr_scenario], index_col=0
    )
    fixed_costs.to_csv(scenario_output_path + "fixed_costs.csv", sep=";")

## Availabilities for conventionals

In [None]:
# Availability time series; written to the folder shared by all scenarios
availabilities = pd.read_csv(
    path_model_inputs + file_name_availabilities,
    index_col=0,
)
_ = convert_time_series_index_to_fame_time(
    availabilities,
    save=True,
    path=f"{path_processed_outputs}/amiris/all_scenarios/",
    filename="availabilities",
    rounding_precision=3,
)

## Demand response
Extract the following pieces of information:
* Load shedding by eligible clusters:
    * overall availability time series for load shedding
    * variable costs for shedding
* Demand response focus cluster (shifting):
    * normalized availability for upshift and downshift of the focus cluster
    * costs and potential parameters for demand response focus cluster

In [None]:
# Expand the annual investment results to hourly frequency for creating the
# shedding time series.
# NOTE(review): invest_results_all is whatever was assigned last before this
# cell runs, i.e. presumably the results of the last scenario iterated when
# reading the investments - confirm that this is intended.
annual_invest_results = invest_results_all.copy()
# Repeat the last row under the label 2051 so that forward filling also covers
# the final year; the extra row itself is cut off again below
annual_invest_results.loc[2051] = annual_invest_results.iloc[-1]
annual_invest_results.index = pd.to_datetime(
    [f"{year}-01-01" for year in annual_invest_results.index]
)
hourly_invest_results_all = annual_invest_results.resample("H").ffill().iloc[:-1]

In [None]:
# Convert the demand response data of every scenario that models demand response:
# load shedding parameters for all eligible clusters and costs, potentials and
# time series for the focus cluster.
for dr_scenario in dr_scenarios:
    # The scenario without demand response has nothing to convert here
    if dr_scenario == "none":
        continue

    scenario_output_path = f"{path_processed_outputs}/amiris/{dr_scenario}/"

    # Load the investment results of *this* scenario and expand them to hourly
    # frequency. Using one shared frame for all scenarios would scale the
    # shedding availabilities with the capacities of a different scenario.
    scenario_invest_results = pd.read_csv(
        f"{path_processed_outputs}{filename_investment}_{dr_scenario}.csv",
        index_col=0,
        header=[0, 1],
    )
    # Repeat the last row under the label 2051 so that forward filling also
    # covers the final year; the extra row itself is cut off again
    scenario_invest_results.loc[2051] = scenario_invest_results.iloc[-1]
    scenario_invest_results.index = pd.to_datetime(
        [f"{idx}-01-01" for idx in scenario_invest_results.index]
    )
    hourly_scenario_invest_results = scenario_invest_results.resample("H").ffill()[:-1]

    availability_pos_ts = pd.read_csv(
        f"{path_model_inputs}sinks_demand_response_el_ava_pos_ts_{dr_scenario}.csv",
        index_col=0,
    )
    # DatetimeIndex is needed to align with the hourly investment results
    availability_pos_ts.index = pd.to_datetime(availability_pos_ts.index)
    availability_neg_ts = pd.read_csv(
        f"{path_model_inputs}sinks_demand_response_el_ava_neg_ts_{dr_scenario}.csv",
        index_col=0,
    )
    baseline_load_profile = pd.read_csv(
        f"{path_model_inputs}sinks_demand_response_el_ts_{dr_scenario}.csv",
        index_col=0,
    )

    # Prepare data to parameterize load shedding
    shedding_clusters = demand_response_clusters.loc[
        demand_response_clusters["shedding"] == 1
    ].index
    for dr_cluster in shedding_clusters:
        # Clusters without a variable costs file are skipped; only the read is
        # guarded so that other errors are not silently swallowed
        try:
            var_costs = pd.read_csv(
                f"{path_model_inputs}{dr_cluster}_variable_costs_parameters_{dr_scenario}%.csv",
                index_col=0,
            )
        except FileNotFoundError:
            continue

        var_costs.index = var_costs.index.str[:4]
        var_costs.loc["2020"].to_csv(
            f"{scenario_output_path}{dr_cluster}_variable_costs_2020.csv"
        )
        _ = convert_annual_data_to_fame_time(
            var_costs,
            save=True,
            path=scenario_output_path,
            filename=dr_cluster,
            rounding_precision=3,
        )
        # Scale the availability with the cluster's value in the hourly
        # investment results of this scenario (index-aligned)
        availability_pos_ts[dr_cluster] *= hourly_scenario_invest_results[
            (dr_cluster, dr_cluster)
        ]
        _ = convert_time_series_index_to_fame_time(
            availability_pos_ts[dr_cluster],
            save=True,
            path=scenario_output_path,
            filename=f"availability_shedding_{dr_cluster}",
            rounding_precision=3,
        )

    # Prepare data for demand response focus cluster
    # NOTE(review): the three annual conversions below share the same filename;
    # presumably the converter writes one file per column - confirm that they
    # do not overwrite each other.

    # Costs
    var_costs = pd.read_csv(
        f"{path_model_inputs}{demand_response_focus_cluster}_variable_costs_parameters_{dr_scenario}%.csv",
        index_col=0,
    )
    var_costs.index = var_costs.index.str[:4]
    # Extract costs for 2020 separately since AMIRIS can only handle scalars
    var_costs.loc["2020"].to_csv(
        f"{scenario_output_path}{demand_response_focus_cluster}_variable_costs_2020.csv",
    )
    _ = convert_annual_data_to_fame_time(
        var_costs,
        save=True,
        path=scenario_output_path,
        filename=demand_response_focus_cluster,
        rounding_precision=3,
    )
    fixed_costs_and_investments = pd.read_csv(
        (
            f"{path_model_inputs}{demand_response_focus_cluster}_"
            f"fixed_costs_and_investments_parameters_{dr_scenario}%.csv"
        ),
        index_col=0,
    )
    fixed_costs_and_investments.index = fixed_costs_and_investments.index.str[:4]
    _ = convert_annual_data_to_fame_time(
        fixed_costs_and_investments,
        save=True,
        path=scenario_output_path,
        filename=demand_response_focus_cluster,
        rounding_precision=3,
    )

    # Potential parameters
    potential_parameters = pd.read_csv(
        (
            f"{path_model_inputs}{demand_response_focus_cluster}_"
            f"potential_parameters_{dr_scenario}%.csv"
        ),
        index_col=0,
    )
    # Extract parameters for 2020 separately since AMIRIS can only handle scalars for some values (mapping)
    potential_parameters.loc[2020].to_csv(
        f"{scenario_output_path}{demand_response_focus_cluster}_potential_parameters_2020.csv"
    )
    _ = convert_annual_data_to_fame_time(
        potential_parameters,
        save=True,
        path=scenario_output_path,
        filename=demand_response_focus_cluster,
        rounding_precision=3,
    )

    # Baseline load time series and availabilities
    _ = convert_time_series_index_to_fame_time(
        baseline_load_profile[demand_response_focus_cluster],
        save=True,
        path=scenario_output_path,
        filename=f"baseline_load_profile_{demand_response_focus_cluster}",
        rounding_precision=4,
    )
    _ = convert_time_series_index_to_fame_time(
        availability_pos_ts[demand_response_focus_cluster],
        save=True,
        path=scenario_output_path,
        filename=f"availability_shifting_pos_{demand_response_focus_cluster}",
        rounding_precision=4,
    )
    _ = convert_time_series_index_to_fame_time(
        availability_neg_ts[demand_response_focus_cluster],
        save=True,
        path=scenario_output_path,
        filename=f"availability_shifting_neg_{demand_response_focus_cluster}",
        rounding_precision=4,
    )

# Derive efficiencies for conventionals
## Existing / exogenous plants
For existing conventional plants, use a regression approach to estimate minimum and maximum efficiency values.

In [None]:
# Load the exogenous conventional plants and their maximum output time series
transformers = pd.read_csv(path_model_inputs + file_name_transformers, index_col=0)
transformers_max_ts = pd.read_csv(path_model_inputs + file_name_transformers_max, index_col=0)

# Installed capacity per plant: every plant column of the time series is
# multiplied by the plant's "capacity" value (aligned on the column labels)
transformers_capacity_ts = transformers_max_ts * transformers["capacity"]

# Cut the index to its first four characters, then turn plants into rows
transformers_capacity_ts.index = transformers_capacity_ts.index.str[:4]
transformers_capacity_ts_transposed = transformers_capacity_ts.transpose()
transformers_capacity_ts_transposed[["efficiency_el", "tech_fuel"]] = (
    transformers[["efficiency_el", "tech_fuel"]]
)

# One frame per tech_fuel group, plants ordered by ascending efficiency
grouped_plants = {}
for tech_fuel, plants in transformers_capacity_ts_transposed.groupby("tech_fuel"):
    grouped_plants[tech_fuel] = plants.sort_values(by="efficiency_el")

In [None]:
# Perform a regression analysis to derive efficiencies and calculate installed capacities meanwhile
# For every tech_fuel group and year, the electrical efficiency is regressed
# on the cumulated capacity; the fitted line evaluated at zero and at the
# largest value gives the minimum and maximum efficiency.
for key, value in grouped_plants.items():
    # One row per year; the column names match the ones written in the loop,
    # so no all-NaN column ends up in the converted data
    power_plants_regression = pd.DataFrame(
        index=range(2020, 2051),
        columns=["efficiency_min", "efficiency_max", "exogenous_installed_cap"],
    )
    for iter_year in range(2020, 2051):
        # Running sum of the year's capacities in the row order of the group
        value["cumulated_capacity"] = value[str(iter_year)].cumsum()

        Y = value["efficiency_el"].values

        if len(Y) > 1:
            # Regressors: constant plus cumulated capacity
            X = sm.add_constant(value["cumulated_capacity"].values)
            efficiency_regression = sm.OLS(Y, X).fit()

            # Evaluate the fitted line from 0 up to the largest regressor value;
            # only the first and the last point are used
            x = np.linspace(0, X.max(), int(X.max()))
            regression_function = (
                efficiency_regression.params[0] + efficiency_regression.params[1] * x
            )
            # The minimum efficiency is floored at 0.1
            min_efficiency = max(0.1, round(regression_function[0], 4))
            max_efficiency = round(regression_function[-1], 4)
        # Only one entry in group; thus no regression function
        else:
            min_efficiency = round(Y[0], 4)
            max_efficiency = round(Y[0], 4)

        power_plants_regression.at[iter_year, "efficiency_min"] = min_efficiency
        power_plants_regression.at[iter_year, "efficiency_max"] = max_efficiency
        # The last cumulated value is the total capacity of the group in that year
        power_plants_regression.at[iter_year, "exogenous_installed_cap"] = (
            value["cumulated_capacity"].iloc[-1]
        )

    _ = convert_annual_data_to_fame_time(
        power_plants_regression,
        save=True,
        path=f"{path_processed_outputs}/amiris/all_scenarios/",
        filename=f"{key}",
    )

## Endogenous installations
Extract single efficiency value from input data

In [None]:
# Read the investment options and derive one electrical efficiency value per option
investment_options = pd.read_csv(f"{path_model_inputs}/{file_name_investment_options}", index_col=0)

# Shorten the index labels: drop the first two "_"-separated tokens, then the
# last two. `n` is passed by keyword because newer pandas versions no longer
# accept it as a positional argument.
investment_options["new_index"] = (
    investment_options.index.str.split("_", n=2, expand=True)
    .get_level_values(2)
    .str.rsplit("_", n=2, expand=True)
    .get_level_values(0)
)

investment_options = investment_options.set_index("new_index")

# One row per year and one column per investment option; the efficiencies are
# set for 2020 and carried forward to all later years
efficiency_df = pd.DataFrame(
    index=range(2020, 2051),
    columns=investment_options.index,
)
efficiency_df.loc[2020] = investment_options["efficiency_el"]
efficiency_df = efficiency_df.ffill()

_ = convert_annual_data_to_fame_time(
    efficiency_df,
    save=True,
    path=f"{path_processed_outputs}/amiris/all_scenarios/",
    filename="efficiency_el",
)