# Investment model sanity check for inputs
This notebook inspects all the inputs passed to _pommesinvest_ in order to detect potential data bugs by means of
* visual inspection,
* analysis of some statistical moments,
* assert statements to check whether assumptions are valid.

## Package imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from pommesevaluation.pommesinvest_routines import InvestmentModelDummy, load_input_data
from pommesevaluation.investment_results_inspection import plot_time_series_cols

## Parameter settings

In [None]:
plt.rcParams.update({'font.size': 12})

# --- Settings shared by all scenarios ---
# Location of the input data and time frame to read in
path_folder_input = "./model_inputs/pommesinvest/"
start_time = "2020-01-01 00:00:00"
end_time = "2045-12-31 23:00:00"
freq = "1H"
overlap_in_time_steps = 0

# Countries / bidding zones to consider
countries = [
    "AT", "BE", "CH", "CZ", "DE",
    "DK1", "DK2",
    "FR", "NL",
    "NO1", "NO2", "NO3", "NO4", "NO5",
    "PL",
    "SE1", "SE2", "SE3", "SE4",
    "IT",
]

# Cost pathways; these determine which cost input files are read in
fuel_cost_pathway = "NZE"  # net zero emissions
fuel_price_shock = "high"
emissions_cost_pathway = "long-term"

# Emissions limits; if either is active, emission development factors are read in
activate_emissions_pathway_limit = True
activate_emissions_budget_limit = False

# --- Settings that differ between scenarios ---
activate_demand_response = False
flexibility_options_scenario = "50"
demand_response_scenario = "50"

## Read in data sets
* Create an "InvestmentModelDummy" object, which holds some of the attributes of an InvestmentModel, but is not actually one.
* Copy & paste the methods for reading in data from pommesinvest to obtain the exact same data sets and make the data read-in dependent on the configuration.

In [None]:
# Set up the dummy model object that is handed to load_input_data
# whenever a data set is read in
im = InvestmentModelDummy(
    countries=countries,
    path_folder_input=path_folder_input,
    freq=freq,
    start_time=start_time,
    end_time=end_time,
    overlap_in_time_steps=overlap_in_time_steps,
)

In [None]:
# Each mapping below assigns the key used within this notebook to the
# name of the input file that is read in for it.
buses = {"buses": "buses"}

# Component data sets
components = {
    "sinks_excess": "sinks_excess",
    "sinks_demand_el": "sinks_demand_el",
    "sources_shortage": "sources_shortage",
    "sources_commodity": "sources_commodity",
    "sources_renewables": "sources_renewables_investment_model",
    "exogenous_storages_el": "storages_el_exogenous",
    "new_built_storages_el": "storages_el_investment_options",
    "exogenous_transformers": "transformers_exogenous",
    "new_built_transformers": "transformers_investment_options",
}

# Hourly time series data sets
hourly_time_series = {
    "sinks_demand_el_ts": "sinks_demand_el_ts_hourly",
    "sources_renewables_ts": "sources_renewables_ts_hourly",
    "transformers_minload_ts": "transformers_minload_ts_hourly",
    "transformers_availability_ts": "transformers_availability_ts_hourly",
    "linking_transformers_ts": "linking_transformers_ts",
}

# Annual time series data sets; the cost file names depend on the chosen
# fuel / emissions cost pathways and the flexibility options scenario
annual_time_series = {
    "transformers_exogenous_max_ts": "transformers_exogenous_max_ts",
    "costs_fuel_ts": f"costs_fuel_{fuel_cost_pathway}_{fuel_price_shock}_nominal_indexed_ts",
    "costs_emissions_ts": f"costs_emissions_{emissions_cost_pathway}_nominal_indexed_ts",
    "costs_operation_ts": f"variable_costs_{flexibility_options_scenario}%_nominal",
    "costs_operation_storages_ts": f"variable_costs_storages_{flexibility_options_scenario}%_nominal",
    "costs_investment": f"investment_expenses_{flexibility_options_scenario}%_nominal",
    "costs_storages_investment_capacity": f"investment_expenses_storages_capacity_{flexibility_options_scenario}%_nominal",
    "costs_storages_investment_power": f"investment_expenses_storages_power_{flexibility_options_scenario}%_nominal",
    "linking_transformers_annual_ts": "linking_transformers_annual_ts",
    "storages_el_exogenous_max_ts": "storages_el_exogenous_max_ts",
}

# Time-invariant data sets
other_files = {
    "emission_limits": "emission_limits",
    "wacc": "wacc",
    "interest_rate": "interest_rate",
    "fixed_costs": f"fixed_costs_{flexibility_options_scenario}%_nominal",
    "fixed_costs_storages": f"fixed_costs_storages_{flexibility_options_scenario}%_nominal",
    "hydrogen_investment_maxima": "hydrogen_investment_maxima",
    "linking_transformers": "linking_transformers",
}

# Emission development factors are used for scaling minimum loads; the file
# is only registered if at least one of the two emissions limits is active
if activate_emissions_pathway_limit or activate_emissions_budget_limit:
    other_files["emission_development_factors"] = "emission_development_factors"

# Add demand response units
if activate_demand_response:
    # Use the scenario stored on the model object if there is one; otherwise
    # fall back to the notebook-level parameter. The constructor call for `im`
    # does not pass a demand response scenario, so the attribute may be
    # missing -- NOTE(review): confirm against InvestmentModelDummy.
    dr_scenario = getattr(
        im, "demand_response_scenario", demand_response_scenario
    )

    # Overall demand = overall demand excluding demand response baseline
    hourly_time_series["sinks_demand_el_ts"] = (
        f"sinks_demand_el_excl_demand_response_ts_{dr_scenario}_hourly"
    )
    components["sinks_demand_el"] = (
        f"sinks_demand_el_excl_demand_response_{dr_scenario}"
    )

    # Obtain demand response clusters from file to avoid hard-coding
    components[
        "demand_response_clusters_eligibility"
    ] = "demand_response_clusters_eligibility"
    dr_clusters = load_input_data(
        filename="demand_response_clusters_eligibility", im=im
    )
    # Add demand response clusters information to the model itself
    im.add_demand_response_clusters(list(dr_clusters.index))

    # Register the parameter and cost files of every demand response cluster
    for dr_cluster in dr_clusters.index:
        components[f"sinks_dr_el_{dr_cluster}"] = (
            f"{dr_cluster}_potential_parameters_{dr_scenario}%"
        )
        annual_time_series[f"sinks_dr_el_{dr_cluster}_variable_costs"] = (
            f"{dr_cluster}_variable_costs_parameters_{dr_scenario}%"
        )
        annual_time_series[
            f"sinks_dr_el_{dr_cluster}_fixed_costs_and_investments"
        ] = (
            f"{dr_cluster}_fixed_costs_and_investments_parameters_{dr_scenario}%"
        )

    # Hourly demand response time series (baseline and availabilities)
    hourly_time_series[
        "sinks_dr_el_ts"
    ] = f"sinks_demand_response_el_ts_{dr_scenario}"

    hourly_time_series["sinks_dr_el_ava_pos_ts"] = (
        f"sinks_demand_response_el_ava_pos_ts_{dr_scenario}"
    )
    hourly_time_series["sinks_dr_el_ava_neg_ts"] = (
        f"sinks_demand_response_el_ava_neg_ts_{dr_scenario}"
    )

# Merge all file mappings into a single lookup; on duplicate keys,
# the mapping unpacked last takes precedence
input_files = {
    **buses,
    **components,
    **annual_time_series,
    **hourly_time_series,
    **other_files,
}

# Read in every registered data set, keyed by its notebook-internal name
input_data = {
    data_key: load_input_data(filename=file_name, im=im)
    for data_key, file_name in input_files.items()
}

# Components

## Buses
Assertion statements

In [None]:
buses = input_data["buses"]

display(buses.head(10))
display(buses.tail(10))

In [None]:
buses.describe()

In [None]:
# The assertions below check that every bus label reads
# "<country>_bus_<commodity>" and is consistent with the "country" column.

# `n` is passed by keyword: pandas >= 2.0 accepts only `pat` positionally
index_countries = buses.index.str.split("_", n=1, expand=True).get_level_values(0)
check_countries = (index_countries == buses["country"]).all()
assert check_countries, (
    "Expected all index prefixes to match the 'country' column, "
    f"but result was {check_countries}"
)

bus_names = set(buses.index.str.split("_", expand=True).get_level_values(1))
assert bus_names == {"bus"}, f"Expected 'bus' as the only name, but result was {bus_names}"

bus_commodities = set(buses.index.str.rsplit("_", n=1, expand=True).get_level_values(1))
all_commodities = {
    'el', 'biomass', 'hardcoal', 'hydro', 'hydrogen',
    'lignite', 'mixedfuels', 'natgas', 'oil', 'otherfossil',
    'solarPV', 'uranium', 'waste', 'windoffshore', 'windonshore'
}
assert bus_commodities == all_commodities, (
    f"Unexpected commodities: {bus_commodities - all_commodities}; "
    f"missing commodities: {all_commodities - bus_commodities}"
)

## Sinks
### Excess Sinks
Only visual inspection sufficient

In [None]:
sinks_excess = input_data["sinks_excess"]

In [None]:
sinks_excess

### Demand sinks
* Should contain maximum values as of 2020
* Visual inspection

In [None]:
demand = input_data["sinks_demand_el"]

In [None]:
demand

In [None]:
fig, ax = plt.subplots(figsize=(16, 10))
_ = demand["maximum"].sort_values(ascending=False).plot(kind="bar", ax=ax)
plt.show()

## Sources
### Shortage sources
Only visual inspection

In [None]:
shortage = input_data["sources_shortage"]

In [None]:
shortage

### Commodity sources
Only visual inspection

In [None]:
commodities = input_data["sources_commodity"]

In [None]:
commodities

In [None]:
# The fuel is the part of the index label after the last underscore.
# `n` is passed by keyword: pandas >= 2.0 accepts only `pat` positionally.
fuels = commodities.index.str.rsplit("_", n=1, expand=True).get_level_values(1).unique()
for fuel in fuels:
    # Match the exact suffix rather than a (regex) substring, so that rows
    # of other fuels whose label merely contains this name are not shown
    display(commodities.loc[commodities.index.str.endswith(f"_{fuel}")])

### Renewable sources
* Visual inspection
* Statistical moments of data set

In [None]:
renewables = input_data["sources_renewables"]

In [None]:
display(renewables.head(10))
display(renewables.tail(10))

In [None]:
renewables.loc[["FR_source_windoffshore", "IT_source_windoffshore"]]

In [None]:
# Hydro values are in kWh/h and reflect the inflow potential; therefore, they are not comparable!
# Hence, hydro and the remaining renewables are described separately.
is_hydro = renewables.index.str.contains("_hydro")
hydro = renewables.loc[is_hydro]
non_hydro_renewables = renewables.loc[~is_hydro]

print("Hydro")
display(hydro.describe())

print("Remaining RES")
display(non_hydro_renewables.describe())

In [None]:
fig, ax = plt.subplots(2, 1, figsize=(16, 16))
_ = hydro["capacity"].sort_values(ascending=False).plot(kind="bar", ax=ax[0])
_ = non_hydro_renewables["capacity"].sort_values(ascending=False).plot(kind="bar", ax=ax[1])
plt.tight_layout()
plt.show()

### Exogenous storages

In [None]:
exogenous_storages = input_data["exogenous_storages_el"]

In [None]:
exogenous_storages.columns

In [None]:
# Separate the exogenous storages by their "type" entry
storage_type = exogenous_storages["type"]
phes = exogenous_storages.loc[storage_type == "phes"]
reservoir = exogenous_storages.loc[storage_type == "reservoir"]

# Describe each subset in two column chunks (first 15 columns, then the
# rest) -- presumably to keep the displayed tables readable
for label, subset in (("PHES", phes), ("Reservoir", reservoir)):
    print(label)
    display(subset.iloc[:, :15].describe())
    display(subset.iloc[:, 15:].describe())

### New-built storages
Only visual inspection

In [None]:
new_storages = input_data["new_built_storages_el"]

In [None]:
new_storages.columns

In [None]:
new_storages.iloc[:,:15]

In [None]:
new_storages.iloc[:,15:]

## Transformers
### Exogenous transformers

In [None]:
exogenous_transformers = input_data["exogenous_transformers"]

In [None]:
exogenous_transformers.columns

In [None]:
display(exogenous_transformers.iloc[:,:15].head(10))
display(exogenous_transformers.iloc[:,15:30].head(10))
display(exogenous_transformers.iloc[:,30:45].head(10))
display(exogenous_transformers.iloc[:,45:].head(10))

In [None]:
exogenous_transformers.columns

In [None]:
display(exogenous_transformers.iloc[:,:15].describe())
display(exogenous_transformers.iloc[:,15:30].describe())

### New-built transformers

## Linking transformers

In [None]:
linking_transformers = input_data["linking_transformers"]

In [None]:
linking_transformers.describe()

In [None]:
linking_transformers.conversion_factor.unique()

In [None]:
linking_transformers.type.unique()

In [None]:
linking_transformers_transposed = linking_transformers.drop(
    columns=["from", "to", "conversion_factor", "type"]
).T.astype("float64")
plot_time_series_cols(linking_transformers_transposed)

# Timeseries
## Demand

In [None]:
sinks_demand_el_ts = input_data["sinks_demand_el_ts"]

In [None]:
sinks_demand_el_ts.describe()

In [None]:
plot_time_series_cols(sinks_demand_el_ts)

## RES generation

In [None]:
sources_renewables_ts = input_data["sources_renewables_ts"]

In [None]:
sources_renewables_ts.iloc[:,:15].describe()

In [None]:
sources_renewables_ts.iloc[:,15:30].describe()

In [None]:
sources_renewables_ts.iloc[:,30:45].describe()

In [None]:
sources_renewables_ts.iloc[:,45:60].describe()

In [None]:
plot_time_series_cols(sources_renewables_ts)

## Transformers
### Minimum loads

In [None]:
min_loads_ts = input_data["transformers_minload_ts"]

In [None]:
min_loads_ts.describe()

In [None]:
plot_time_series_cols(min_loads_ts[:8760])

### Availability

In [None]:
availability_ts = input_data["transformers_availability_ts"]

In [None]:
availability_ts.describe()

In [None]:
_ = availability_ts[:8760].plot()
plt.show()

### Exogenous max capacity
Development factors for exogenous maximum capacity of transformer clusters

In [None]:
transformers_exogenous_max_ts = input_data["transformers_exogenous_max_ts"]

In [None]:
transformers_exogenous_max_ts.describe()

In [None]:
plot_time_series_cols(transformers_exogenous_max_ts)

## Linking transformers
### Hourly time series

In [None]:
linking_transformers_ts = input_data["linking_transformers_ts"]

In [None]:
linking_transformers_ts.describe()

In [None]:
plot_time_series_cols(linking_transformers_ts)

### Annual time series

In [None]:
linking_transformers_annual_ts = input_data["linking_transformers_annual_ts"]

In [None]:
linking_transformers_annual_ts.describe()

In [None]:
plot_time_series_cols(linking_transformers_annual_ts)

## Costs
### Fuel costs

In [None]:
costs_fuel_ts = input_data["costs_fuel_ts"]

In [None]:
costs_fuel_ts.describe()

In [None]:
plot_time_series_cols(costs_fuel_ts)

### Emissions costs

In [None]:
costs_emissions_ts = input_data["costs_emissions_ts"]

In [None]:
costs_emissions_ts.describe()

In [None]:
_ = costs_emissions_ts.iloc[:,0].plot()
plt.show()

### Costs operation

In [None]:
costs_operation_ts = input_data["costs_operation_ts"]

In [None]:
costs_operation_ts.describe()

In [None]:
plot_time_series_cols(costs_operation_ts)

### Costs operation storages

In [None]:
costs_operation_storages_ts = input_data["costs_operation_storages_ts"]

In [None]:
costs_operation_storages_ts.describe()

In [None]:
plot_time_series_cols(costs_operation_storages_ts)

### Costs investment

In [None]:
costs_investment = input_data["costs_investment"]

In [None]:
costs_investment.describe()

In [None]:
plot_time_series_cols(costs_investment)

### Costs storages investment
#### investment in capacity

In [None]:
costs_storages_investment_capacity = input_data["costs_storages_investment_capacity"]

In [None]:
costs_storages_investment_capacity.describe()

In [None]:
plot_time_series_cols(costs_storages_investment_capacity)

#### investment in power

In [None]:
costs_storages_investment_power = input_data["costs_storages_investment_power"]

In [None]:
costs_storages_investment_power.describe()

In [None]:
plot_time_series_cols(costs_storages_investment_power)

### Fixed costs

In [None]:
fixed_costs = input_data["fixed_costs"]

In [None]:
fixed_costs.describe()

In [None]:
fixed_costs

In [None]:
# Derive absolute fixed costs from the investment expenses.
# NOTE(review): the division by 100 suggests that fixed_costs holds
# percentages of the investment expenses -- confirm against the input data.
fixed_costs_absolute = pd.DataFrame(
    index=costs_investment.index,
    columns=costs_investment.columns,
)
for col in costs_investment.columns:
    # The first value of the fixed costs row belonging to this column
    percentage = fixed_costs.loc[col].values[0]
    fixed_costs_absolute[col] = costs_investment[col] * percentage / 100

In [None]:
fixed_costs_absolute.describe()

In [None]:
plot_time_series_cols(fixed_costs_absolute)

### Fixed costs storages

In [None]:
fixed_costs_storages = input_data["fixed_costs_storages"]

In [None]:
fixed_costs_storages.describe()

In [None]:
fixed_costs_storages

#### Fixed costs for capacity

In [None]:
costs_storages_investment_capacity

In [None]:
# Derive absolute fixed costs for storage capacity from the investment expenses.
# NOTE(review): the division by 100 suggests that fixed_costs_storages holds
# percentages of the investment expenses -- confirm against the input data.
fixed_costs_storages_capacity_absolute = pd.DataFrame(
    index=costs_storages_investment_capacity.index,
    columns=costs_storages_investment_capacity.columns,
)
missing_fixed_costs = []
for col in costs_storages_investment_capacity.columns:
    # Only the lookup may fail; keep the try body limited to it
    try:
        fixed_costs_share = fixed_costs_storages.loc[col].values[0]
    except KeyError:
        # No fixed costs entry for this column: leave it empty (NaN),
        # but remember it so that the gap is reported instead of hidden
        missing_fixed_costs.append(col)
        continue
    fixed_costs_storages_capacity_absolute[col] = (
        costs_storages_investment_capacity[col] * fixed_costs_share / 100
    )

if missing_fixed_costs:
    print(
        "No fixed costs entry found for the following columns "
        f"(left empty): {missing_fixed_costs}"
    )

In [None]:
fixed_costs_storages_capacity_absolute.describe()

In [None]:
plot_time_series_cols(fixed_costs_storages_capacity_absolute)

#### Fixed costs for power

## Storages
### Exogenous maximum capacity

In [None]:
storages_el_exogenous_max_ts = input_data["storages_el_exogenous_max_ts"]

In [None]:
storages_el_exogenous_max_ts.describe()

In [None]:
plot_time_series_cols(storages_el_exogenous_max_ts)

## Emission limits

In [None]:
emission_limits = input_data["emission_limits"]

In [None]:
emission_limits.describe()

In [None]:
plot_time_series_cols(emission_limits)

# Other data
## WACC

In [None]:
wacc = input_data["wacc"]

In [None]:
wacc.describe()

In [None]:
wacc

## Interest rate

In [None]:
interest_rate = input_data["interest_rate"]

In [None]:
interest_rate.describe()

In [None]:
interest_rate

## Hydrogen investment maxima

In [None]:
hydrogen_investment_maxima = input_data["hydrogen_investment_maxima"]

In [None]:
hydrogen_investment_maxima.describe()

In [None]:
_ = hydrogen_investment_maxima.plot()
plt.show()

## Emission development factors

In [None]:
# This data set is only read in if activate_emissions_pathway_limit or
# activate_emissions_budget_limit is True; otherwise the key is missing
# from input_data and this lookup raises a KeyError
emission_development_factors = input_data["emission_development_factors"]

In [None]:
emission_development_factors.describe()

In [None]:
plot_time_series_cols(emission_development_factors)