# Investment model sanity check for inputs
This notebook inspects all inputs passed to _pommesinvest_ in order to detect potential data bugs by means of
* visual inspection,
* analysis of some statistical moments,
* assert statements to check whether assumptions are valid.

## Package imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from pommesevaluation.pommesinvest_routines import (
    InvestmentModelDummy, load_input_data, process_input_data, resample_timeseries
)
from pommesevaluation.investment_results_inspection import (
    plot_time_series_cols, create_datetime_index
)

## Parameter settings

In [None]:
# Font size used for all figures created in this notebook
plt.rcParams.update({"font.size": 12})

# --- Base configuration applied to all scenarios ---
path_folder_input = "./model_inputs/pommesinvest/"
start_time = "2020-01-01 00:00:00"
end_time = "2045-12-31 23:00:00"
freq = "1H"
overlap_in_time_steps = 0
# Countries / bidding zones to include; the numbered zones are generated,
# the resulting order of the list is kept as before
countries = [
    "AT", "BE", "CH", "CZ", "DE",
    "DK1", "DK2",
    "FR", "NL",
    *(f"NO{zone}" for zone in range(1, 6)),
    "PL",
    *(f"SE{zone}" for zone in range(1, 5)),
    "IT",
]
fuel_cost_pathway = "NZE"  # net zero emissions
fuel_price_shock = "high"
emissions_cost_pathway = "long-term"
activate_emissions_pathway_limit = True
activate_emissions_budget_limit = False

# --- Scenario-dependent parameters ---
activate_demand_response = False
flexibility_options_scenario = "50"
demand_response_scenario = "50"

## Read in data sets
* Create an "InvestmentModelDummy" object, which holds some of the attributes of an InvestmentModel but is not actually one.
* Reuse the data reading methods copied from pommesinvest to obtain the exact same data sets and to make the data read-in dependent on the configuration.

In [None]:
# Dummy model object carrying the configuration defined in the parameter settings
im = InvestmentModelDummy(
    # data location and regional scope
    path_folder_input=path_folder_input,
    countries=countries,
    # time-related settings
    start_time=start_time,
    end_time=end_time,
    freq=freq,
    overlap_in_time_steps=overlap_in_time_steps,
    # cost pathways
    fuel_cost_pathway=fuel_cost_pathway,
    fuel_price_shock=fuel_price_shock,
    emissions_cost_pathway=emissions_cost_pathway,
    # emissions limits
    activate_emissions_pathway_limit=activate_emissions_pathway_limit,
    activate_emissions_budget_limit=activate_emissions_budget_limit,
    # flexibility options and demand response
    flexibility_options_scenario=flexibility_options_scenario,
    activate_demand_response=activate_demand_response,
    demand_response_scenario=demand_response_scenario,
)

In [None]:
input_data = process_input_data(im)

# Components

## Buses
Assertion statements

In [None]:
buses = input_data["buses"]

# Show the first and the last ten rows of the buses data set
for preview in (buses.head(10), buses.tail(10)):
    display(preview)

In [None]:
buses.describe()

In [None]:
# The three assertions below check that the bus labels follow the pattern
# "<country>_bus_<commodity>".

# 1) The label prefix must match the entry of the "country" column.
#    `n` is passed by keyword, since pandas >= 2.0 only accepts `pat` positionally.
country_prefixes = buses.index.str.split("_", n=1, expand=True).get_level_values(0)
check_countries = (country_prefixes == buses["country"]).all()
assert check_countries, (
    "Expected all bus label prefixes to match the 'country' column, "
    f"but result was {check_countries}"
)

# 2) The second label part must always be "bus".
bus_names = set(buses.index.str.split("_", expand=True).get_level_values(1))
assert bus_names == {"bus"}, f"Expected 'bus' as the only name, but result was {bus_names}"

# 3) The last label part must be one of the known commodities and every
#    known commodity must occur at least once.
bus_commodities = set(buses.index.str.rsplit("_", n=1, expand=True).get_level_values(1))
all_commodities = {
    'el', 'biomass', 'hardcoal', 'hydro', 'hydrogen',
    'lignite', 'mixedfuels', 'natgas', 'oil', 'otherfossil',
    'solarPV', 'uranium', 'waste', 'windoffshore', 'windonshore'
}
assert bus_commodities == all_commodities, (
    f"Unexpected commodities: {bus_commodities - all_commodities}; "
    f"missing commodities: {all_commodities - bus_commodities}"
)

## Sinks
### Excess Sinks
Only visual inspection sufficient

In [None]:
sinks_excess = input_data["sinks_excess"]

In [None]:
sinks_excess

### Demand sinks
* Should contain maximum values as of 2020
* Visual inspection

In [None]:
sinks_demand_el = input_data["sinks_demand_el"]

In [None]:
sinks_demand_el

In [None]:
fig, ax = plt.subplots(figsize=(16, 10))
_ = sinks_demand_el["maximum"].sort_values(ascending=False).plot(kind="bar", ax=ax)
plt.show()

## Sources
### Shortage sources
Only visual inspection

In [None]:
shortage = input_data["sources_shortage"]

In [None]:
shortage

### Commodity sources
Only visual inspection

In [None]:
commodities = input_data["sources_commodity"]

In [None]:
commodities

In [None]:
# Extract the fuel as the last "_"-separated part of each commodity source label.
# `n` is passed by keyword, since pandas >= 2.0 only accepts `pat` positionally.
fuels = commodities.index.str.rsplit("_", n=1, expand=True).get_level_values(1).unique()
for fuel in fuels:
    # Select on the exact label suffix instead of a regex substring search, so that
    # a fuel name occurring elsewhere in a label cannot pull in unrelated rows.
    display(commodities.loc[commodities.index.str.endswith(f"_{fuel}")])

### Renewable sources
* Visual inspection
* Statistical moments of data set

In [None]:
renewables = input_data["sources_renewables"]

In [None]:
display(renewables.head(10))
display(renewables.tail(10))

In [None]:
renewables.loc[["FR_source_windoffshore", "IT_source_windoffshore"]]

In [None]:
# Hydro values are in kWh/h and reflect the inflow potential; they are therefore
# not comparable to the remaining renewables and get described separately.
is_hydro = renewables.index.str.contains("_hydro")
hydro = renewables.loc[is_hydro]
non_hydro_renewables = renewables.loc[~is_hydro]
for label, subset in (("Hydro", hydro), ("Remaining RES", non_hydro_renewables)):
    print(label)
    display(subset.describe())

In [None]:
# One bar chart per group, capacities sorted in descending order:
# hydro in the upper panel, the remaining renewables in the lower panel
fig, ax = plt.subplots(2, 1, figsize=(16, 16))
for axis, group in zip(ax, (hydro, non_hydro_renewables)):
    _ = group["capacity"].sort_values(ascending=False).plot(kind="bar", ax=axis)
plt.tight_layout()
plt.show()

### Exogenous storages

In [None]:
exogenous_storages = input_data["exogenous_storages_el"]

In [None]:
exogenous_storages.columns

In [None]:
# Split the exogenous storages by their type and describe each subset;
# the columns are shown in two chunks (first 15 columns, then the rest)
storage_type = exogenous_storages["type"]
phes = exogenous_storages.loc[storage_type == "phes"]
reservoir = exogenous_storages.loc[storage_type == "reservoir"]
for title, subset in (("PHES", phes), ("Reservoir", reservoir)):
    print(title)
    display(subset.iloc[:, :15].describe())
    display(subset.iloc[:, 15:].describe())

### New-built storages
Only visual inspection

In [None]:
new_storages = input_data["new_built_storages_el"]

In [None]:
new_storages.columns

In [None]:
new_storages.iloc[:,:15]

In [None]:
new_storages.iloc[:,15:]

## Transformers
### Exogenous transformers

In [None]:
exogenous_transformers = input_data["exogenous_transformers"]

In [None]:
exogenous_transformers.columns

In [None]:
# Show the first ten rows in chunks of 15 columns, followed by all remaining columns
for first_col in range(0, 45, 15):
    display(exogenous_transformers.iloc[:, first_col:first_col + 15].head(10))
display(exogenous_transformers.iloc[:, 45:].head(10))

In [None]:
exogenous_transformers.columns

In [None]:
display(exogenous_transformers.iloc[:,:15].describe())
display(exogenous_transformers.iloc[:,15:30].describe())

### New-built transformers

In [None]:
new_built_transformers = input_data["new_built_transformers"]
new_built_transformers

## Linking transformers

In [None]:
linking_transformers = input_data["linking_transformers"]

In [None]:
linking_transformers.describe()

In [None]:
linking_transformers.conversion_factor.unique()

In [None]:
linking_transformers.type.unique()

In [None]:
# Drop the descriptive columns, then transpose so that each linking transformer
# becomes one column that can be plotted
descriptive_cols = ["from", "to", "conversion_factor", "type"]
linking_transformers_transposed = (
    linking_transformers
    .drop(columns=descriptive_cols)
    .T
    .astype("float64")
)
plot_time_series_cols(linking_transformers_transposed)

# Timeseries
## Demand

In [None]:
sinks_demand_el_ts = input_data["sinks_demand_el_ts"]

In [None]:
sinks_demand_el_ts.describe()

In [None]:
plot_time_series_cols(sinks_demand_el_ts)

In [None]:
# Scale each demand time series column with the "maximum" value of the
# demand sink that carries the same label
demand_maxima = sinks_demand_el["maximum"]
sinks_demand_el_ts_absolute = pd.DataFrame(
    index=sinks_demand_el_ts.index,
    columns=sinks_demand_el_ts.columns,
)
for col in sinks_demand_el_ts.columns:
    scaling_factor = demand_maxima.at[col]
    sinks_demand_el_ts_absolute[col] = sinks_demand_el_ts[col] * scaling_factor

In [None]:
plot_time_series_cols(sinks_demand_el_ts_absolute)

## RES generation

In [None]:
sources_renewables_ts = input_data["sources_renewables_ts"]

In [None]:
sources_renewables_ts.iloc[:,:15].describe()

In [None]:
sources_renewables_ts.iloc[:,15:30].describe()

In [None]:
sources_renewables_ts.iloc[:,30:45].describe()

In [None]:
sources_renewables_ts.iloc[:,45:60].describe()

In [None]:
plot_time_series_cols(sources_renewables_ts)

In [None]:
# Scale each renewable time series column with the "capacity" value of the
# renewable source that carries the same label.
sources_renewables_ts_absolute = pd.DataFrame(
    index=sources_renewables_ts.index,
    columns=sources_renewables_ts.columns
)
columns_without_capacity = []
for col in sources_renewables_ts.columns:
    # Only the capacity lookup can fail; keep the try body limited to it
    try:
        capacity = renewables.at[col, "capacity"]
    except KeyError:
        # The column stays empty, but is remembered so the mismatch gets reported
        columns_without_capacity.append(col)
        continue
    sources_renewables_ts_absolute[col] = sources_renewables_ts[col] * capacity

# Make label mismatches between time series and source data visible
# instead of silently leaving the respective columns empty
if columns_without_capacity:
    print(f"No capacity entry found in renewables for: {columns_without_capacity}")

In [None]:
plot_time_series_cols(sources_renewables_ts_absolute)

## Transformers
### Minimum loads

In [None]:
min_loads_ts = input_data["transformers_minload_ts"]

In [None]:
min_loads_ts.describe()

In [None]:
plot_time_series_cols(min_loads_ts[:8760])

### Availability

In [None]:
availability_ts = input_data["transformers_availability_ts"]

In [None]:
availability_ts.describe()

In [None]:
_ = availability_ts[:8760].plot()
plt.show()

### Exogenous max capacity
Development factors for exogenous maximum capacity of transformer clusters

In [None]:
transformers_exogenous_max_ts = input_data["transformers_exogenous_max_ts"]

In [None]:
transformers_exogenous_max_ts.describe()

In [None]:
plot_time_series_cols(transformers_exogenous_max_ts)

## Linking transformers
### Hourly time series

In [None]:
linking_transformers_ts = input_data["linking_transformers_ts"]

In [None]:
linking_transformers_ts.describe()

In [None]:
plot_time_series_cols(linking_transformers_ts)

### Annual time series

In [None]:
linking_transformers_annual_ts = input_data["linking_transformers_annual_ts"]

In [None]:
linking_transformers_annual_ts.describe()

In [None]:
plot_time_series_cols(linking_transformers_annual_ts)

## Costs
### Fuel costs

In [None]:
costs_fuel_ts = input_data["costs_fuel_ts"]

In [None]:
costs_fuel_ts.describe()

In [None]:
plot_time_series_cols(costs_fuel_ts)

### Emissions costs

In [None]:
costs_emissions_ts = input_data["costs_emissions_ts"]

In [None]:
costs_emissions_ts.describe()

In [None]:
fig, ax = plt.subplots(figsize=(15, 5))
_ = costs_emissions_ts.iloc[:,0].plot(ax=ax)
plt.tight_layout()
plt.show()

### Costs operation

In [None]:
costs_operation_ts = input_data["costs_operation_ts"]

In [None]:
costs_operation_ts.describe()

In [None]:
plot_time_series_cols(costs_operation_ts)

### Costs operation storages

In [None]:
costs_operation_storages_ts = input_data["costs_operation_storages_ts"]

In [None]:
costs_operation_storages_ts.describe()

In [None]:
plot_time_series_cols(costs_operation_storages_ts)

### Costs investment

In [None]:
costs_investment = input_data["costs_investment"]

In [None]:
costs_investment.describe()

In [None]:
plot_time_series_cols(costs_investment)

### Costs storages investment
#### investment in capacity

In [None]:
costs_storages_investment_capacity = input_data["costs_storages_investment_capacity"]

In [None]:
costs_storages_investment_capacity.describe()

In [None]:
plot_time_series_cols(costs_storages_investment_capacity)

#### investment in power

In [None]:
costs_storages_investment_power = input_data["costs_storages_investment_power"]

In [None]:
costs_storages_investment_power.describe()

In [None]:
plot_time_series_cols(costs_storages_investment_power)

### Fixed costs

In [None]:
fixed_costs = input_data["fixed_costs"]

In [None]:
fixed_costs.describe()

In [None]:
fixed_costs

In [None]:
# Derive absolute fixed costs from the investment expenses.
# NOTE(review): the first value of the fixed costs row is divided by 100,
# i.e. it is presumably given in percent of the investment expenses - confirm.
fixed_costs_absolute = pd.DataFrame(
    index=costs_investment.index,
    columns=costs_investment.columns,
)
for col in costs_investment.columns:
    fixed_costs_value = fixed_costs.loc[col].values[0]
    scaled_investment = costs_investment[col] * fixed_costs_value
    fixed_costs_absolute[col] = scaled_investment / 100

In [None]:
fixed_costs_absolute.describe()

In [None]:
plot_time_series_cols(fixed_costs_absolute)

### Fixed costs storages

In [None]:
fixed_costs_storages = input_data["fixed_costs_storages"]

In [None]:
fixed_costs_storages.describe()

In [None]:
fixed_costs_storages

#### Fixed costs for capacity

In [None]:
costs_storages_investment_capacity

In [None]:
# Absolute fixed costs for storage capacity: investment expenses in capacity
# scaled with the fixed costs value of the storage carrying the same label.
# NOTE(review): the first value of the fixed costs row is divided by 100,
# i.e. it is presumably given in percent of the investment expenses - confirm.
fixed_costs_storages_capacity_absolute = pd.DataFrame(
    index=costs_storages_investment_capacity.index,
    columns=costs_storages_investment_capacity.columns
)
storages_without_fixed_costs = []
for col in costs_storages_investment_capacity.columns:
    # Only the fixed costs lookup can fail; keep the try body limited to it
    try:
        fixed_costs_value = fixed_costs_storages.loc[col].values[0]
    except KeyError:
        # The column stays empty, but is remembered so the mismatch gets reported
        storages_without_fixed_costs.append(col)
        continue
    fixed_costs_storages_capacity_absolute[col] = (
        costs_storages_investment_capacity[col] * fixed_costs_value / 100
    )

# Make label mismatches visible instead of silently leaving columns empty
if storages_without_fixed_costs:
    print(f"No fixed costs entry found for storages: {storages_without_fixed_costs}")

In [None]:
fixed_costs_storages_capacity_absolute.describe()

In [None]:
plot_time_series_cols(fixed_costs_storages_capacity_absolute)

#### Fixed costs for power

In [None]:
# Absolute fixed costs for storage power: investment expenses in *power*
# scaled with the fixed costs value of the storage carrying the same label.
# (The capacity investment expenses were used here before, which merely
# reproduced the capacity-related results.)
# NOTE(review): as for capacity, the first value of the fixed costs row is used
# and divided by 100 (presumably a percentage) - confirm that this entry is
# the one that applies to power as well.
fixed_costs_storages_power_absolute = pd.DataFrame(
    index=costs_storages_investment_power.index,
    columns=costs_storages_investment_power.columns
)
storages_without_fixed_costs = []
for col in costs_storages_investment_power.columns:
    # Only the fixed costs lookup can fail; keep the try body limited to it
    try:
        fixed_costs_value = fixed_costs_storages.loc[col].values[0]
    except KeyError:
        # The column stays empty, but is remembered so the mismatch gets reported
        storages_without_fixed_costs.append(col)
        continue
    fixed_costs_storages_power_absolute[col] = (
        costs_storages_investment_power[col] * fixed_costs_value / 100
    )

# Make label mismatches visible instead of silently leaving columns empty
if storages_without_fixed_costs:
    print(f"No fixed costs entry found for storages: {storages_without_fixed_costs}")

In [None]:
fixed_costs_storages_power_absolute.describe()

In [None]:
plot_time_series_cols(fixed_costs_storages_power_absolute)

## Storages
### Exogenous maximum capacity

In [None]:
storages_el_exogenous_max_ts = input_data["storages_el_exogenous_max_ts"]

In [None]:
storages_el_exogenous_max_ts.describe()

In [None]:
plot_time_series_cols(storages_el_exogenous_max_ts)

## Emission limits

In [None]:
emission_limits = input_data["emission_limits"]

In [None]:
emission_limits.describe()

In [None]:
plot_time_series_cols(emission_limits)

# Other data
## WACC

In [None]:
wacc = input_data["wacc"]

In [None]:
wacc.describe()

In [None]:
wacc

## Interest rate

In [None]:
interest_rate = input_data["interest_rate"]

In [None]:
interest_rate.describe()

In [None]:
interest_rate

## Hydrogen investment maxima

In [None]:
hydrogen_investment_maxima = input_data["hydrogen_investment_maxima"]

In [None]:
hydrogen_investment_maxima.describe()

In [None]:
_ = hydrogen_investment_maxima.plot()
plt.show()

## Emission development factors

In [None]:
emission_development_factors = input_data["emission_development_factors"]

In [None]:
emission_development_factors.describe()

In [None]:
plot_time_series_cols(emission_development_factors)

# Demand Response data

In [None]:
# Switch demand response on for the existing dummy model object ...
activate_demand_response = True
im.activate_demand_response = activate_demand_response

# ... and process the input data again; note that this overwrites `input_data`,
# so all cells below work on the data processed with demand response activated
input_data = process_input_data(im)

## Demand

In [None]:
sinks_demand_el_ts = input_data["sinks_demand_el_ts"]

In [None]:
sinks_demand_el_ts.describe()

In [None]:
plot_time_series_cols(sinks_demand_el_ts)

In [None]:
sinks_demand_el = input_data["sinks_demand_el"]

In [None]:
sinks_demand_el

In [None]:
# Scale each demand time series column with the "maximum" value of the
# demand sink that carries the same label
demand_maxima = sinks_demand_el["maximum"]
sinks_demand_el_ts_absolute = pd.DataFrame(
    index=sinks_demand_el_ts.index,
    columns=sinks_demand_el_ts.columns,
)
for col in sinks_demand_el_ts.columns:
    scaling_factor = demand_maxima.at[col]
    sinks_demand_el_ts_absolute[col] = sinks_demand_el_ts[col] * scaling_factor

In [None]:
plot_time_series_cols(sinks_demand_el_ts_absolute)

## Eligibility

In [None]:
dr_clusters = load_input_data(
    filename="demand_response_clusters_eligibility", im=im
)

In [None]:
dr_clusters

## Potential parameters

In [None]:
# Collect the potential parameters per demand response cluster and plot them,
# preceded by a header naming the cluster
potential_parameters = {}
separator = "*" * 50
for dr_cluster in dr_clusters.index:
    cluster_data = input_data[f"sinks_dr_el_{dr_cluster}"]
    potential_parameters[dr_cluster] = cluster_data
    print(separator, dr_cluster, separator, sep="\n")
    plot_time_series_cols(cluster_data)

## Variable costs

In [None]:
# Collect the variable costs per demand response cluster and plot them,
# preceded by a header naming the cluster
variable_costs_parameters = {}
separator = "*" * 50
for dr_cluster in dr_clusters.index:
    cluster_data = input_data[f"sinks_dr_el_{dr_cluster}_variable_costs"]
    variable_costs_parameters[dr_cluster] = cluster_data
    print(separator, dr_cluster, separator, sep="\n")
    plot_time_series_cols(cluster_data)

## Investment expenses and fixed costs

In [None]:
# Collect fixed costs and investment expenses per demand response cluster
# and plot them, preceded by a header naming the cluster
fixed_costs_parameters = {}
separator = "*" * 50
for dr_cluster in dr_clusters.index:
    cluster_data = input_data[f"sinks_dr_el_{dr_cluster}_fixed_costs_and_investments"]
    fixed_costs_parameters[dr_cluster] = cluster_data
    print(separator, dr_cluster, separator, sep="\n")
    plot_time_series_cols(cluster_data)

## Demand Response baseline consumption

In [None]:
sinks_dr_el_ts = input_data["sinks_dr_el_ts"]

In [None]:
sinks_dr_el_ts.describe()

In [None]:
plot_time_series_cols(sinks_dr_el_ts[:8760])

In [None]:
# Scale the baseline consumption profile of each demand response cluster
# with the resampled "max_cap" values of that cluster and plot the result
sinks_dr_el_ts_absolute = {}
for dr_cluster in dr_clusters.index:
    max_cap_raw = potential_parameters[dr_cluster].loc[:, ["max_cap"]]
    df = resample_timeseries(
        create_datetime_index(max_cap_raw),
        freq,
        aggregation_rule="sum",
        interpolation_rule="ffill",
    )
    # Drop the last row of the resampled frame
    # (presumably so that its length matches the profile - confirm)
    df = df[:-1]
    absolute_profile = sinks_dr_el_ts[dr_cluster] * df["max_cap"].values
    sinks_dr_el_ts_absolute[dr_cluster] = absolute_profile

    fig, ax = plt.subplots(figsize=(15, 5))
    _ = absolute_profile.plot(ax=ax)
    plt.title(dr_cluster)
    plt.tight_layout()
    plt.show()

## Availability positive

In [None]:
sinks_dr_el_ava_pos_ts = input_data["sinks_dr_el_ava_pos_ts"]

In [None]:
sinks_dr_el_ava_pos_ts.describe()

In [None]:
plot_time_series_cols(sinks_dr_el_ava_pos_ts[:8760])

In [None]:
# Scale the positive availability profile of each demand response cluster with
# its investment limit, i.e. the smaller value of "max_cap" and the larger one
# of the two overall potentials, and plot the result
sinks_dr_el_ava_pos_ts_absolute = {}
potential_cols = ["potential_pos_overall", "potential_neg_overall"]
for dr_cluster in dr_clusters.index:
    params = create_datetime_index(
        potential_parameters[dr_cluster].loc[:, [*potential_cols, "max_cap"]]
    )
    max_potential = params[potential_cols].max(axis=1).rename("max_potential")
    invest_limit = pd.concat([max_potential, params["max_cap"]], axis=1).min(axis=1)
    df = invest_limit.to_frame("annual_invest_limit")
    df = resample_timeseries(
        df, freq, aggregation_rule="sum", interpolation_rule="ffill"
    )
    # Drop the last row of the resampled frame
    # (presumably so that its length matches the profile - confirm)
    df = df[:-1]
    absolute_profile = sinks_dr_el_ava_pos_ts[dr_cluster] * df["annual_invest_limit"].values
    sinks_dr_el_ava_pos_ts_absolute[dr_cluster] = absolute_profile

    fig, ax = plt.subplots(figsize=(15, 5))
    _ = absolute_profile.plot(ax=ax)
    plt.title(dr_cluster)
    plt.tight_layout()
    plt.show()

## Availability negative

In [None]:
sinks_dr_el_ava_neg_ts = input_data["sinks_dr_el_ava_neg_ts"]

In [None]:
sinks_dr_el_ava_neg_ts.describe()

In [None]:
plot_time_series_cols(sinks_dr_el_ava_neg_ts[:8760])

In [None]:
# Scale the negative availability profile of each demand response cluster with
# its investment limit, i.e. the smaller value of "max_cap" and the larger one
# of the two overall potentials, and plot the result
sinks_dr_el_ava_neg_ts_absolute = {}
for dr_cluster in dr_clusters.index:
    limits = create_datetime_index(
        potential_parameters[dr_cluster].loc[:,["potential_pos_overall", "potential_neg_overall", "max_cap"]]
    )
    limits = limits.assign(
        max_potential=limits[["potential_pos_overall", "potential_neg_overall"]].max(axis=1)
    )
    df = limits[["max_potential", "max_cap"]].min(axis=1).to_frame("annual_invest_limit")
    df = resample_timeseries(
        df, freq, aggregation_rule="sum", interpolation_rule="ffill"
    )
    # Drop the last row of the resampled frame
    # (presumably so that its length matches the profile - confirm)
    df = df[:-1]
    absolute_profile = sinks_dr_el_ava_neg_ts[dr_cluster] * df["annual_invest_limit"].values
    sinks_dr_el_ava_neg_ts_absolute[dr_cluster] = absolute_profile

    fig, ax = plt.subplots(figsize=(15, 5))
    _ = absolute_profile.plot(ax=ax)
    plt.title(dr_cluster)
    plt.tight_layout()
    plt.show()