## Set up `GenX` Project in Current Folder

This step assumes you've installed `julia` already but have not set up the current `GenX` project folder.

In [None]:
# Shell escape: run Install.jl with Julia, using the current folder as the active project (--project=.).
!julia --project=. Install.jl

## Export `GenX` Input CSVs from Spreadsheet

In [None]:
import shutil

import xlwings as xw
import pandas as pd
from collections import defaultdict
import os
from upath import UPath
from loguru import logger
import sys
from datetime import datetime

# Remove the handlers loguru has registered so far, so only the sink added below is active.
logger.remove()
# Send log records to stderr, with loguru's `backtrace` option switched off for this sink.
logger.add(sys.stderr, backtrace=False)

- [x] Update the SharePoint path in the spreadsheet via xlwings -> done with a VBA UDF
- [ ] Do we need something like `kit-ui connect`? I don't think so
- [ ] File picker to connect to spreadsheet?
- [x] Format this notebook before committing
- [x] Copy Run.jl
- [x] Copy settings
- [ ] Check that demand_data, fuel_data, and generator_variability CSVs have the same length
- [x] Save a mapping of planning period to year so that it can be read by the spreadsheet, or used when concatenating the dataframes (instead of guessing)

In [None]:
# Workbook handle used by the cells below: `save_case` and the export/results cells
# all read the module-level name `wb`.
# NOTE(review): this is a user-specific absolute path — presumably every user has to
# edit it before running the notebook; confirm and consider making it configurable.
wb = xw.Book(
    '/Users/roderick/Library/CloudStorage/OneDrive-SharedLibraries-ResilientTransition/5.001 Kentucky Resource Council - Documents/Data/Kentucky Load Resource Model.xlsb'
)

In [None]:
def save_case(base_folder: UPath, case_subfolder: str | None = None):
    """Export every ``.csv`` named range of the workbook ``wb`` to a CSV file.

    Named ranges follow the pattern
    ``[csv file name]...[#]...[optional transformation]``. All named ranges that
    share the same csv file name are read as dataframes, cleaned, and then
    concatenated side by side (column-wise) into one CSV.

    Recognised name suffixes:

    * ``...T``: the first column becomes the index and the table is transposed.
    * ``...ffill``: blank cells are forward-filled.
    * ``...drop...1``: keep the first and the last column, then drop rows with any blank.
    * ``...drop...3``: keep the first and the last three columns, then drop rows with any blank.

    Args:
        base_folder: Folder the CSVs are written under.
        case_subfolder: Optional subfolder of ``base_folder``; when falsy the CSVs
            are written directly under ``base_folder``.

    Reads the module-level workbook handle ``wb``. Logs an error (but still writes
    the file) when a table contains blank cells.
    """
    # Group the named ranges by target CSV (some CSVs are split into several tables).
    csv_names = defaultdict(list)
    for named_range in wb.names:
        if ".csv" in named_range.name:
            csv_names[named_range.name.split("...")[0]].append(named_range)

    # The output folder does not depend on the CSV being written, so resolve it once.
    planning_period_folder = base_folder / case_subfolder if case_subfolder else base_folder

    for csv_name, ranges in csv_names.items():
        dfs = []
        for rng in ranges:
            is_transposed = rng.name.endswith("...T")

            # Get each range as a dataframe (transposed tables carry no header row).
            df = rng.refers_to_range.options(
                pd.DataFrame,
                index=0,
                header=(0 if is_transposed else 1),
            ).value
            df = df.dropna(how="all", axis=1)
            df = df.dropna(how="all", axis=0)
            if "resource" in df.columns:
                df = df.dropna(subset="resource", axis=0)
            if "drop" in df.columns:
                # Element-wise comparison on purpose: keep every row not flagged True.
                df = df[df["drop"] != True]

            # Apply optional transform
            if is_transposed:
                df = df.set_index(df.columns[0])
                df = df.T
            elif rng.name.endswith("...ffill"):
                df = df.ffill()
            elif rng.name.endswith("...drop...1"):
                df = df.iloc[:, [0, -1]]
                df = df.dropna(how="any")
            elif rng.name.endswith("...drop...3"):
                df = df.iloc[:, [0, -3, -2, -1]]
                df = df.dropna(how="any")

            if csv_name in [
                "resources\\policy_assignments\\Resource_NQC_derate.csv",
                "resources\\policy_assignments\\ELCC_multipliers.csv",
                "resources\\Resource_multistage_data.csv",
            ]:
                df = df.rename(columns={"resource": "Resource"})

            # The frame may still be a slice of an earlier one; take an explicit copy
            # so the column assignments below never write into a view.
            df = df.copy()

            # Change types for columns to int & strings
            int_columns = [
                column
                for column in df.columns
                if column in ["can_retire", "zone", "new_build", "model", "lds", "Time_Index"]
            ]
            if int_columns:
                df[int_columns] = df[int_columns].astype(int)

            str_columns = [column for column in df.columns if column in ["cluster", "region"]]
            if str_columns:
                df[str_columns] = df[str_columns].astype(str)

            if df.isna().any().any():
                logger.error(f"{csv_name} has blank cells. GenX currently does not have consistent handling of missing data, so please fill in or add placeholder values.")

            dfs.append(df)

        # Join all the dfs side by side, aligned on row position.
        final_df = pd.concat([df.reset_index(drop=True) for df in dfs], axis=1)

        # Save joined dataframe to CSV (range names use "\" as the path separator).
        filepath = planning_period_folder / csv_name.replace("\\", os.sep)
        filepath.parent.mkdir(parents=True, exist_ok=True)
        final_df.to_csv(filepath, index=False)

In [None]:
# Resolve the output folder and case label from the workbook's named ranges.
base_folder = UPath(wb.names["BaseFolder"].refers_to_range.value)
case_name = wb.names["CaseName"].refers_to_range.value

if base_folder.exists():
    logger.warning(f"Overwriting case: {base_folder}")
base_folder.mkdir(parents=True, exist_ok=True)

settings_sheet = wb.sheets["GenX Settings"]

# Read the ModeledYears table once; its index holds the planning periods to export.
modeled_years = settings_sheet.tables["ModeledYears"].range.options(pd.DataFrame, index=1).value.dropna()
planning_periods = modeled_years.index.astype(int).values

# Save mapping of planning periods so that we know what years to map inputs_p1, etc. to
modeled_years.to_csv(base_folder / "planning_periods.csv", index=True)

for counter, planning_period in enumerate(planning_periods, start=1):
    # Point the workbook at this planning period and recalculate before exporting.
    settings_sheet.range("ActiveYear").value = planning_period
    wb.app.calculate()

    logger.info(f"Saving case inputs for {planning_period}: (inputs_p{counter})")
    save_case(base_folder=base_folder, case_subfolder=f"inputs/inputs_p{counter}")

# Settings
logger.info("Saving settings...")

# NOTE(review): user-specific absolute path — confirm where the base settings should live.
base_settings_folder = UPath("/Users/roderick/PycharmProjects/resilient-transition/GenX.jl/__base_settings__")

# Start from a fresh copy of the base settings, then overwrite the files generated below.
if (base_folder / "settings").exists():
    shutil.rmtree(base_folder / "settings")
shutil.copytree(base_settings_folder, base_folder / "settings")

# TODO: Clean up how these settings files are parsed
# Each range is read as a Series; index and value are cast to str and joined into one line per row.

# genx_settings.yml
(
    settings_sheet.range(r"settings\genx_settings.yml")
    .options(pd.Series, header=False)
    .value.astype(int)
    .reset_index()
    .astype(str)
    .agg("".join, axis=1)
    .to_csv(base_folder / "settings" / "genx_settings.yml", index=False, header=False, sep="\t")
)

# multi_stage_settings.yml
(
    settings_sheet.range(r"settings\multi_stage_settings.yml")
    .options(pd.Series, header=False)
    .value.apply(lambda x: int(x) if isinstance(x, (float, bool, int)) else x)
    .reset_index()
    .astype(str)
    .agg("".join, axis=1)
    .to_csv(base_folder / "settings" / "multi_stage_settings.yml", index=False, header=False, sep="\t")
)

# time_domain_reduction_settings.yml
(
    settings_sheet.range(r"settings\time_domain_reduction_settings.yml")
    .options(pd.Series, header=False)
    .value.replace({None: " "})
    .apply(lambda x: int(x) if isinstance(x, (float, bool, int)) else x)
    .reset_index()
    .astype(str)
    .agg("".join, axis=1)
    .replace({"None": ""})
    .to_csv(base_folder / "settings" / "time_domain_reduction_settings.yml", index=False, header=False, sep="\t")
)

# highs_settings.yml (floats are deliberately left as floats here)
(
    settings_sheet.range(r"settings\highs_settings.yml")
    .options(pd.Series, header=False)
    .value.replace({None: " "})
    .apply(lambda x: int(x) if isinstance(x, (bool, int)) else x)
    .reset_index()
    .astype(str)
    .agg("".join, axis=1)
    .replace({"None": ""})
    .to_csv(base_folder / "settings" / "highs_settings.yml", index=False, header=False, sep="\t")
)

logger.success(f"Saved multi-stage capacity expansion case: {case_name}")

In [None]:
!julia --project=. Run.jl $base_folder

In [None]:
# Map each results folder to the label used as that planning period's column header.
if (base_folder / "planning_periods.csv").exists():
    periods_range = pd.read_csv(base_folder / "planning_periods.csv", index_col=-1)["Planning Period"].astype("int").to_dict()
    periods_range = {base_folder / "results" / f"results_{k}": v for k, v in periods_range.items()}
else:
    subfolders = sorted(
        (base_folder / "results").glob("results_p*"),
        key=lambda path: int(path.stem.split("results_p")[-1]),
    )
    # Without the mapping file, label each period by its ID ("p1", "p2", ...). A shared
    # placeholder such as None would collapse the per-period dicts below to one entry
    # and rename every EndCap column to the same name.
    periods_range = {p: p.stem.split("results_")[-1] for p in subfolders}

results_sheet = wb.sheets["GenX Results"]


def write_results_range(range_name, data):
    """Clear the named range on the "GenX Results" sheet, then write ``data`` into it."""
    results_sheet.range(range_name).clear_contents()
    results_sheet.range(range_name).value = data


# Total Capacity
portfolio = pd.read_csv(base_folder / "results" / "capacities_multi_stage.csv", index_col=0)
portfolio = portfolio[[col for col in portfolio.columns if not col.startswith("StartCap")]]
portfolio = portfolio.rename(columns={"EndCap_p"+ path.stem.split("results_p")[-1]: period for path, period in periods_range.items()})
portfolio = portfolio.drop(["Zone"], axis=1)
write_results_range("capacities_multi_stage", portfolio)

# Builds
def get_net_build(path):
    """Return ``NewCap - RetCap`` per row of ``capacity.csv`` in the results folder ``path``."""
    df = pd.read_csv(path / "capacity.csv", index_col=0)[["NewCap", "RetCap"]]
    return df["NewCap"] - df["RetCap"]
builds = pd.concat({period: get_net_build(path) for path, period in periods_range.items()}, axis=1)
write_results_range("capacities", builds)

# CFs
cfs = pd.concat({period: pd.read_csv(path / "capacityfactor.csv", index_col=0)["CapacityFactor"] for path, period in periods_range.items()}, axis=1)
write_results_range("cfs", cfs)

# Generation (AnnualSum divided by 1e6 and rounded to 3 decimals)
generation = (pd.concat({period: pd.read_csv(path / "power.csv", index_col=0).T["AnnualSum"] for path, period in periods_range.items()}, axis=1) / 1e6).round(3)
write_results_range("generation", generation)


results_sheet.activate()
wb.app.calculate()
print(f"Loaded results at: {datetime.now()}")

# TDR Clustering Visualization

- [ ] Plots
- [ ] Are TDRs the same for every planning period? If not, how much are they changing?

In [None]:
import plotly.graph_objects as go
import plotly.io as pio

# Axis styling shared by the x and y axes of the "e3" template.
axes = {
    "showgrid": False,
    "linecolor": "rgb(120, 120, 120)",
    "linewidth": 1,
    "showline": True,
    "ticks": "outside",
    "tickcolor": "rgb(120, 120, 120)",
    "mirror": True,
}

# "e3": fonts, title placement, axis styling and margins.
pio.templates["e3"] = go.layout.Template(
    layout=go.Layout(
        font={"family": "CommitMono", "size": 11, "color": "rgb(120, 120, 120)"},
        title={
            # "size": 32 is intentionally left unset
            "font": {"color": "rgb(3, 78, 110)"},
            "x": 0.05,
            "y": 0.95,
            "xanchor": "left",
            "yanchor": "bottom",
        },
        xaxis=axes,
        yaxis=axes,
        margin={"t": 60, "b": 100, "r": 60, "l": 60},
    )
)

# "5.4x12.32": a fixed figure size, each dimension multiplied by 144.
pio.templates["5.4x12.32"] = go.layout.Template(
    layout=go.Layout(height=5.4 * 144, width=12.32 * 144)
)

# Use "e3" for every figure unless a template is given explicitly.
pio.templates.default = "e3"

In [None]:
import yaml
import pandas as pd
from upath import UPath
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Inputs folder of the first planning period; `base_folder` comes from the export cell above.
base_path = base_folder / "inputs" / "inputs_p1"
# Series to plot: maps a CSV file name (read from `base_path / "system"` by `tdr_plots`)
# to a list of (column name, line colour for the "Sampled" trace) tuples.
timeseries_to_compare = {
    "Demand_data.csv": [
        ("Demand_MW_z1", "rgba(54, 176, 72, 0.5)")
    ],
    "Generators_variability.csv": [
        ("Solar:0", "rgba(255, 192, 0, 0.5)"),
        ("Wind:0", "rgba(49, 235, 255, 0.5)"),
        ("Wind - New Generic:0", "rgba(49, 235, 255, 0.5)"),
        ("Wind - New Generic:1", "rgba(49, 235, 255, 0.5)"),
        ("Wind - New Generic:2", "rgba(49, 235, 255, 0.5)"),
        ("Wind - New Generic:3", "rgba(49, 235, 255, 0.5)"),
        ("Solar - New Generic:0", "rgba(255, 192, 0, 0.5)"),
        ("Solar - New Generic:1", "rgba(255, 192, 0, 0.5)"),
        ("Solar - New Generic:2", "rgba(255, 192, 0, 0.5)"),
        ("Solar - New Generic:3", "rgba(255, 192, 0, 0.5)"),
    ]
}

def tdr_plots(base_path: UPath, timeseries_to_compare: dict[str, list[tuple[str, str]]]):
    """Plot original vs. time-domain-reduced timeseries and save them to ``tdr_plots.html``.

    For every requested column the original series is compared with the series
    rebuilt from its representative periods: each period is replaced by the
    representative period assigned to it in ``TDR_results/Period_map.csv``. Each
    figure has a chronological panel and a duration-curve panel, is shown
    inline, and is appended to ``base_path / "tdr_plots.html"``.

    Args:
        base_path: Inputs folder containing ``TDR_results`` and ``system``.
        timeseries_to_compare: Maps a CSV file name (under ``system``) to a list
            of ``(column name, colour string)`` tuples; the colour is used for
            the "Sampled" traces.
    """
    # Get TDR settings & period mapping
    tdr_folder = base_path / "TDR_results"
    with open(tdr_folder / "time_domain_reduction_settings.yml", "r") as f:
        tdr_settings = yaml.load(f, Loader=yaml.SafeLoader)
    timesteps_per_period = tdr_settings["TimestepsPerRepPeriod"]

    period_map = pd.read_csv(tdr_folder / "Period_map.csv", dtype=int)
    # Lookup Period_Index -> Rep_Period; a key lookup is used instead of relying on
    # a merge result lining up with the frame by row position.
    rep_period_lookup = period_map.set_index("Period_Index")["Rep_Period"]

    original_line = dict(color="rgba(20, 20, 20, 0.5)", width=1)

    # plotly.js only needs to be referenced once, by the first figure written to the file.
    plotlyjs_included = False

    with open(base_path / "tdr_plots.html", "w") as tdr_plots_file:
        for csv_file, columns in timeseries_to_compare.items():
            # Read each CSV once rather than once per column.
            timeseries = pd.read_csv(base_path / "system" / csv_file)

            for column, color_str in columns:
                df = timeseries[["Time_Index", column]].copy()

                df["Period_Index"] = ((df["Time_Index"] - 1) // timesteps_per_period) + 1
                # Position within the period; must use the same period length as Period_Index.
                df["Hour"] = df["Time_Index"] - (df["Period_Index"] - 1) * timesteps_per_period
                df["Rep_Period"] = df["Period_Index"].map(rep_period_lookup)

                # Pair every timestep with the same hour of its representative period.
                df = df.merge(
                    df[["Period_Index", "Hour", column]],
                    left_on=["Rep_Period", "Hour"],
                    right_on=["Period_Index", "Hour"],
                    suffixes=("_original", "_sampled"),
                )[[f"{column}_original", f"{column}_sampled"]]
                df.index = pd.Timestamp("1/1/2007") + pd.to_timedelta(df.index, unit="h")

                original = df[f"{column}_original"]
                sampled = df[f"{column}_sampled"]
                sampled_line = dict(color=color_str, width=1)

                # Plot
                fig = make_subplots(rows=2, cols=1, subplot_titles=["Chronological", "Duration Curve"], vertical_spacing=0.15)
                fig.update_layout(title_text=f"Time Domain Reduction Comparison:<br><b>{column}", legend_tracegroupgap=180, width=1000, height=500)

                # Chronological
                fig.add_trace(
                    go.Scatter(x=df.index, y=original, name="Original", legendgroup=1, line=original_line),
                    row=1,
                    col=1,
                )
                fig.add_trace(
                    go.Scatter(x=df.index, y=sampled, name="Sampled", legendgroup=1, line=sampled_line),
                    row=1,
                    col=1,
                )

                # Duration curve (values sorted high to low, plotted against rank)
                fig.add_trace(
                    go.Scatter(
                        y=original.sort_values(ascending=False, ignore_index=True),
                        name="Original",
                        legendgroup=2,
                        line=original_line,
                    ),
                    row=2,
                    col=1,
                )
                fig.add_trace(
                    go.Scatter(
                        y=sampled.sort_values(ascending=False, ignore_index=True),
                        name="Sampled",
                        legendgroup=2,
                        line=sampled_line,
                    ),
                    row=2,
                    col=1,
                )
                fig.show()
                tdr_plots_file.write(
                    fig.to_html(full_html=False, include_plotlyjs=False if plotlyjs_included else "cdn")
                )
                plotlyjs_included = True

# Show the comparison figures for the series configured above and write them to tdr_plots.html.
tdr_plots(base_path, timeseries_to_compare)
