In [12]:
import xlwings as xw
import pandas as pd
from pydeck.io.html import in_google_colab
from upath import UPath

In [None]:
# TODO before committing / running the model:
# - Do the xlwings thing where the SharePoint path is updated (TODO: spell out the exact steps)
# - Format this notebook before committing
# - Copy Run.jl
# - Copy settings
# - Check that the demand_data, fuel_data, and generator_variability CSVs all have the same length

In [3]:
# Open the Excel workbook whose named ranges are exported by the cells below.
# NOTE(review): absolute, user-specific path — needs editing on any other machine.
workbook_path = '/Users/roderick/Library/CloudStorage/OneDrive-SharedLibraries-ResilientTransition/5.001 Kentucky Resource Council - Documents/Data/Kentucky Load Resource Model.xlsb'
wb = xw.Book(workbook_path)

In [112]:
from collections import defaultdict
import os

# Export every ".csv" named range in the workbook to a CSV file under the
# folder given by the workbook's "BaseFolder" named range.
base_folder = UPath(wb.names["BaseFolder"].refers_to_range.value)

# CSVs whose "resource" column is renamed to "Resource" before writing.
CAPITALIZED_RESOURCE_CSVS = (
    "resources\\policy_assignments\\Resource_NQC_derate.csv",
    "resources\\policy_assignments\\ELCC_multipliers.csv",
)

# Columns that are cast to int whenever they are present in a table.
INT_COLUMN_NAMES = ("can_retire", "new_build", "model", "lds", "Time_Index")

# Group the named ranges by target CSV (some CSVs have been split into
# multiple separate tables).
# Named ranges have the format of
#   [csv file name]...[#]...[optional transformation]
# where the suffixes handled below are ...T, ...ffill, ...drop...1 and ...drop...3.
csv_names = defaultdict(list)
for name in wb.names:
    if ".csv" in name.name:
        csv_names[name.name.split("...")[0]].append(name)

for csv_name, ranges in csv_names.items():
    dfs = []
    # The loop variable is deliberately NOT called `range`: top-level notebook
    # variables stay in the kernel namespace, so shadowing the builtin would
    # break every later `range(...)` call.
    # NOTE(review): parts are joined in `wb.names` iteration order, not sorted
    # by the [#] in the range name — confirm that order is the intended one.
    for named_range in ranges:
        range_name = named_range.name
        is_transposed = range_name.endswith("...T")

        # Get each range as a dataframe. Transposed ranges are read without a
        # header row because their labels sit in the first column instead.
        df = named_range.refers_to_range.options(
            pd.DataFrame, index=0, header=(0 if is_transposed else 1)
        ).value

        # Drop fully empty columns, then fully empty rows.
        df = df.dropna(how="all", axis=1)
        df = df.dropna(how="all", axis=0)
        if "resource" in df.columns:
            # Rows without a resource value are discarded.
            df = df.dropna(subset=["resource"], axis=0)
        if "drop" in df.columns:
            # Rows flagged True in the "drop" column are excluded.
            df = df[df["drop"] != True]

        # Apply optional transform
        if is_transposed:
            # First column becomes the index, which becomes the header after .T
            df = df.set_index(df.columns[0])
            df = df.T
        elif range_name.endswith("...ffill"):
            df = df.ffill()
        elif range_name.endswith("...drop...1"):
            # Keep the first and the last column, then only complete rows.
            df = df.iloc[:, [0, -1]]
            df = df.dropna(how="any")
        elif range_name.endswith("...drop...3"):
            # Keep the first and the last three columns, then only complete rows.
            df = df.iloc[:, [0, -3, -2, -1]]
            df = df.dropna(how="any")

        if csv_name in CAPITALIZED_RESOURCE_CSVS:
            df = df.rename(columns={"resource": "Resource"})

        # Change types for columns to int. `astype` with a mapping returns a new
        # frame, so nothing is assigned back into a (possibly filtered) frame.
        int_columns = [col for col in df.columns if col in INT_COLUMN_NAMES]
        df = df.astype({col: int for col in int_columns})

        dfs.append(df)

    # Join all the dfs side by side; indexes are reset so rows align by position.
    final_df = pd.concat([part.reset_index(drop=True) for part in dfs], axis=1)

    # Save joined dataframe to CSV. Range names use "\" as the path separator,
    # so it is converted to the separator of the current OS.
    filepath = base_folder / csv_name.replace("\\", os.sep)
    filepath.parent.mkdir(parents=True, exist_ok=True)
    final_df.to_csv(filepath, index=False)