In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import os
if not os.path.isdir("cluster_activity_files"):
    os.mkdir("cluster_activity_files")
from i2x.der_hca import hca, islands as isl, PlotUtils as pltutl
import numpy as np
import pandas as pd

# Sequential vs. Cluster Considerations
The purpose of this activity is to highlight the impact of considering interconnection requests sequentially versus in clusters.

**Note:**<br>
Determining the hosting capacity at multiple locations is non-trivial and in its most complete and general form becomes a highly non-linear optimization problem.
Instead of solving this problem, we'll show how the hosting capacity changes depending on the sequencing.
We'll then show how batching several changes and considering upgrades _afterwards_ changes the way some of the ensuing costs may be viewed.

## Part 1: Location and Order Matters!
### Setting a baseline
To set a baseline, we'll first run the hosting capacity analysis at each location _independently_.
This is not an uncommon approach, but it means that each location does not take into account the resource additions anywhere else on the feeder.
As it treats each location independently, this approach can serve as a good reference when trying to understand the impact of adding multiple resources at once.

> **Note:**
> This step can take a few minutes. If your computer is acting up or you simply don't feel like waiting, a saved version of the result is under [saved_resources/independent_hca.pkl](./saved_resources/independent_hca.pkl). Simply copy that file into the `cluster_activity_files` folder and everything else should work.

In [None]:
### Run this cell to actually perform the independent HCA (one calculation per three-phase bus)
logger_header = "\n************* Order Independent HCA ******************\n"
feeder = hca.HCA("./configs/cluster_activity.json", logger_heading=logger_header)
feeder.runbase()

## Evaluate every bus on its own: the HC is calculated but nothing is added to the feeder.
three_phase_buses = feeder.graph_dirs["bus3phase"]
for b in three_phase_buses:
    feeder.hca_round("pv", bus=b, recalculate=True)

## Persist the baseline so the following cells can reload it.
independent_pkl = "./cluster_activity_files/independent_hca.pkl"
feeder.save(independent_pkl)

In [None]:
### Run this cell to load the saved result (alternative to recomputing the independent HCA)
logger_header = "\n************* Loading Independent HCA ******************\n"
feeder = hca.HCA(
    "./cluster_activity_files/independent_hca.pkl",
    logger_heading=logger_header,
    reload=True,
    reload_filemode="a",
)

We can plot the hosting capacity on the feeder (hover over the individual buses to see the values).

In [None]:
## Plot the independent hosting capacity of the currently loaded feeder
hc_plot_target = "./cluster_activity_files/independent_hca"
pltutl.hc_plot(feeder, hc_plot_target, auto_open=True)

We can also query the hosting capacity at a particular bus (or at all buses).

> **Note:**
> To query all buses, simply set the `bus` variable to `None`;
> the output just becomes rather large.

In [None]:
bus = "bus_1112"  # set to None to show the hosting capacity of every bus
if bus is not None:
    ## single bus: round 0 holds the independent result
    hc, cnt = feeder.get_data("hc", "pv", bus=bus, cnt=0)
    hc_kw = round(hc['kw'], 2)
    print(f"Hosting Capacity at bus {bus} is {hc_kw} kW")
else:
    ## all buses: display the full table
    hc = feeder.get_data("hc", "pv")
    display(hc)

### Stochastic resource addition
In the first experiment, we continually add resources, sampling both location and capacity randomly.
We stop whenever a resource encounters a limitation and is therefore not able to interconnect its full desired capacity (without some upgrade needs).

First, we will consider the addition of several resources and simply shuffle the order in which they are added to the system.

In [None]:
### Run the experiment 3 times.
### Round 0 samples resources randomly until one is capacity-limited; rounds 1 and 2
### replay the exact resources of round 0 in a shuffled order.
### Each round leaves its final (violating) state in cluster_activity_stoch_i{i}.pkl.
rng = np.random.default_rng(0) # random number generator with seed specified for reproducibility
for i in range(3):

    ### reload the feeder with the independent HCA data
    logger_header = f"\n************* Re-Loading Independent HCA, Order Experiment Round {i} ******************\n"
    feeder = hca.HCA("./cluster_activity_files/independent_hca.pkl", reload=True, reload_filemode="a",
                    logger_heading=logger_header)
    if i > 0:
        ### permuting the order of addition
        # uses the seeded generator `rng` defined above (not the feeder's own random state)
        resource_order = rng.permutation(len(added_resources))
        while resource_order[-1] == len(resource_order) - 1:
            # make sure we don't put the original resource causing violation last
            # (it is the last entry of added_resources, i.e. index len - 1)
            # NOTE(review): if added_resources holds a single entry this loop can never terminate
            resource_order = rng.permutation(len(added_resources))
        j = 0  # position within resource_order of the next resource to add
    ### add resources until one is limited (hc = 0)
    while True:
        if i == 0:
            ## on first round sampling is done randomly
            feeder.hca_round("pv")
        else:
            ## on subsequent rounds sampling is based on a reordering of the first round
            bus, Sij = added_resources[resource_order[j]]
            j += 1
            feeder.hca_round("pv", bus=bus, Sij=Sij)
        ## get the hosting capacity for the last bus considered, if zero, then likely capacity was limited
        hc, cnt = feeder.get_hc("pv", feeder.visited_buses[-1], feeder.cnt)
        if hc["kw"] == 0:
            ## there are violations with initial desired capacity
            # reload last step, i.e. the snapshot saved after the previous successful addition
            # NOTE(review): if the very first addition of a round is limited, no snapshot has been
            # written in this round yet, so this reads whatever file an earlier execution left behind
            feeder = hca.HCA(f"./cluster_activity_files/cluster_activity_stoch_i{i}.pkl", reload=True, 
                                reload_filemode="a", logger_heading="\n*****Re-loading pre capacity limitation******\n")
            if i == 0:
                # NOTE(review): no bus/Sij is passed, so this presumably relies on the reloaded
                # snapshot drawing the same random resource again -- confirm
                feeder.hca_round("pv", allow_violations=True)
            else:
                feeder.hca_round("pv", bus=bus, Sij=Sij, allow_violations=True) #re-run allowing for violations
            feeder.save(f"./cluster_activity_files/cluster_activity_stoch_i{i}.pkl") #save again for later initialization
            break
        else:
            ## save for later re-wind
            feeder.save(f"./cluster_activity_files/cluster_activity_stoch_i{i}.pkl")

    if i == 0:
        ## on the first iteration we save the resources added so we can reapply the same ones
        ## (list of (bus, Sij) tuples in order of addition; the last one triggered the violation)
        added_resources = []
        for j in range(1, feeder.cnt+1):
            bus = feeder.visited_buses[j-1]
            sij, _ = feeder.get_data("Sij", "pv", bus=bus, cnt=j)
            added_resources.append((bus, sij))

First, let's just look at the results.
We can look at:
* How much capacity is integrated onto the feeder at the time of the violation?
* Where is the resource triggering the violation?
* What sort of violation is triggered?

In [None]:
### Inspect one stochastic round: installed capacity, violations and the triggering bus
i = 2
round_base = f"./cluster_activity_files/cluster_activity_stoch_i{i}"
feeder = hca.HCA(round_base + ".pkl",
                 logger_heading=f"\n*****Re-loading post-violation stochastic round {i}******\n",
                 reload=True, reload_filemode="a")

## recompute the metrics from the last stored result
feeder.metrics.load_res(feeder.lastres)
feeder.metrics.calc_metrics()

## report installed PV capacity and the violations
installed_kw = feeder.data["Stotal"][feeder.cnt].loc["pv", "kw"]
feeder.logger.info(f'Installed Capacity:\n {installed_kw:0.2f} kW')
hca.print_config(feeder.metrics.violation, printf=feeder.logger.info, title="Violations")

## plot, highlighting the bus of the last added resource with the list of violations
trigger_bus = feeder.visited_buses[-1]
violation_label = ','.join(feeder.metrics.last_violation_list)
pltutl.hc_plot(feeder, round_base + "_step1",
               highlight_nodes={trigger_bus: violation_label},
               title=f"Stochastic Round {i}: Total Installed Capacity {installed_kw:0.2f} kW",
               auto_open=True)

### Comparison to independent hosting capacity
We can compare the initial, independent hosting capacity, to the one calculated with the resources added.
Since the last resource added in each round causes a violation, we do this by removing the last resource and re-calculating the hosting capacity in this state.
Rather than perform the recalculation for _all_ buses in the feeder, we focus just on the ones where resources were added in any of the three experiments.
Comparing these results over all 4 cases (independent calculation + 3 experiments) illustrates directly how the hosting capacity is affected by sequencing.

>**Note:** In this analysis the hosting capacity for resources that _were_ added to the circuit is given for the point in time where they were added.
>Resources that were added in the first run but not the others are recalculated on the last round with no iteration.

In [None]:
### Collect, per bus, the independent HC and the HC seen in each of the three sequences.
### `out` maps bus -> {"independent": kW, "Sequence 0": kW, "Sequence 1": kW, "Sequence 2": kW}
out = {}
for i in range(3):
    ### Load run
    feeder = hca.HCA(f"./cluster_activity_files/cluster_activity_stoch_i{i}.pkl", 
                    reload=True, reload_filemode="a",
                    logger_heading=f"\n***** Difference to Independent Method {i} *****\n")
    feeder.metrics.load_res(feeder.lastres)
    feeder.metrics.calc_metrics()
    ### The violation-triggering bus of this run must come first in buslist (handled at j == 0)
    if i == 0:
        # run 0 defines the set of buses; reversed so its last (triggering) bus is first
        buslist = list(reversed(feeder.visited_buses))
    else:
        # keep the same set of buses (carried over from the previous iteration),
        # but move this run's triggering bus to the front
        buslist = [feeder.visited_buses[-1]] + [b for b in buslist if b != feeder.visited_buses[-1]]
    ### loop over all buses; the first one is the triggering bus and needs special handling
    for j, b in enumerate(buslist):
        if j == 0:
            ### for the last added bus we'll undo the addition and recalculate the HCA since
            ### what is on the feeder right now causes a violation
            feeder.undo_hca_round("pv", b, feeder.cnt)
            feeder.hca_round("pv", bus=b, recalculate=True)
        elif b not in feeder.visited_buses:
            ### This bus wasn't reached in this experiment, recalculate its hosting capacity BEFORE
            ### the violation triggering addition is made
            feeder.hca_round("pv", bus=b, recalculate=True)
        if b not in out:
            # first time this bus is seen: store the round 0 (independent) value
            hc, cnt = feeder.get_hc("pv", b, cnt=0) # get the hosting capacity
            
            out[b] = {"independent": hc["kw"]}
        
        # no cnt given: the returned cnt is reused below to fetch the matching Sij
        hc, cnt = feeder.get_hc("pv", b) # get the hosting capacity
        if b not in feeder.visited_buses:
            # undo_hca_round removes from visited buses
            # nothing is installed at this bus, so the HC is reported as is
            out[b][f"Sequence {i}"] = hc["kw"]
        else:
            # resource is installed: report the HC plus the capacity that was added at that bus
            sij, cnt = feeder.get_data("Sij", "pv", b, cnt) # get the added capacity
            out[b][f"Sequence {i}"] = hc["kw"] + sij["kw"]

In [None]:
### Tabulate the per-bus results (sorted by the independent HC) and plot them as grouped bars
df = pd.DataFrame.from_dict(out, orient="index")
df = df.sort_values(by="independent", axis=0)
display(df)
df.plot.bar(backend='plotly', barmode='group',
            title="Difference In Hosting Capacity by Sequence",
            labels={"value": "Hosting Capacity [kW]", "index": "bus"})

### Next Steps
The independent HCA presents an _upper bound_ for the hosting capacity.
As more resources are added, depending on their location and size w.r.t. any given bus, the resulting hosting capacity may be impacted more or less.

We simulated each of the three experiments as a sequence of additions; however, we can also think about the final state as a _single cluster_ of resources under study that results in a violation.
In the [next section](#part-2-upgrades) we consider the upgrades necessary for each of the experiments and then turn our attention to the implication of the sequential vs. cluster framing in terms of costs in [Part 3](#part-3-cost-sharing).

## Part 2: Upgrades
We've already seen in the [voltage difference activity](./vdiff_activity.ipynb) that for the $\Delta V$ violations, upgrading the conductors, as well as activating advanced inverter functions can help to alleviate the violations by forming a stronger connection to the feeder source.

To keep from conflating too many factors, we'll focus on the conductor upgrades here, and leave out the advanced inverter functions.
The reason for this is that the inverter functions would impact the underlying independent hosting capacity results as well.

For each of our three experiments, we'll perform the upgrades, moving towards the feeder head, until no more violations are encountered.

In [None]:
#########################################################################
####### these are some functions to get the path and perform the upgrades
#########################################################################
def get_upgrade_path(feeder:hca.HCA):
    """Return the longest path from a voltage-difference violation to its nearest source.

    For every bus reported under the "v" key of
    ``feeder.metrics.get_vdiff_locations()`` the path to the nearest source bus
    is looked up on the undirected feeder graph. The path with the most nodes
    is returned; on ties the first one found wins.

    Parameters
    ----------
    feeder : hca.HCA
        Feeder whose metrics have already been calculated.

    Returns
    -------
    list
        Path as returned by ``isl.get_nearest_source``; callers treat element 0
        as the violating bus.

    Raises
    ------
    ValueError
        If no voltage-difference violation location is reported, since there is
        then no path to select.
    """
    ### collect locations where voltage difference is violated
    vdiff_buses = list(feeder.metrics.get_vdiff_locations()["v"].keys())
    if not vdiff_buses:
        raise ValueError("No voltage difference violations found: cannot determine an upgrade path")

    ### the undirected graph is identical for every bus, so build it only once
    undirected_graph = feeder.G.to_undirected()

    ### for each location with a violation, find the path to the source bus
    ### (only the path is needed, the source itself is not used)
    upgrade_paths = [isl.get_nearest_source(undirected_graph, b)[1] for b in vdiff_buses]

    ### select the maximum distance to the source bus
    ### (max returns the first longest path, like np.argmax on the lengths)
    return max(upgrade_paths, key=len)

def upgrade_until_no_violation(feeder:hca.HCA, upgrade_path:list, multiupgrade=True):
    """Upgrade conductors along ``upgrade_path`` until no violations remain.

    The path is walked edge by edge starting at ``upgrade_path[0]``. Every edge
    of class "line" is upgraded, OpenDSS is re-run and the metrics are
    recalculated; the walk stops as soon as the violation count is zero.

    Parameters
    ----------
    feeder : hca.HCA
        Feeder to modify in place.
    upgrade_path : list
        Sequence of buses; consecutive entries must be connected by the edge
        ``(upgrade_path[k+1], upgrade_path[k])`` in ``feeder.G``.
    multiupgrade : bool, default True
        If False, line segments that already have an upgrade recorded are
        skipped (no upgrade and no re-run for that segment).

    Returns
    -------
    bool
        True if the violations were cleared, False if the end of the path was
        reached with violations remaining (previously this case returned
        silently; it is now also logged).

    Raises
    ------
    ValueError
        If OpenDSS does not converge after an upgrade step.
    """
    for u, v in zip(upgrade_path[1:], upgrade_path[:-1]):
        edge = feeder.G.edges[u, v]
        eclass = edge["eclass"]
        ename = edge["ename"]
        if eclass.lower() == "line":
            if not multiupgrade and feeder.get_data("upgrades", "line", ename)[0] is not None:
                # segment already updated
                continue
            feeder.upgrade_line(ename)

        ## run dss, keeping the changes made so far
        feeder.reset_dss(clear_changes=False)
        feeder.rundss()
        if not feeder.lastres["converged"]:
            raise ValueError("Open DSS did not converge")

        feeder.metrics.load_res(feeder.lastres)
        feeder.metrics.calc_metrics()

        ## check if there are any violations
        if feeder.metrics.violation_count == 0:
            return True

    ## the whole path was processed without clearing the violations: make that visible
    feeder.logger.info(
        f"Upgrade path exhausted with {feeder.metrics.violation_count} violation(s) remaining")
    return False

In [None]:
for i in range(3):
    ### Load the post-violation snapshot of run i and refresh its metrics
    run_base = f"./cluster_activity_files/cluster_activity_stoch_i{i}"
    feeder = hca.HCA(run_base + ".pkl",
                    logger_heading=f"\n***** Performing Upgrades for Run {i} *****\n",
                    reload=True, reload_filemode="a")
    feeder.metrics.load_res(feeder.lastres)
    feeder.metrics.calc_metrics()

    ### Path from the furthest violating bus to the source
    upgrade_path = get_upgrade_path(feeder)
    feeder.logger.info(f"Furthest violation from substation on bus {upgrade_path[0]}")

    ### Upgrade along the path
    upgrade_until_no_violation(feeder, upgrade_path)

    ### Report cost and length (converted to feet) of the line upgrades of the current round
    feeder.logger.info("Estimated upgrade cost:")
    new_upgrades = feeder.get_data('upgrades', 'line', cnt=feeder.cnt)
    upgrade_cost = new_upgrades['cost'].sum()
    upgrade_length_ft = new_upgrades.apply(
        lambda row: row.length*hca.conductor_cost.units2ft(row.length_unit), axis=1).sum()
    feeder.logger.info(f"\tLines: ${upgrade_cost:0.2f} | {upgrade_length_ft:0.2f} ft")

    ### Update hosting capacity at all buses under consideration
    ### (the bus set is taken from run 0 and reused for the later runs)
    if i == 0:
        buslist = feeder.visited_buses.copy()
    for b in buslist:
        feeder.hca_round("pv", bus=b, recalculate=True)

    ### Plot and save the upgraded state
    pltutl.upgrade_plot(feeder, run_base + "_upgrade1", include_plotlyjs='cdn', auto_open=True)
    feeder.save(run_base + "_upgrade1.pkl")

Next we gather some summary statistics for each of the experiments and look at:
* _Total installed capacity_: total added capacity to the feeder;
* _Triggering capacity_: the capacity of the last unit, which triggers the upgrades;
* _Total upgrade costs_: total costs needed to remove violations under the presence of the _Total installed capacity_;

and consider how these variables relate to each other.

In [None]:
### Summary statistics per experiment: upgrades, total installed capacity, triggering unit
out = {}
for i in range(3):
    ## load the upgraded snapshot of run i and refresh its metrics
    feeder = hca.HCA(f"./cluster_activity_files/cluster_activity_stoch_i{i}_upgrade1.pkl",
                    reload=True, reload_filemode="a")
    feeder.metrics.load_res(feeder.lastres)
    feeder.metrics.calc_metrics()

    ## the upgrades: total cost and length, the length units used, and number of entries
    upgrade_aggregations = {"cost": "sum",
                            "length": "sum",
                            "length_unit": "unique",
                            "cnt": "count"}
    summary = feeder.get_data("upgrades", "line").agg(upgrade_aggregations).to_dict()

    ## total installed capacity
    summary["total kw"] = feeder.data["Stotal"][feeder.cnt].loc["pv", "kw"]

    ## triggering unit: the last resource added
    bus = feeder.visited_buses[-1]
    bus_kw = feeder.get_data("Sij", "pv", bus, feeder.cnt)[0]["kw"]
    summary["trigger bus"] = bus
    summary["trigger kw"] = bus_kw

    out[f"sequence {i}"] = summary

## one column per experiment, one row per statistic
df = pd.DataFrame(out)
print("Tabular Comparison")
display(df)

### Total Installed Capacity vs. Cost
The way the experiment was set up, the first run will always have the most installed capacity.
This is because the total capacity was limited to the set of resources that was integrated in the first run.
The motivation for this setup is that each run can represent different realizations of resources entering the queue, while at the same time the full set can be treated separately as a single cluster.
Nonetheless, it is interesting to see how the total installed capacity relates to the total upgrade costs, as plotted in the figures below.

In [None]:
## One bar per experiment: total upgrade cost divided by total installed capacity
summary_t = df.T  # experiments as rows
txt_labels = [
    f'{summary_t["total kw"][idx]:0.1f} kW (total)<br>${summary_t["cost"][idx]:0.2f}'
    for idx in summary_t.index
]
cost_vs_total = df.loc[["cost", "total kw"]].T
cost_vs_total = cost_vs_total.assign(cost_per_kw=lambda x: x["cost"]/x["total kw"])
fig = cost_vs_total.plot.bar(backend='plotly',
            x=summary_t.index, y='cost_per_kw',
            color=summary_t.index,
            text=txt_labels,
            title="Cost per Installed kW (total)",
            labels={"cost_per_kw": "$/kW", "x": "experiment", "color": "experiment"})
fig.update_layout(width=800)
fig.show()

### Triggering Capacity vs. Cost
In a standard sequential queue process the triggering unit is generally responsible for upgrade costs. 
It is therefore interesting to consider what those costs are w.r.t the capacity of the triggering unit.

Recall that in both the first and last experiment, the unit on `bus_1113` triggers the upgrades, while in the second experiment it is the unit on `bus_1112`.

In [None]:
## One bar per experiment: total upgrade cost divided by the capacity of the triggering unit.
## Labels are formatted (0.1f kW, 0.2f $) like those of the total-capacity figure instead of
## printing the raw, unrounded floats.
txt_labels= [f'{df.T["trigger kw"][idx]:0.1f} kW (trigger)<br>${df.T["cost"][idx]:0.2f}' for idx in df.T.index]
fig = df.loc[["cost", "trigger kw"]].T.assign(cost_per_kw=lambda x: x["cost"]/x["trigger kw"]).plot.bar(backend='plotly', 
            x=df.T.index, y='cost_per_kw',
            color=df.T.index,
            text = txt_labels,
            labels={"cost_per_kw": "$/kW", "x": "experiment", "color": "experiment"},
            title="Cost per Installed kW (triggering)")
fig.update_layout(width=800)
fig.show()

### Comparison of cost per capacity
Finally, we compare the $/kW for the total capacity and triggering capacity cases.
As expected, when associated with the triggering capacity, the costs are several times greater.

In [None]:
## $/kW when the upgrade cost is attributed to the triggering unit (tr) vs. the total capacity (to)
cost_and_capacity = df.loc[["cost", "total kw", "trigger kw"]].transpose()
df_comp = cost_and_capacity.assign(
    cost_per_kw_tr=lambda x: x["cost"]/x["trigger kw"],
    cost_per_kw_to=lambda x: x["cost"]/x["total kw"])
df_comp = df_comp.loc[:, ["cost_per_kw_tr", "cost_per_kw_to"]]

## ratio between the two attributions, shown as text on the first trace
txt = [f'{df_comp["cost_per_kw_tr"][idx]/df_comp["cost_per_kw_to"][idx]:0.1f}x'
       for idx in df_comp.index]

## NOTE(review): the frame is rebuilt from a dict before plotting, presumably so that
## pandas re-infers numeric dtypes -- confirm
plot_frame = pd.DataFrame(df_comp.to_dict())
fig = plot_frame.plot.bar(backend='plotly', barmode="group",
                        title="Comparison of $/kW for total and triggering capacity",
                        labels={"value": "$/kW", "index": "experiment"})
fig.data[0].text = txt
fig.show()

### Continuing the queue
The previous comparisons are not exactly "fair", in that they are essentially comparing the system between different stages of evolution.
They help illustrate that depending on which resources are integrated, where they are integrated, and when they are integrated, upgrades occur at different times and magnitudes, triggered by different units.

When considering sequential versus cluster processes, what we're _really_ trying to focus on is the impact of _time_.
To better isolate the time dimension from our experiments, for sequences 2 and 3, we continue to add resources, targeting the same capacity as the first experiment and add upgrades as they occur.
The result is three experiments with identical installed capacity.
Because of the simplified upgrade methodology used, the results also have exactly the same set of final upgrades.
Therefore, the only difference is _when_ the upgrades take place, and thus, which units are responsible for them.

In [None]:
####################################################
### Upgrade Runs 2 and 3 to same capacity as run 1
#####################################################
### get the resource list (all resources added in the first sequence)
feeder = hca.HCA(f"./cluster_activity_files/cluster_activity_stoch_i0_upgrade1.pkl", 
                    reload=True, reload_filemode="a")
resources = feeder.get_data("Sij", "pv")

# random number generator with seed specified for reproducibility
# Note: because we are seeding and using the random number generator in the same order as during the generation
# of these runs, the resource orders created here will match those from before.
rng = np.random.default_rng(0) 

for i in range(1,3):
    resource_order = rng.permutation(resources.shape[0])
    while resource_order[-1] == len(resource_order) - 1:
        # make sure we don't put the original resource causing violation last
        resource_order = rng.permutation(len(resource_order))
    # resources in the order of addition for this sequence (built once, not on every iteration)
    ordered_resources = resources.iloc[resource_order]

    ### Load run
    logger_header = f"\n************* Loading upgraded run Sequence {i}******************\n"
    feeder = hca.HCA(f"./cluster_activity_files/cluster_activity_stoch_i{i}_upgrade1.pkl", 
                    reload=True, reload_filemode="a",
                    logger_heading=logger_header)
    feeder.metrics.load_res(feeder.lastres)
    feeder.metrics.calc_metrics()
    upgrade_round = 2  # round 1 is the snapshot loaded above

    ### keep adding the remaining resources; feeder.cnt is the position of the next one
    ### NOTE(review): the feeder is only saved after an upgrade round, so resources added
    ### after the last upgrade are not written to disk by this loop
    while feeder.cnt < resources.shape[0]:
        
        ## get bus and capacity
        bus = ordered_resources.iloc[feeder.cnt].name
        Sij = ordered_resources.loc[bus, ["kw", "kva"]].to_dict()
        
        ## add bus and check for violations
        feeder.hca_round("pv", bus=bus, Sij=Sij, allow_violations=True)

        if feeder.metrics.violation_count > 0:
            # violations occurred
            # single definition of the snapshot path so the log message always matches the file written
            # (the message used to say "_upgrade2.pkl" regardless of the actual upgrade round)
            save_path = f"./cluster_activity_files/cluster_activity_stoch_i{i}_upgrade{upgrade_round}.pkl"
            if "voltage_vdiff" not in feeder.metrics.last_violation_list:
                # only voltage difference violations are handled by the upgrade procedure
                feeder.logger.info(f"Expected voltage difference violation but encountered {feeder.metrics.last_violation_list}.")
                feeder.logger.info(f"saving to {save_path} and exiting.")
                feeder.save(save_path)
                break
            
            feeder.logger.info(f"\n******* Starting upgrade round {upgrade_round} *********\n")
            ## get upgrade path
            upgrade_path = get_upgrade_path(feeder)
            feeder.logger.info(f"Furthest violation from substation on bus {upgrade_path[0]}")
            
            ### Perform the upgrades
            ### assumption: we're not going to upgrade segments twice
            upgrade_until_no_violation(feeder, upgrade_path, multiupgrade=False)
            feeder.logger.info(f"Estimated upgrade cost (upgrade round {upgrade_round}, hca round {feeder.cnt}):")
            new_upgrades = feeder.get_data('upgrades', 'line', cnt=feeder.cnt)
            feeder.logger.info(f"\tLines: ${new_upgrades['cost'].sum():0.2f} | {new_upgrades.apply(lambda x: x.length*hca.conductor_cost.units2ft(x.length_unit), axis=1).sum():0.2f} ft")
            
            ### save
            feeder.save(save_path)
            upgrade_round += 1

In [None]:
### Per sequence: attribute the line upgrade costs to the unit whose HCA round triggered them
df = {}
for i in range(3):
    ## locate the last consecutively numbered upgrade snapshot of this sequence and load
    ## only that one (instead of loading every snapshot in turn just to keep the last)
    j = 0
    while os.path.isfile(f"./cluster_activity_files/cluster_activity_stoch_i{i}_upgrade{j+1}.pkl"):
        j += 1
    if j == 0:
        # fail loudly rather than silently reusing a feeder left over from an earlier cell/iteration
        raise FileNotFoundError(
            f"No upgrade snapshot found for sequence {i}: "
            f"./cluster_activity_files/cluster_activity_stoch_i{i}_upgrade1.pkl")
    feeder = hca.HCA(f"./cluster_activity_files/cluster_activity_stoch_i{i}_upgrade{j}.pkl", 
                reload=True, reload_filemode="a")
    if i == 0:
        ## one fixed color per bus, shared by all subplots
        colors = pltutl.ColorList()
        cmap = {b: colors.colors[k] for k,b in enumerate(feeder.visited_buses)}
    ## join resources and upgrades on the HCA round (cnt), then sum the cost per bus
    df[i] = feeder.get_data("Sij", "pv").reset_index(names="bus").merge(
        feeder.get_data("upgrades", "line"), 
            how="inner", on="cnt").groupby("bus").agg(
                {"kw": "first", "cost": "sum"}).assign(**{"$/kw": lambda x: x.cost/x.kw})

## top row: cost per bus, bottom row: cost per installed kW; one column per sequence
fig = pltutl.make_subplots(2,3, shared_xaxes=True, shared_yaxes=True,
                            subplot_titles=("sequence 1", "sequence 2", "sequence 3"))
for i in range(3):
    fig.add_trace(pltutl.go.Bar(x=df[i].index, y=df[i]["cost"],
                                name="cost[$]", customdata=df[i]["kw"].values,
                                marker_color=[cmap[idx] for idx in df[i].index],
                                hovertemplate="%{x}<br>$%{y:0.2f}<br>%{customdata} kW"), row=1,col=i+1)
    fig.add_trace(pltutl.go.Bar(x=df[i].index, y=df[i]["$/kw"], 
                                name="$/kW", customdata=df[i]["kw"].values,
                                marker_color=[cmap[idx] for idx in df[i].index],
                                hovertemplate="%{x}<br>$%{y:0.2f}/kW<br>%{customdata} kW"), row=2,col=i+1)
fig.update_layout(title="Upgrade Costs", showlegend=False, width=800, height=600)
fig.update_yaxes(title_text="cost [$]", row=1, col=1)
fig.update_yaxes(title_text="$/kW", row=2, col=1)

The figure clearly illustrates the vast differences between different queue realizations.
Bus 1113 is responsible for between 0 and 3 million dollars' worth of upgrades for the installation of a 567 kW unit.

This leads to the final section of this exercise, where we consider how hosting capacity analysis might be used to allocate costs between multiple projects within a cluster.

## Part 3: Cost Sharing
As seen in the previous section, the necessary upgrades are attributed to different resources, depending on sequencing.
If we consider all resources as part of a single cluster, we can use the hosting capacity analysis to understand the benefit that the upgrades bring to each location.
The objective of this section is to come up with a quantifiable methodology for allocating the costs of necessary upgrades, while taking out the strong variability due to queue sequence.

There are different ways to go about this, but we stick here to the _independent_ hosting capacity, because the other calculation methods inherently combine the impact of the other resources.
As a first step, we would like to isolate the impact/benefit of a set of upgrades on an individual location.
To achieve this, we perform the upgrades, and then recalculate the independent hosting capacities.

Cost sharing is determined in the following way:
1. We calculate a $\Delta HC_i$ for each location, $i$.
2. From $\Delta HC_i$ we get the percent change in HC as $\Delta HC_i/HC_i^0$, where $HC_i^0$ is the pre-upgrade hosting capacity at location $i$.
3. Next, the installed capacity is weighted by the hosting capacity percent change, to give a weighted-installed-capacity: $w_i[kW] = P_i \cdot \Delta HC_i/HC_i^0$
4. Finally, the cost share is allocated based on the weighted-installed-capacity: $c_i[\$] = c_{total}[\$] * w_i/\sum_i w_i$  

Using this approach, the benefit of the upgrades to each project are weighted, as well as the actual capacity to be installed.

The cost sharing calculation is performed for the first sequence only, since, [as noted before](#continuing-the-queue), the final upgrades and total cost are equal for the three sequences once all units/upgrades have been performed.

In [None]:
######### Independent HC Change ###########
### Builds cluster_activity_cost_share.pkl with, per the cnt bookkeeping below:
###   round 0: the original independent HC,
###   round 1: the independent HC recalculated after the upgrades,
###   round 2: all resources of sequence 0 added.
### Load feeder with upgrades (sequence 0 after its first upgrade round)
feeder = hca.HCA(f"./cluster_activity_files/cluster_activity_stoch_i0_upgrade1.pkl", 
                    reload=True, reload_filemode="a")
logger_header = f"\n************* Loading Independent HCA: Cost Share Analysis Sequence 0******************\n"
feeder_ind = hca.HCA("./cluster_activity_files/independent_hca.pkl", reload=True, reload_filemode="a",
                logger_heading=logger_header)

### copy upgrades from snapshot with upgrades to the independent HCA snapshot
feeder_ind.copy_upgrades(feeder)

# increment HCA round to not overwrite data (the round 0 results must stay available)
feeder_ind.cnt += 1 

### Iterate over the added resource buses and recalculate the HCA
for b in feeder.visited_buses:
    ## initialize HC search with last hc value (the pre-upgrade, round 0 result)
    hc0 = feeder_ind.get_hc("pv", b, cnt=0)[0]
    feeder_ind.hca_round("pv", bus=b, Sij=hc0, recalculate=True)

### now actually add the resources so they are available in this instance
for b in feeder.visited_buses:
    # capacity that was installed at this bus in sequence 0
    Sij = feeder.get_data("Sij", "pv", bus=b)[0]
    # add the resources (no need to iterate and find the HC)
    feeder_ind.hca_round("pv", bus=b, Sij=Sij, hciter=False, allow_violations=True)
    # NOTE(review): presumably hca_round advances cnt, which is undone here -- confirm
    feeder_ind.cnt -=1 # keep all resources at last iteration
# restore the counter once so it points at the round holding all added resources
feeder_ind.cnt += 1
feeder_ind.save("./cluster_activity_files/cluster_activity_cost_share.pkl")


In [None]:
#### Gather the results and calculate the cost share
feeder = hca.HCA(f"./cluster_activity_files/cluster_activity_cost_share.pkl", 
                    reload=True, reload_filemode="a")

## Both HC values are read from the snapshot loaded above, so this cell does not depend on
## a `feeder_ind` variable left over from the cell that created the snapshot.
out = {"bus":[], "hc_pre":[], "hc_post":[], "hc_delta": []}
for b in feeder.visited_buses:
    hc0 = feeder.get_hc("pv", b, cnt=0)[0]     #round 0 is the initial
    hc1 = feeder.get_hc("pv", b, cnt=1)[0]     #round 1 is after the upgrades (round 2 has resources added)
    out["bus"].append(b)
    out["hc_pre"].append(hc0["kw"])
    out["hc_post"].append(hc1["kw"])
    out["hc_delta"].append(hc1["kw"] - hc0["kw"])

### Cost Share
# get the percent HC change
# weight the installed capacity by the percent HC change
# cost share is weighted installed capacity / sum weighted installed capacity
# cost is total cost * cost share.
out = pd.DataFrame(out).set_index("bus").join(feeder.get_data("Sij", "pv")).assign(
    per_delta = lambda x: 100*x["hc_delta"]/x["hc_pre"],
    w_cap=lambda x: x.per_delta*x.kw/100,
    cost_share=lambda x: x.w_cap/sum(x.w_cap),
    cost=lambda x: x.cost_share * feeder.get_data("upgrades", "line")["cost"].sum(),
    **{"$/kw": lambda x: x.cost/x.kw}
)

#### Plot
## one fixed color per bus; top panel: allocated cost, bottom panel: cost per installed kW
colors = pltutl.ColorList()
cmap = {b: colors.colors[i] for i,b in enumerate(feeder.visited_buses)}
fig = pltutl.make_subplots(2,1, shared_xaxes=True, shared_yaxes=True)

fig.add_trace(pltutl.go.Bar(x=out.index, y=out["cost"],
                            name="cost[$]", customdata=out["kw"].values,
                            marker_color=[cmap[idx] for idx in out.index],
                            hovertemplate="%{x}<br>$%{y:0.2f}<br>%{customdata} kW"), row=1,col=1)
fig.add_trace(pltutl.go.Bar(x=out.index, y=out["$/kw"], 
                            name="$/kW", customdata=out["kw"].values,
                            marker_color=[cmap[idx] for idx in out.index],
                            hovertemplate="%{x}<br>$%{y:0.2f}/kW<br>%{customdata} kW"), row=2,col=1)
fig.update_layout(title="Upgrade costs with HCA based cost sharing", showlegend=False, width=800, height=600)
fig.update_yaxes(title_text="cost [$]", row=1, col=1)
fig.update_yaxes(title_text="$/kW", row=2, col=1)
fig.show()

### Comparison to sequential process
The costs determined based on the $\Delta HC$ concept are now compared to the sequence based costs from [the previous section](#continuing-the-queue).

In [None]:
### Sequence-based cost attribution (entries 0-2) next to the HCA based cost sharing (entry 3)
df = {}
for i in range(3):
    ## locate the last consecutively numbered upgrade snapshot of this sequence and load
    ## only that one (instead of loading every snapshot in turn just to keep the last)
    j = 0
    while os.path.isfile(f"./cluster_activity_files/cluster_activity_stoch_i{i}_upgrade{j+1}.pkl"):
        j += 1
    if j == 0:
        # fail loudly rather than silently reusing a feeder left over from an earlier cell/iteration
        raise FileNotFoundError(
            f"No upgrade snapshot found for sequence {i}: "
            f"./cluster_activity_files/cluster_activity_stoch_i{i}_upgrade1.pkl")
    feeder = hca.HCA(f"./cluster_activity_files/cluster_activity_stoch_i{i}_upgrade{j}.pkl", 
                reload=True, reload_filemode="a")
    
    ## join resources and upgrades on the HCA round (cnt), then sum the cost per bus
    df[i] = feeder.get_data("Sij", "pv").reset_index(names="bus").merge(
        feeder.get_data("upgrades", "line"), 
            how="inner", on="cnt").groupby("bus").agg(
                {"kw": "first", "cost": "sum"}).assign(**{"$/kw": lambda x: x.cost/x.kw})

## `out` is the cost share table computed in the cost sharing cell
df[3] = out
### plot (bar colors come from `cmap`, defined together with `out`)
fig = pltutl.make_subplots(2,4, shared_xaxes=True, shared_yaxes=True,
                            subplot_titles=("sequence 1", "sequence 2", "sequence 3", "HCA Cost Sharing"))
for i in range(4):
    fig.add_trace(pltutl.go.Bar(x=df[i].index, y=df[i]["cost"],
                                name="cost[$]", customdata=df[i]["kw"].values,
                                marker_color=[cmap[idx] for idx in df[i].index],
                                hovertemplate="%{x}<br>$%{y:0.2f}<br>%{customdata} kW"), row=1,col=i+1)
    fig.add_trace(pltutl.go.Bar(x=df[i].index, y=df[i]["$/kw"], 
                                name="$/kW", customdata=df[i]["kw"].values,
                                marker_color=[cmap[idx] for idx in df[i].index],
                                hovertemplate="%{x}<br>$%{y:0.2f}/kW<br>%{customdata} kW"), row=2,col=i+1)
fig.update_layout(title="Upgrade Costs", showlegend=False, width=800, height=600)
fig.update_yaxes(title_text="cost [$]", row=1, col=1)
fig.update_yaxes(title_text="$/kW", row=2, col=1)

In [None]:
## One bar group per method, one trace per bus; buses without a cost in a method count as 0
method_names = ["sequence 1", "sequence 2", "sequence 3", "HCA Cost Sharing"]
cost_columns = [df[i]["cost"] for i in range(4)]
cost_by_method = pd.concat(cost_columns, keys=method_names, axis=1).fillna(0).T
fig = cost_by_method.plot.bar(
    backend='plotly',
    title="Cost Sharing vs. Sequential Processing",
    labels={"value": "cost [$]", "index": "experiment"},
)
## color each trace with the fixed color of its bus (trace name == bus name)
for d in fig.data:
    d.marker.color = cmap[d.name]
fig.show()

# Backup

In [None]:
# fig = pd.concat([out[i]["cost"] for i in range(3)], keys=[f"sequence {i}" for i in range(3)], axis=1).fillna(0).plot.bar(
#     barmode='group', backend='plotly',
#     labels={"value": "cost [$]", "variable": "experiment"},
#     title="Delta HC based cost share")

In [None]:
# txt_labels = [f'{out[i]["kw"].sum()} kW<br>(total)' for i in range(3)]
# fig = pd.concat([out[i]["cost"] for i in range(3)], keys=[f"sequence {i}" for i in range(3)], axis=1).fillna(0).T.plot.bar(
#     backend='plotly',
#     labels={"value": "cost [$]", "index": "experiment"},
#     title="Delta HC based cost share: distribution within clusters")
# fig.data[-1].text = txt_labels
# fig.show()

In [None]:
# # scatter figure
# fig = df.loc[["cost", "total kw"]].transpose().plot.scatter(backend='plotly',
#     x="total kw", y="cost", color=df.T.index,
#     labels={"cost" : "cost [$]", 
#             "total kw": "total installed capacity [kW]",
#             "index": "experiment"},
#     title="Total Installed Capacity vs. Cost")
# fig.update_traces(marker_size=10)
