In [1]:
import pandas as pd
import numpy as np

from plant_opt.models.plant_model_stages_recourse_stochastic_cvx import Plant


## Background

This notebook continues on from `compare_stochastic_nb.ipynb`, but uses a model implemented in cvxpy, which makes some "advanced" constraints easier to implement.

## Sanity check

Run the model with exactly the same inputs to verify that it produces the same results in the base case:




In [2]:
from plant_opt.scenario_tree.tree import random_walk_tree_builder

# Number of stages in the sanity-check scenario tree.
stages = 4

# One row per random-walk variable: its name followed by that variable's entry in each of
# the three positional per-variable lists handed to random_walk_tree_builder.
# NOTE(review): the meaning of the three numeric columns is defined by
# random_walk_tree_builder's signature, which is not visible here - confirm there.
walk_table = [
    ("crude_light_price", 30, 1, 0),
    ("crude_heavy_price", 20, 1, 0),
    ("prod_price_0", 50, 1, 0),
    ("prod_price_1", 40, 1, 0),
    ("prod_price_2", 30, 1, 0),
    ("demand_0", 400, 30, 60),
    ("demand_1", 300, 30, 60),
    ("demand_2", 200, 30, 60),
]

# Transpose the table back into the parallel lists the builder expects.
variables, walk_arg_1, walk_arg_2, walk_arg_3 = (list(column) for column in zip(*walk_table))

root, all_nodes = random_walk_tree_builder(
    variables,
    walk_arg_1,
    walk_arg_2,
    walk_arg_3,
    stages=stages,
    branch_factor=2,
    seed=42,
    truncate_places=0,
)

## Model Solving

In [3]:
# Plant parameters shared by every model built in this notebook
crude_distil_cap = 1000
products = 3

# Rows: light, medium, heavy intermediate; columns: [light crude, heavy crude]
crude_ratios = [
    [3, 1],
    [1, 2],
    [0, 1],
]
refine_caps = [1000, 1000, 1000]

# Rows: light, medium, heavy product; columns: light/medium/heavy intermediates
product_ratios = [
    [2, 1, 0.0],
    [0.2, 1.0, 0.2],
    [0.0, 0.8, 2.0],
]

# NOTE(review): passed to Plant as allowed_output_change; its exact semantics live in the model
allowed_output_change = 20

In [4]:
# Base-case plant on the sanity-check scenario tree; the product ratios are handed over
# as a numpy array rather than a nested list.
product_ratio_matrix = np.array(product_ratios)

p_full = Plant(
    crude_distil_cap=crude_distil_cap,
    products=products,
    crude_ratios=crude_ratios,
    refine_caps=refine_caps,
    product_ratios=product_ratio_matrix,
    stages=stages,
    scenario_tree_root=root,
    scenario_tree_all_nodes=all_nodes,
    allowed_output_change=allowed_output_change,
)

In [5]:
# Solve the base-case model and record the headline numbers
import cvxpy as cp

result_columns = ["Model", "Objective Value", "Light Crude Import", "Heavy Crude Import"]
result_df = pd.DataFrame(columns=result_columns)

problem = p_full.get_problem()
objective_val = problem.solve(solver=cp.CLARABEL)

# First-stage decisions: the crude imports chosen at the root node
root_light_import = p_full.light_crude_import[root].value
root_heavy_import = p_full.heavy_crude_import[root].value
result_df.loc[0] = ("Full Model", objective_val, root_light_import, root_heavy_import)


## Results

In [6]:
from IPython.display import display
# Render the one-row summary of the base-case solve (objective and root-node crude imports)
display(result_df)

Unnamed: 0,Model,Objective Value,Light Crude Import,Heavy Crude Import
0,Full Model,102920.729059,17.333331756156987,154.77778109404323


This matches the results of the pyomo implementation, validating that the cvxpy model is implemented in an equivalent way.

## Distributions
We will cover a larger range of scenarios and then plot the distribution of outcomes by building an accessory function to calculate the objective value of a given node, and applying it, starting from the terminal nodes and tracing upwards to the root.

The values here are adjusted to reduce the outcome values, to create a chance of loss.

In [7]:
# Build a deeper, wider scenario tree and solve the model on it.
# This cell rebinds stages, root, all_nodes, crude_ratios and product_ratios, so cells
# below it see the larger tree and the adjusted ratios.
stages = 6
root, all_nodes = random_walk_tree_builder(
    variables,
    [30, 20, 50, 20, 10, 400, 300, 200],
    [1, 1, 1, 1, 1, 30, 30, 30],
    [0, 0, 0, 0, 0, 60, 60, 60],
    stages=stages,
    branch_factor=5,
    seed=42,
    truncate_places=0,
)
print(f"Generated {len(all_nodes)} nodes")

# Rows: light, medium, heavy intermediate; columns: [light crude, heavy crude]
crude_ratios = [
    [0.7, 0.3],
    [0.5, 0.5],
    [0, 1],
]
# Rows: light, medium, heavy product; columns: light/medium/heavy intermediates
product_ratios = [
    [0.7, 0.3, 0.0],
    [0.2, 0.5, 0.2],
    [0.0, 0.3, 0.7],
]

p_many = Plant(
    crude_distil_cap=crude_distil_cap,
    products=products,
    crude_ratios=crude_ratios,
    refine_caps=refine_caps,
    product_ratios=np.array(product_ratios),
    stages=stages,
    scenario_tree_root=root,
    scenario_tree_all_nodes=all_nodes,
    allowed_output_change=allowed_output_change,
)
problem = p_many.get_problem()
# Left as the cell's last expression so the optimal objective value is shown as output
problem.solve()

Generated 3906 nodes




np.float64(13311.405397785038)

In [8]:
def calculate_node_objective(m, node):
    """Return the objective contribution of a single non-root scenario node.

    Revenue is accumulated over ``m.outputs`` as
    ``m.prod_full_price[node][output].value`` times the node's
    ``prod_price_<output>`` value. The crude cost uses the import quantities
    stored for the node's *parent* (``m.light_crude_import`` /
    ``m.heavy_crude_import``), priced at this node's crude prices.

    Args:
        m: solved model exposing ``outputs``, ``prod_full_price``,
            ``light_crude_import`` and ``heavy_crude_import``.
        node: scenario-tree node with ``values`` and ``parent``; the import
            lookups are keyed by ``node.parent``, so this is not meant for the root.

    Returns:
        Revenue minus light-crude cost minus heavy-crude cost for this node.
    """
    prices = node.values
    sold = m.prod_full_price[node]

    revenue = 0
    for product in m.outputs:
        revenue = revenue + sold[product].value * prices[f"prod_price_{product}"]

    # Imports are decided one stage earlier, hence the lookup on the parent node
    source = node.parent
    light_cost = m.light_crude_import[source].value * prices["crude_light_price"]
    heavy_cost = m.heavy_crude_import[source].value * prices["crude_heavy_price"]

    return revenue - light_cost - heavy_cost

def calculate_node_objective_recursive(m, node):
    """Return the summed objective contributions along the path from ``node`` up to the root.

    The root itself contributes nothing: each child's contribution already
    charges the import costs decided at its parent, so the root's imports are
    accounted for by its children. Calling this on the root returns 0.

    Args:
        m: solved model, forwarded to ``calculate_node_objective``.
        node: scenario-tree node to start from (typically a terminal node).

    Returns:
        Sum of ``calculate_node_objective`` over ``node`` and all of its
        non-root ancestors.
    """
    # Walk upwards, evaluating nodes in leaf-to-root order
    contributions = []
    current = node
    while current.parent is not None:
        contributions.append(calculate_node_objective(m, current))
        current = current.parent

    # Fold from the root side back down so the additions happen in the same
    # order as an unwinding recursion would perform them
    total = 0
    for contribution in reversed(contributions):
        total = contribution + total
    return total

In [9]:
# Terminal nodes are the ones whose stage index is the last one (stages - 1).
terminal_nodes = [node for node in all_nodes if node.stage == stages - 1]

# One row per terminal scenario: the node name and the objective value accumulated along
# its path to the root. The frame is built in a single call from a list of rows; growing
# it with one `.loc[i] = ...` assignment per scenario reallocates the frame on every row.
distributions = pd.DataFrame(
    [(node.name, calculate_node_objective_recursive(p_many, node)) for node in terminal_nodes],
    columns=["Node", "Objective Value"],
)

In [10]:
display(distributions)

# Worst and best five terminal scenarios, plus the mean outcome across all of them
objective_col = "Objective Value"
worst_five = distributions.nsmallest(5, objective_col)
best_five = distributions.nlargest(5, objective_col)
mean_outcome = distributions[objective_col].mean()

print("Bot 5: ", worst_five)
print("Top 5: ", best_five)
print("Average: ", mean_outcome)

Unnamed: 0,Node,Objective Value
0,root_0_0_0_0_0,13561.904761
1,root_0_0_0_0_1,13528.571428
2,root_0_0_0_0_2,13544.761904
3,root_0_0_0_0_3,13140.476190
4,root_0_0_0_0_4,13140.476190
...,...,...
3120,root_4_4_4_4_0,14355.396825
3121,root_4_4_4_4_1,14488.253968
3122,root_4_4_4_4_2,14488.253968
3123,root_4_4_4_4_3,14311.111110


Bot 5:                 Node  Objective Value
157  root_0_1_1_1_2      4799.206348
156  root_0_1_1_1_1      5182.063491
151  root_0_1_1_0_1      6186.349205
158  root_0_1_1_1_3      6360.634920
466  root_0_3_3_3_1      6842.906836
Top 5:                  Node  Objective Value
2653  root_4_1_1_0_3     15256.984126
2652  root_4_1_1_0_2     15252.698412
2665  root_4_1_1_3_0     15239.682539
2667  root_4_1_1_3_2     15235.396825
2656  root_4_1_1_1_1     15233.253968
Average:  13311.405397785058


In [11]:
# Plot histogram
import numpy as np
import holoviews as hv
from holoviews import opts
hv.extension('bokeh')

# Bin the terminal objective values into 20 equal-width buckets, echo the raw counts and
# bin edges, then leave the Histogram element as the cell's rendered output.
objective_values = distributions["Objective Value"]
frequencies, edges = np.histogram(objective_values, bins=20)
print(frequencies, edges)
hv.Histogram((edges, frequencies))

%opts magic unavailable (pyparsing cannot be imported)
%compositor magic unavailable (pyparsing cannot be imported)


[  2   0   2   1   3   2   6   4   6  18  16  35  88 157 287 472 812 687
 433  94] [ 4799.20634844  5322.09523734  5844.98412624  6367.87301514
  6890.76190404  7413.65079294  7936.53968184  8459.42857074
  8982.31745964  9505.20634854 10028.09523744 10550.98412634
 11073.87301524 11596.76190414 12119.65079304 12642.53968194
 13165.42857084 13688.31745974 14211.20634864 14734.09523754
 15256.98412644]


In [12]:
# How many terminal scenarios fall below 9000, out of how many, and what share
# ten scenarios would represent
below_9000 = (distributions["Objective Value"] < 9000).sum()
scenario_count = len(distributions)

print(below_9000)
print(scenario_count)
print(10/scenario_count)

20
3125
0.0016


## Chance constraint
Now we can apply a chance constraint, which enforces that 99.7% of the outcome values are >9000
(i.e. only about 10 of the 3125 outcomes may fall below 9000, compared to the 20 observed above).

In [13]:
# Same plant inputs as the unconstrained run, plus a chance constraint. Per the text
# introducing this section, the tuple asks for 99.7% of outcome values to be above 9000.
p_chance = Plant(
    crude_distil_cap=crude_distil_cap,
    products=products,
    crude_ratios=crude_ratios,
    refine_caps=refine_caps,
    product_ratios=np.array(product_ratios),
    stages=stages,
    scenario_tree_root=root,
    scenario_tree_all_nodes=all_nodes,
    allowed_output_change=allowed_output_change,
    chance=(9000, 0.997),
)
problem = p_chance.get_problem()
problem.solve()

# Re-evaluate every terminal scenario under the chance-constrained solution. The frame is
# built in a single call from a list of rows; growing it with one `.loc[i] = ...`
# assignment per scenario reallocates the frame on every row.
# Note: this rebinds `distributions`, so it now holds the chance-constrained outcomes.
distributions = pd.DataFrame(
    [(node.name, calculate_node_objective_recursive(p_chance, node)) for node in terminal_nodes],
    columns=["Node", "Objective Value"],
)

frequencies, edges = np.histogram(distributions["Objective Value"], 20)
hv.Histogram((edges, frequencies))



In [14]:
# Count and share of terminal scenarios still below 9000 after the chance constraint
below_9000 = (distributions["Objective Value"] < 9000).sum()
print(below_9000)
print(below_9000 / len(distributions))

12
0.00384


The number of outcomes with an objective value < 9000 is 12, rather than the roughly 10 the chance constraint should allow; the discrepancy is investigated below:

In [22]:
# Solver status, the model's `b` vector, and how many of its entries are zero
print(problem.status)
b_values = p_chance.b.value
print(b_values)
print(sum(b_values == 0))

# Terminal values as the model itself reports them, and how many sit below 9000
values = np.array([terminal.value for terminal in p_chance.terminal_values.values()])
print(sum(values < 9000))

# Put each terminal value next to its offset-mask entry and list the rows below 9000
series_comp = pd.DataFrame()
series_comp["values"] = values
series_comp["mask"] = p_chance.offset_mask.value

shortfall_rows = series_comp[series_comp["values"] < 9000]
print(shortfall_rows)

optimal
[1. 1. 1. ... 1. 1. 1.]
9
11
         values       mask
151  7068.57143    0.00000
156  5980.00000    0.00000
157  5635.71429    0.00000
158  7248.57143    0.00000
159  8058.57143    0.00000
180  7990.00000    0.00000
183  9000.00000 9000.00000
466  7703.92959    0.00000
1627 9000.00000 9000.00000
1628 8250.00000    0.00000
2992 7544.51770    0.00000


As can be seen, in two of the cases the value is displayed as 9000.00000: it is extremely close to 9000 even though it is technically below it, a rounding/tolerance effect of the model. The constraint is confirmed to be working, reducing the number of outcomes that were <9000 from 20 to approximately 10.

In [23]:
# Mean outcome across all terminal scenarios under the chance-constrained solution
mean_objective = distributions["Objective Value"].mean()
print(mean_objective)

13259.65191756226


The new average objective value of `13260` is slightly lower than the original value (prior to the chance constraint) of `13311`. This makes sense: the model has been forced to give up some of the expected objective value in exchange for eliminating some of the worst-case scenarios.