In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

# Out of Sample Evaluation

This notebook is dedicated to plotting out-of-sample performance.

### Inventory with secondary assembly

Based on `tuning_eval.ipynb`, we chose `k1=20`, `k2=20`, `eta1=0.1`, and `eta2=1` for the PDSA solver when $\gamma=0.8$, and `k1=100`, `k2=20`, `eta1=1`, and `eta2=0.01` when $\gamma=0.99$. Since the subproblems are similar (up to the previous search point), we use these parameters for all EDDP variants we ran.

Previously (before adding bounds on the cost-to-go), the best bounds were `k1=100`, `k2=20`, `eta1=1` and `eta2=0.1`.

In [None]:
def get_eval_seed_data(fname):
    """Load the contents of a CSV log file as a NumPy array.

    Args:
        fname: Path of the CSV file to read.

    Returns:
        The table read by ``pd.read_csv`` converted to a NumPy array with
        singleton dimensions removed, but always at least 1-D so that
        ``len(result)`` and ``result[-1]`` are valid. An empty array is
        returned when ``fname`` is not an existing file.
    """
    if not os.path.isfile(fname):
        return np.zeros(0)
    values = pd.read_csv(fname).to_numpy()
    # np.squeeze collapses a single-row, single-column table to a 0-d array,
    # on which len() and [-1] raise; promote it back to one dimension.
    return np.atleast_1d(np.squeeze(values))

In [None]:
folder = "/Users/calebju/Code/hddp/logs/2025_04_02/exp_0"
mode_name_arr = ["Inf-EDDP", "GCE-Inf-EDDP", "C_Inf-SDDP", "SDDP"]

n_exp = 8
n_seeds = 30
# arr[i, j] holds the last value in run i's evaluation file for seed j.
arr = np.zeros((n_exp, n_seeds), dtype=float)
for i in range(n_exp):
    # Run indices cycle through the entries of mode_name_arr.
    mode_name = mode_name_arr[i % len(mode_name_arr)]
    print("Mode: %s" % mode_name)

    cut_arr = get_eval_seed_data(os.path.join(folder, "run_%d/vals.csv" % i))
    if len(cut_arr) == 0:
        print("Exp %d not run yet, skipping" % i)
        continue
    num_cuts = len(cut_arr)

    # Track whether every seed file was found; otherwise the row of `arr` is
    # only partially filled (remaining entries are still zero) and summary
    # statistics over it would be misleading.
    early_stop = False
    for j in range(n_seeds):
        data = get_eval_seed_data(os.path.join(folder, "run_%d/eval_seed=%d.csv" % (i,j)))
        if len(data) == 0:
            print("Exp %d not run yet, skipping" % i)
            early_stop = True
            break
        arr[i,j] = data[-1]

    print("Exp %d cuts: %d" % (i, num_cuts))
    if not early_stop:
        print("Exp %d avg final score: %.16e" % (i, np.mean(arr[i])))
        print("Exp %d std final score: %.16e" % (i, np.std(arr[i])))

The performance of Inf-EDDP (exp 0) and Inf-SDDP (exp 1) is very similar. Let's take the differences between their final scores.

In [None]:
folder = "/Users/calebju/Code/hddp/logs/2025_04_02/exp_0"

n_exp = 2
n_seeds = 30
# arr[i, j] holds the last value in run i's evaluation file for seed j.
arr = np.zeros((n_exp, n_seeds), dtype=float)
for i in range(n_exp):
    for j in range(n_seeds):
        seed_file = os.path.join(folder, "run_%d/eval_seed=%d.csv" % (i,j))
        data = get_eval_seed_data(seed_file)
        if len(data) == 0:
            # Fail with the offending path instead of an opaque IndexError
            # from indexing an empty array below.
            raise RuntimeError("Missing or empty evaluation file: %s" % seed_file)
        arr[i,j] = data[-1]

# Per-seed difference: row 1 minus row 0.
np.diff(arr, axis=0)

Now we plot the performance as a histogram. Since all non-traditional-SDDP methods achieved identical performance, we arbitrarily select GCE-Inf-EDDP.

In [None]:
plt.style.use('ggplot')
# Bin edges 800, 850, ..., 1150 (width 50).
bins = 50 * np.arange(16,24)
fig, ax = plt.subplots()
# Row 1 of `arr` is run_1; per the accompanying text this is intended to be
# the GCE-Inf-EDDP run (TODO confirm against the run configuration).
ax.hist(arr[1], bins=bins, color="gray")
ax.set_xlabel("Final score")
ax.set_ylabel("Number of evaluation seeds")
ax.set_title("Final score per evaluation seed (run_1)")
plt.show()

We see that they are indeed different, but very similar. It seems that random sampling does not deviate much from explorative sampling in this problem.

### Heuristics

In [None]:
folder = "/Users/calebju/Code/hddp/logs/2025_03_30/exp_0"
mode_name_arr = ["PID(0)", "PID(25)", "PID(50)", "Myopic"]

n_exp = 8
n_seeds = 30
# One row per run, one column per evaluation seed.
arr = np.zeros((n_exp, n_seeds), dtype=float)
for i in range(n_exp):
    mode_name = mode_name_arr[i % 4]
    print("Mode: %s" % mode_name)

    # Set to 1 as soon as a seed file for this run yields no data.
    early_stop = 0
    for j in range(n_seeds):
        seed_csv = os.path.join(folder, "run_%d/eval_seed=%d.csv" % (i,j))
        data = get_eval_seed_data(seed_csv)
        if len(data) > 0:
            # Keep only the last entry of the file as the run's final score.
            arr[i,j] = data[-1]
            continue
        print("Exp %d not run yet, skipping" % i)
        early_stop = 1
        break

    # Summary statistics are only reported for runs with all seeds present.
    if early_stop:
        continue
    row = arr[i]
    print("Exp %d avg final score: %.16e" % (i, np.mean(row)))
    print("Exp %d std final score: %.16e" % (i, np.std(row)))

Now we plot the histogram of the best performing one.

In [None]:
fig, ax = plt.subplots()
# `bins` is not redefined here: it is the bin-edge array created in the
# earlier histogram cell, so both histograms share the same edges.
# Row 2 of `arr` is run_2 (index 2 of mode_name_arr, "PID(50)").
ax.hist(arr[2], bins=bins, color="gray")
ax.set_xlabel("Final score")
ax.set_ylabel("Number of evaluation seeds")
ax.set_title("Final score per evaluation seed (run_2)")
plt.show()