In [None]:
import numpy as np
import matplotlib.pyplot as plt

DATA_DIR = "../dataset/Data_1k_sets/Data_1k_rng1/"

# Original voltage grid from COMSOL: a coarse 0.1 V step from 0 V to 0.4 V
# followed by a fine 0.025 V step from 0.425 V to 1.4 V (5 + 40 points).
# `np.linspace` is used rather than `np.arange` because with a non-integer
# step the number of points `arange` returns depends on floating-point
# round-off, while `linspace` fixes the point count and both end points.
voltage_raw = np.concatenate(
    (np.linspace(0.0, 0.4, 5), np.linspace(0.425, 1.4, 40))
)
# `Va` is the same grid under its older name; kept as an independent copy so
# any code that still refers to (or mutates) it keeps working.
Va = voltage_raw.copy()

params = np.loadtxt(
    DATA_DIR + "LHS_parameters_m.txt", delimiter=","
)  # input [n_samples, 31]
iV = np.loadtxt(
    DATA_DIR + "iV_m.txt", delimiter=","
)  # output [n_samples, n_points = len(voltage_raw)]

# Every later cell interpolates `iV` rows against `voltage_raw`, so fail early
# with a readable message if the file and the grid disagree.
assert iV.shape[-1] == voltage_raw.size, (
    f"iV has {iV.shape[-1]} points per curve, expected {voltage_raw.size}"
)

The `reduce_curve` function will take a curve on the `voltage_raw` axis:

1. First, interpolate it onto the high-resolution `hi_res_V` axis (in this case 10,000 points).

2. Then, find the zero crossing (Voc) and the maximum power point (MPP) on the interpolated curve, which has a much higher resolution.

3. Finally, compute the reduced curve by sampling the interpolated curve at `num_pre` evenly spaced points from V = 0 up to the MPP and `num_post` evenly spaced points from the MPP up to Voc. This gives `num_pre + num_post - 1` points in total; the −1 is because the voltage at the MPP (Vmp) would otherwise be counted twice.

4. It returns the reduced voltage axis and the reduced curve; you choose `num_pre` and `num_post`.

In [None]:
def reduce_curve(curve, voltage_raw, hi_res_V, num_pre, num_post):
    """Compress one I-V curve to a few samples anchored at V=0, the MPP and Voc.

    The curve is first interpolated linearly onto ``hi_res_V``. On that dense
    grid the open-circuit voltage (first zero crossing) and the maximum power
    point (argmax of ``V * J``) are located. The dense curve is then sampled
    at ``num_pre`` evenly spaced voltages in ``[0, Vmp]`` and ``num_post``
    evenly spaced voltages in ``[Vmp, Voc]``.

    Parameters
    ----------
    curve : array-like
        Current values sampled on ``voltage_raw``.
    voltage_raw : array-like
        Increasing voltage axis of ``curve``.
    hi_res_V : array-like
        Increasing dense voltage axis used for the root / MPP search.
    num_pre, num_post : int
        Number of samples before and after the MPP (both include the MPP).

    Returns
    -------
    v_red, y_red : np.ndarray
        Reduced voltage axis and current, each of length
        ``num_pre + num_post - 1`` (the MPP is shared, so it is kept once).
    """
    hi_res_V = np.asarray(hi_res_V, dtype=float)

    # 1) high-res interp
    y_hi = np.interp(hi_res_V, voltage_raw, curve)

    # 2) find Voc (zero-cross)
    neg_idx = np.where(y_hi < 0)[0]
    if len(neg_idx) == 0:
        # curve never goes negative on the grid: clamp Voc to the last voltage
        Voc = hi_res_V[-1]
    elif neg_idx[0] == 0:
        # Curve is already negative at the first sample, so there is no
        # bracketing pair. Indexing ``i0 - 1`` would wrap around to the LAST
        # sample and yield a meaningless (possibly negative) Voc, so clamp to
        # the first voltage instead.
        Voc = hi_res_V[0]
    else:
        i0 = neg_idx[0]
        # linear root-find between points i0-1, i0 (y1 >= 0 > y2, so y2 != y1)
        V1, V2 = hi_res_V[i0 - 1], hi_res_V[i0]
        y1, y2 = y_hi[i0 - 1], y_hi[i0]
        Voc = V1 - y1 * (V2 - V1) / (y2 - y1)

    # 3) find MPP
    P = hi_res_V * y_hi
    Vmp = hi_res_V[np.argmax(P)]

    # 4) define pre/post grids (both contain Vmp)
    v_pre = np.linspace(0, Vmp, num_pre)
    v_post = np.linspace(Vmp, Voc, num_post)

    # 5) sample the dense curve; drop the duplicated Vmp from the post segment
    y_pre = np.interp(v_pre, hi_res_V, y_hi)
    y_post = np.interp(v_post, hi_res_V, y_hi)
    v_red = np.concatenate((v_pre, v_post[1:]))
    y_red = np.concatenate((y_pre, y_post[1:]))
    return v_red, y_red


Here we show how to use this function, and we plot the result for validation.

In [None]:
# Dense voltage axis used by reduce_curve for the Voc / MPP search.
hi_V = np.linspace(0, 1.4, 10_000)
num_pre, num_post = 3, 4

# Reduce every curve; all reduced curves have num_pre + num_post - 1 points,
# so they stack into 2-D arrays [n_samples, n_reduced].
Vs, Ys = [], []
for curve in iV:
    v_red, y_red = reduce_curve(curve, voltage_raw, hi_V, num_pre, num_post)
    Vs.append(v_red)
    Ys.append(y_red)
Vs = np.stack(Vs)
Ys = np.stack(Ys)

# reconstruct back on voltage_raw; points outside [0, Voc] of a given curve
# are not covered by its reduced grid and are marked NaN
iV_recon = np.vstack(
    [
        np.interp(voltage_raw, Vs[i], Ys[i], left=np.nan, right=np.nan)
        for i in range(len(Ys))
    ]
)

# compute RMSE per curve over the covered points only.  A plain np.mean would
# propagate the NaN padding and report NaN for every curve whose Voc lies
# inside the grid, so the mask is applied explicitly.
mask = ~np.isnan(iV_recon)
sq_err = np.where(mask, (iV_recon - iV) ** 2, 0.0)
rmse = np.sqrt(sq_err.sum(axis=1) / mask.sum(axis=1))
print(f"Grid {num_pre}+{num_post}: mean RMSE = {rmse.mean():.3e}")


In [None]:
# try a bunch of different pre/post grid sizes and see the RMSE
configs = [(2, 3), (3, 4), (5, 5), (10, 10)]
results = []
for np_pre, np_post in configs:
    recon = []
    for curve in iV:
        vr, yr = reduce_curve(curve, voltage_raw, hi_V, np_pre, np_post)
        # Mark points outside the reduced grid (beyond Voc) as NaN instead of
        # clamping them to the end value, so they are excluded from the score
        # in the same way as in the single-configuration reconstruction.
        recon.append(np.interp(voltage_raw, vr, yr, left=np.nan, right=np.nan))
    recon = np.vstack(recon)
    valid = ~np.isnan(recon)
    sq_err = np.where(valid, (recon - iV) ** 2, 0.0)
    # NOTE(review): this rebinds the notebook-level `rmse`; after the loop it
    # holds the per-curve errors of the LAST configuration in `configs`.
    rmse = np.sqrt(sq_err.sum(axis=1) / valid.sum(axis=1))
    results.append((np_pre, np_post, rmse.mean()))
print("pre, post, mean_RMSE")
print("\n".join(f"{a:2d},{b:2d},{c:.3e}" for a, b, c in results))


In [None]:
# Overlay one randomly chosen raw curve with its reduced representation.
idx = np.random.choice(len(iV), 1)[0]  # random index for plotting
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(voltage_raw, iV[idx], label="raw")
ax.plot(Vs[idx], Ys[idx], "o-", label=f"reduced {num_pre}+{num_post}")
ax.set_xlabel("Voltage (V)")
ax.set_ylabel("Current (A/m^2)")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Histogram of reconstruction RMSE
# The errors are recomputed here from `iV_recon` (the num_pre+num_post
# reconstruction named in the title) rather than read from `rmse`: the
# grid-size sweep cell rebinds `rmse` to its last configuration, so reusing it
# would plot a different grid than the title claims.  NaN entries of
# `iV_recon` (points outside a curve's reduced grid) are excluded.
valid_recon = ~np.isnan(iV_recon)
sq_err_recon = np.where(valid_recon, (iV_recon - iV) ** 2, 0.0)
n_valid = valid_recon.sum(axis=1)
has_points = n_valid > 0  # skip curves with nothing to score
rmse_selected = np.sqrt(
    sq_err_recon[has_points].sum(axis=1) / n_valid[has_points]
)

plt.figure(figsize=(5, 4))
plt.hist(rmse_selected, bins=50, color="C1", alpha=0.8)
plt.xlabel("Reconstruction RMSE (A/m^2)")
plt.ylabel("Count")
plt.title(f"RMSE distribution ({num_pre}+{num_post} points)")
plt.grid(True)
plt.show()


In [None]:
# Parity plot: every reconstructed point against its original value, with the
# y = x diagonal drawn across the joint (NaN-ignoring) value range.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(iV_recon.flatten(), iV.flatten(), s=1, alpha=0.1)
mins = np.nanmin([iV_recon, iV])
maxs = np.nanmax([iV_recon, iV])
diagonal = [mins, maxs]
ax.plot(diagonal, diagonal, "k--", lw=1)
ax.set_xlabel("Reconstructed current")
ax.set_ylabel("Original current")
ax.set_title("Raw vs. reconstructed")
ax.grid(True)
plt.show()

In [None]:
# Distribution of Voc and Vmp over the whole dataset.
#
# Instead of copy-pasting the zero-cross / MPP search, call reduce_curve with
# num_pre = num_post = 2: its voltage axis is then
# linspace(0, Vmp, 2) followed by linspace(Vmp, Voc, 2)[1:], i.e. exactly
# [0, Vmp, Voc], so both quantities can be read off directly.
vocs, vmps = [], []
for curve in iV:
    v_anchor = reduce_curve(curve, voltage_raw, hi_V, 2, 2)[0]
    vmps.append(v_anchor[1])
    vocs.append(v_anchor[2])

fig, (ax_voc, ax_vmp) = plt.subplots(1, 2, figsize=(10, 4))
ax_voc.hist(vocs, bins=30, color="C2")
ax_voc.set_title("Voc distribution")
ax_voc.set_xlabel("Voc (V)")
ax_vmp.hist(vmps, bins=30, color="C3")
ax_vmp.set_title("Vmp distribution")
ax_vmp.set_xlabel("Vmp (V)")
fig.tight_layout()
plt.show()


We can also compute the cell metrics directly from the reduced curve (which is what will be predicted downstream).

In [None]:
# compute IV metrics straight on the reduced grid
def iv_metrics_reduced(v_red, j_red):
    """Compute I-V metrics directly from a reduced curve.

    Parameters
    ----------
    v_red : array-like
        Reduced voltage axis. The first entry is taken as the short-circuit
        point and the last entry as Voc (this is how ``reduce_curve`` lays
        out its output; other layouts will give a wrong Voc).
    j_red : array-like
        Current values at ``v_red``.

    Returns
    -------
    dict
        ``Jsc``, ``Voc``, ``Vmp``, ``Jmp``, ``Pmp`` and ``FF``. ``FF`` is NaN
        when ``Voc * Jsc`` is zero, because the fill factor is undefined then.
    """
    # accept plain lists / tuples as well as arrays
    v_red = np.asarray(v_red)
    j_red = np.asarray(j_red)

    # Jsc at V=0 (first sample)
    Jsc = j_red[0]
    # Maximum power point on the discrete points
    P = v_red * j_red
    idx = np.argmax(P)
    Vmp, Jmp = v_red[idx], j_red[idx]
    Pmp = Vmp * Jmp
    # Voc is the last sampled voltage (should bracket zero-cross)
    Voc = v_red[-1]
    # guard the division so a degenerate curve yields NaN rather than
    # inf / a RuntimeWarning
    denom = Voc * Jsc
    FF = Pmp / denom if denom != 0 else np.nan
    return dict(Jsc=Jsc, Voc=Voc, Vmp=Vmp, Jmp=Jmp, Pmp=Pmp, FF=FF)


# Example: treat the first reduced curve as if it were a model prediction
# made on the reduced voltage axis Vs[0].
sample_v, y_pred = Vs[0], Ys[0]
metrics = iv_metrics_reduced(sample_v, y_pred)
print(metrics)


If you want to reconstruct the original curve on the full original axis, you can use an interpolator as shown below. This is not necessary for our use case, though, since we only care about positive current values.

In [None]:
from scipy.interpolate import PchipInterpolator

def reconstruct_curve(v_red, y_pred, voltage_target):
    """Interpolate a reduced curve onto ``voltage_target`` with PCHIP.

    Parameters
    ----------
    v_red : array-like
        Reduced voltage axis.
    y_pred : array-like
        Current values at ``v_red``.
    voltage_target : array-like
        Voltages at which the curve is evaluated.

    Returns
    -------
    np.ndarray
        Interpolated current at ``voltage_target``; NaN wherever the target
        lies outside the range of ``v_red`` (no extrapolation).
    """
    v_red = np.asarray(v_red, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # PchipInterpolator requires strictly increasing x.  A reduced axis can
    # contain repeated voltages (e.g. when the MPP coincides with V=0 or with
    # Voc), so keep only the first sample of each distinct voltage.
    v_unique, first = np.unique(v_red, return_index=True)
    # use a shape-preserving interpolator
    f = PchipInterpolator(v_unique, y_pred[first], extrapolate=False)
    return f(voltage_target)


# Example for one sample: Ys[0] stands in for a model output on the reduced
# axis Vs[0]; any target grid could be used in place of voltage_raw.
y_pred = Ys[0]
voltage_full = voltage_raw
iV_full_pred = reconstruct_curve(Vs[0], y_pred, voltage_full)

fig, ax = plt.subplots()
ax.plot(voltage_full, iV_full_pred, "-", label="pred full curve")
ax.plot(voltage_raw, iV[0], "--", label="ground truth")
ax.legend()
ax.set_xlabel("V")
ax.set_ylabel("J")
plt.show()

In [None]:
def iv_metrics(v, j):
    """Compute I-V metrics from a sampled curve, locating Voc by root-finding.

    Parameters
    ----------
    v : array-like
        Increasing voltage axis; the first entry is taken as the
        short-circuit point.
    j : array-like
        Current at ``v``; assumed to decrease through zero at most once.

    Returns
    -------
    dict
        ``Jsc``, ``Voc``, ``Vmp``, ``Jmp``, ``Pmp`` and ``FF``. ``Voc`` is the
        linearly interpolated zero crossing, clamped to ``v[-1]`` if the
        current never goes negative and to ``v[0]`` if it is negative from
        the first sample. ``FF`` is NaN when ``Voc * Jsc`` is zero.
    """
    # accept plain lists / tuples as well as arrays
    v = np.asarray(v)
    j = np.asarray(j)

    # assume j descending past zero-cross; use linear root-find for Voc
    neg = np.where(j < 0)[0]
    if len(neg) == 0:
        Voc = v[-1]
    elif neg[0] == 0:
        # No bracketing pair: ``i0 - 1`` would wrap around to the last sample
        # and produce a meaningless Voc, so clamp to the first voltage.
        Voc = v[0]
    else:
        i0 = neg[0]
        V1, V2 = v[i0 - 1], v[i0]
        j1, j2 = j[i0 - 1], j[i0]  # j1 >= 0 > j2, so j2 - j1 != 0
        Voc = V1 - j1 * (V2 - V1) / (j2 - j1)

    # Jsc = j at V=0 (first point)
    Jsc = j[0]
    # MPP on the discrete samples
    P = v * j
    idx = np.argmax(P)
    Vmp, Jmp = v[idx], j[idx]
    Pmp = Vmp * Jmp
    # guard the division so a degenerate curve yields NaN rather than
    # inf / a RuntimeWarning
    denom = Voc * Jsc
    FF = Pmp / denom if denom != 0 else np.nan
    return dict(Jsc=Jsc, Voc=Voc, Vmp=Vmp, Jmp=Jmp, Pmp=Pmp, FF=FF)


# Metrics of the first reduced curve, this time with Voc found by root-finding.
first_v, first_j = Vs[0], Ys[0]
metrics = iv_metrics(first_v, first_j)
print(metrics)
