In [14]:
import os
import json
import pandas as pd
import numpy as np
import torch
from torch import nn

def generate_sensitivity_csvs(
    run_folder: str,
    fold: int,
    test_csv: str,
    sample_indices: list[int],
    total_propellant_vals: list[float],
    fluid_ratio_min: float = 0.4,
    fluid_ratio_max: float = 10.0,
    num_fluid_points: int = 10,
    output_folder: str = "sensitivity_csvs"
):
    """
    Sweep Total Fluid for each proppant total and save the model's predictions.

    For each value in total_propellant_vals, produce a CSV named
    ``prop_<value formatted with 0 decimals>.csv`` inside output_folder:
      - Columns: the swept Total Fluid values (formatted with 2 decimals)
      - Rows: each (sample_index, fluid_type) pair
      - Cell: predicted Output 1 (first model output, de-normalized with
        y_mean[0] / y_std[0]) for that sample, fluid type and fluid value

    Args:
        run_folder: Directory of a training run. Its basename is used as the
            run id; ``<run_id>_hyperparams.json``, ``<run_id>_norms.json`` and
            ``<run_id>_fold<fold>.pth`` are read from it.
        fold: Fold number selecting which ``.pth`` state dict to load.
        test_csv: CSV with the samples to perturb. Must contain the GPI column,
            the "Fluid.Type" column and every feature named in ``x_mean``.
        sample_indices: Positional (``iloc``) row indices into test_csv.
        total_propellant_vals: Values written into "Total.Proppant.Volume";
            one CSV is produced per value.
        fluid_ratio_min: Lower bound of the fluid sweep, as a multiple of the
            current proppant value.
        fluid_ratio_max: Upper bound of the fluid sweep, as a multiple of the
            current proppant value.
        num_fluid_points: Number of evenly spaced fluid values in the sweep.
        output_folder: Directory for the CSVs (created if missing).

    Returns:
        None. The function writes the CSV files and prints one "Saved ..."
        line per file.

    Raises:
        ZeroDivisionError: If a selected sample has a GPI of 0 or if any
            ``x_std`` entry is 0 (both would otherwise divide by zero).
        ValueError: If the hyperparameters name an unknown activation.

    Notes:
        - The model input is laid out as the numeric features in ``x_mean`` key
          order, followed by one indicator per fluid type in sorted order of
          the types found in test_csv.
          NOTE(review): this presumably mirrors the training-time encoding;
          confirm the test CSV contains the same fluid types as training.
        - All rows for one proppant value go through the model in a single
          batched forward pass. Batched float32 results may differ from
          row-at-a-time inference in the last bits.
    """
    # ─── Column headers ───────────────────────────────────────────────────────
    # output_cols is only used for its length (width of the model output).
    output_cols       = ["BOE_Prodoction_2 year cum", "BOE_Production_6mon cum"]
    gpi_col           = "GPI (gross perforated interval ft)"
    prop_per_gpi_col  = "Proppant.per.GPI..lb.ft."
    fluid_per_gpi_col = "Fluid.per.GPI..gal.ft."
    total_prop_col    = "Total.Proppant.Volume"
    total_fluid_col   = "Total.Fluid"
    fluid_type_col    = "Fluid.Type"

    # ─── Load hyperparams & norms ─────────────────────────────────────────────
    run_id = os.path.basename(os.path.normpath(run_folder))
    with open(os.path.join(run_folder, f"{run_id}_hyperparams.json")) as f:
        hp = json.load(f)
    with open(os.path.join(run_folder, f"{run_id}_norms.json")) as f:
        norms = json.load(f)
    layer_dims, activations = hp["layer_dims"], hp["activations"]
    y_mean = np.array(norms["y_mean"], dtype=np.float32)
    y_std  = np.array(norms["y_std"],  dtype=np.float32)
    x_mean = norms["x_mean"]
    x_std  = norms["x_std"]

    # ─── Load test data ───────────────────────────────────────────────────────
    df = pd.read_csv(test_csv)

    numeric_feats = list(x_mean.keys())
    fluid_types   = sorted(df[fluid_type_col].unique())
    n_numeric, n_types = len(numeric_feats), len(fluid_types)

    # Normalization vectors aligned with numeric_feats; kept in float64 so the
    # arithmetic matches plain Python floats before the float32 tensor cast.
    feat_mean = np.array([float(x_mean[f]) for f in numeric_feats], dtype=np.float64)
    feat_std  = np.array([float(x_std[f])  for f in numeric_feats], dtype=np.float64)
    zero_std = [f for f, s in zip(numeric_feats, feat_std) if s == 0.0]
    if zero_std:
        raise ZeroDivisionError(
            f"x_std is zero for feature(s) {zero_std}; cannot normalize"
        )
    # Position of each numeric feature inside the input vector.
    col_of = {name: j for j, name in enumerate(numeric_feats)}

    # ─── Define & load the trained MLP ────────────────────────────────────────
    class MLPNet(nn.Module):
        """Fully connected net: (Linear + activation) per hidden dim, then a Linear head."""

        def __init__(self, in_dim, hidden_dims, activations, out_dim):
            super().__init__()
            known = {
                'relu': nn.ReLU,
                'tanh': nn.Tanh,
                'sigmoid': nn.Sigmoid,
                'softplus': nn.Softplus,
            }
            layers, dims = [], [in_dim] + hidden_dims
            for i in range(len(hidden_dims)):
                layers.append(nn.Linear(dims[i], dims[i+1]))
                act_cls = known.get(activations[i].lower())
                if act_cls is None:
                    raise ValueError(f"Unknown activation '{activations[i]}'")
                layers.append(act_cls())
            layers.append(nn.Linear(dims[-1], out_dim))
            # The attribute name `net` is part of the state-dict key prefix.
            self.net = nn.Sequential(*layers)

        def forward(self, x):
            return self.net(x)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = MLPNet(
        in_dim      = n_numeric + n_types,
        hidden_dims = layer_dims,
        activations = activations,
        out_dim     = len(output_cols)
    ).to(device)
    # NOTE(review): torch.load unpickles the checkpoint. Only point this at run
    # folders you trust, or pass weights_only=True on PyTorch versions that
    # support it.
    model.load_state_dict(torch.load(
        os.path.join(run_folder, f"{run_id}_fold{fold}.pth"),
        map_location=device
    ))
    model.eval()

    # ─── Ensure output directory exists ───────────────────────────────────────
    os.makedirs(output_folder, exist_ok=True)

    # ─── Per-sample baselines (independent of the sweep, so read them once) ───
    baselines = []
    for sample_index in sample_indices:
        sample = df.iloc[sample_index]
        gpi = float(sample[gpi_col])
        if gpi == 0.0:
            raise ZeroDivisionError(
                f"sample {sample_index} has {gpi_col!r} == 0; "
                "cannot compute per-GPI features"
            )
        raw = np.array([float(sample[f]) for f in numeric_feats], dtype=np.float64)
        baselines.append((sample_index, gpi, raw))

    # ─── For each propellant value, build and save a CSV ─────────────────────
    for prop_val in total_propellant_vals:
        # total fluid sweep values for this proppant total
        fluid_vals = np.linspace(
            prop_val * fluid_ratio_min,
            prop_val * fluid_ratio_max,
            num_fluid_points
        )
        n_points = len(fluid_vals)

        # One (n_points x in_dim) block per output row, in (sample, fluid type) order.
        blocks = []
        index = []

        for sample_index, gpi, raw in baselines:
            numeric = np.tile(raw, (n_points, 1))
            swept = {
                total_prop_col:    prop_val,
                total_fluid_col:   fluid_vals,
                prop_per_gpi_col:  prop_val / gpi,
                fluid_per_gpi_col: fluid_vals / gpi,
            }
            # Only features the model was trained on are part of the input.
            for name, values in swept.items():
                if name in col_of:
                    numeric[:, col_of[name]] = values
            numeric = (numeric - feat_mean) / feat_std

            for k, ft in enumerate(fluid_types):
                one_hot = np.zeros((n_points, n_types), dtype=np.float64)
                one_hot[:, k] = 1.0
                blocks.append(np.hstack([numeric, one_hot]))
                index.append((sample_index, ft))

        if blocks and n_points > 0:
            # Single batched forward pass for every row of this CSV.
            X_in = torch.tensor(np.vstack(blocks), dtype=torch.float32).to(device)
            with torch.no_grad():
                yp_n = model(X_in).cpu().numpy()
            # only first output; de-normalize in float32, then widen to float64
            # so the CSV shows the same digits as float(np.float32(...)).
            preds = (yp_n[:, 0] * y_std[0] + y_mean[0]).astype(np.float64)
            preds = preds.reshape(len(index), n_points)
        else:
            preds = np.empty((len(index), n_points), dtype=np.float64)

        # build DataFrame
        df_out = pd.DataFrame(
            preds,
            index=pd.MultiIndex.from_tuples(index, names=["sample_index","fluid_type"]),
            columns=[f"{fv:.2f}" for fv in fluid_vals.tolist()]
        )

        # save to CSV
        out_path = os.path.join(output_folder, f"prop_{prop_val:.0f}.csv")
        df_out.to_csv(out_path)
        print(f"Saved {out_path}")


In [15]:
# Run the sweep with the fold-6 checkpoint of run 606d04aa: one CSV per
# proppant total, each covering 15 fluid values between 0.4x and 2.0x of it.
generate_sensitivity_csvs(
    output_folder="sensitivity_csvs",
    run_folder="/home/kamiar/chevron/Eagle-Ford/First/606d04aa",
    test_csv="/home/kamiar/chevron/Eagle-Ford/First/data/Eagle Ford Data(Eagle Ford)_test.csv",
    fold=6,
    # positional row indices 0..126 of the test CSV
    sample_indices=[*range(127)],
    # 11 evenly spaced proppant totals from 300,000 to 20,300,000
    total_propellant_vals=np.linspace(300_000, 20_300_000, 11).tolist(),
    num_fluid_points=15,
    fluid_ratio_min=0.4,
    fluid_ratio_max=2.0,
)


Saved sensitivity_csvs/prop_300000.csv
Saved sensitivity_csvs/prop_2300000.csv
Saved sensitivity_csvs/prop_4300000.csv
Saved sensitivity_csvs/prop_6300000.csv
Saved sensitivity_csvs/prop_8300000.csv
Saved sensitivity_csvs/prop_10300000.csv
Saved sensitivity_csvs/prop_12300000.csv
Saved sensitivity_csvs/prop_14300000.csv
Saved sensitivity_csvs/prop_16300000.csv
Saved sensitivity_csvs/prop_18300000.csv
Saved sensitivity_csvs/prop_20300000.csv
