In [23]:
import json
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from pandas.api.types import CategoricalDtype

# ────────────────────────────────────────────────────────────────────────────────
# CATEGORY MAP & FEATURE LIST
# ────────────────────────────────────────────────────────────────────────────────
# Admissible integer levels of each categorical input. The sweep function uses
# these lists to one-hot encode the column with a fixed set of levels, so the
# encoded width does not depend on which levels occur in a given CSV.
categories_map = {
    'Input 1': list(range(1, 3)),    # 2 levels
    'Input 2': list(range(1, 6)),    # 5 levels
    'Input 3': list(range(1, 6)),    # 5 levels
    'Input 4': list(range(1, 22)),   # 21 levels
    'Input 5': list(range(1, 6)),    # 5 levels
    'Input 6': list(range(1, 4)),    # 3 levels
    'Input 7': list(range(1, 5)),    # 4 levels
    'Input 8': list(range(1, 11)),   # 10 levels
}

# Every model input column, in CSV order: Input 1 .. Input 18 with Input 12 left out.
all_inputs = ['Input %d' % number for number in (*range(1, 12), *range(13, 19))]

# The inputs that are not categorical (Input 9 .. Input 18 without Input 12);
# these are the columns that get z-score normalised.
numeric_inputs = [name for name in all_inputs if name not in categories_map]

# ────────────────────────────────────────────────────────────────────────────────
# MLP CLASS
# ────────────────────────────────────────────────────────────────────────────────
class MLPNet(nn.Module):
    """Fully connected network with a single scalar output.

    The network is a stack of ``Linear -> activation`` pairs, one per entry of
    ``hidden_dims``, followed by a final ``Linear`` layer of width 1. All layers
    live in ``self.net`` (an ``nn.Sequential``), so state-dict keys have the
    form ``net.<k>.weight`` / ``net.<k>.bias``.

    Args:
        in_dim: Number of input features.
        hidden_dims: List with the width of each hidden layer (may be empty).
        activations: One activation name per hidden layer, matched
            case-insensitively against 'relu', 'tanh', 'sigmoid', 'softplus'.

    Raises:
        ValueError: If an activation name is not one of the supported ones.
    """

    def __init__(self, in_dim, hidden_dims, activations):
        super().__init__()
        # Lower-cased activation name -> module class.
        activation_factories = {
            'relu': nn.ReLU,
            'tanh': nn.Tanh,
            'sigmoid': nn.Sigmoid,
            'softplus': nn.Softplus,
        }
        widths = [in_dim] + hidden_dims
        stack = []
        for depth in range(len(hidden_dims)):
            # The Linear layer is created before the activation is resolved so
            # that parameters are initialised in the same order as the layers.
            stack.append(nn.Linear(widths[depth], widths[depth + 1]))
            factory = activation_factories.get(activations[depth].lower())
            if factory is None:
                raise ValueError(f"Unknown activation '{activations[depth]}'")
            stack.append(factory())
        # Output head: last hidden width (or in_dim when there are no hidden layers) -> 1.
        stack.append(nn.Linear(widths[-1], 1))
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the result."""
        return self.net(x)

# ────────────────────────────────────────────────────────────────────────────────
# SWEEP & SAVE FUNCTION
# ────────────────────────────────────────────────────────────────────────────────
def sweep_save_samples_mlp(
    model_path: str,
    norms_json: str,
    hidden_dims: list[int],
    activations: list[str],
    test_csv: str,
    output_csv: str
) -> None:
    """
    Sweeps Input 10 over [0,3000] for each test sample and saves results:
      Row 0: sample indices
      Row 1: (true_input10|true_output) for each sample
      Rows 2+: each row begins with grid Input 10, then model predictions

    For every row of ``test_csv`` the encoded feature vector is replicated once
    per grid point, the (normalised) Input 10 entry is replaced by the grid
    value, and the model prediction is de-normalised with ``y_mean``/``y_std``.

    Args:
        model_path: Path to a state dict saved for an ``MLPNet``.
        norms_json: JSON file with keys ``y_mean``, ``y_std``, ``feat_mean`` and
            ``feat_std`` (the latter two indexed by input column name).
        hidden_dims: Hidden layer widths used to rebuild the ``MLPNet``.
        activations: Activation name per hidden layer used to rebuild the ``MLPNet``.
        test_csv: CSV containing the columns in ``all_inputs`` plus ``Output``.
        output_csv: Destination file for the sweep table.

    Raises:
        ValueError: If the Input 10 column does not fit inside the feature
            width expected by the checkpoint.
    """
    import warnings  # local import: only needed for the width-mismatch notice below

    # 0) load norms
    with open(norms_json, 'r') as f:
        norms = json.load(f)
    y_mean, y_std        = norms['y_mean'], norms['y_std']
    feat_mean, feat_std  = norms['feat_mean'], norms['feat_std']

    # 1) load test data
    df = pd.read_csv(test_csv)
    N = len(df)
    y_true   = df['Output'].to_numpy(float)
    x10_true = df['Input 10'].to_numpy(float)

    # 2) build normalized feature matrix Z_np
    X = df[all_inputs].copy()
    for col, cats in categories_map.items():
        if col not in X: continue
        # Fixed category list -> one dummy column per level, present in the data or not.
        X[col] = pd.Categorical(X[col], categories=cats)
        dummies = pd.get_dummies(X[col], prefix=col)
        # Defensive: add any level column that get_dummies did not emit.
        for c in cats:
            key = f"{col}_{c}"
            if key not in dummies:
                dummies[key] = 0
        # NOTE: the raw column is dropped and its dummies are appended at the END,
        # so the final column order is: numeric inputs first, then the dummies.
        X = pd.concat([X.drop(columns=[col]), dummies], axis=1)
    for col in numeric_inputs:
        X[col] = (X[col] - feat_mean[col]) / feat_std[col]
    feature_names = list(X.columns)
    Z_np_full = X.to_numpy(dtype=np.float32)

    # 3) infer feature dimension from checkpoint
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # NOTE(security): torch.load unpickles the file; only load checkpoints you trust.
    ckpt = torch.load(model_path, map_location=device)
    first_w = ckpt.get('net.0.weight', next(iter(ckpt.values())))
    in_dim = first_w.shape[1]
    # adjust columns if mismatch (best effort, but no longer silent)
    n_built = Z_np_full.shape[1]
    if n_built > in_dim:
        warnings.warn(
            f"Feature matrix has {n_built} columns but the checkpoint expects "
            f"{in_dim}; dropping the trailing {n_built - in_dim} column(s)."
        )
        Z_np = Z_np_full[:, :in_dim]
    elif n_built < in_dim:
        warnings.warn(
            f"Feature matrix has {n_built} columns but the checkpoint expects "
            f"{in_dim}; zero-padding {in_dim - n_built} column(s)."
        )
        pad = np.zeros((N, in_dim - n_built), dtype=np.float32)
        Z_np = np.hstack([Z_np_full, pad])
    else:
        Z_np = Z_np_full

    # 4) load model
    model = MLPNet(in_dim, hidden_dims, activations).to(device)
    model.load_state_dict(ckpt)
    model.eval()

    # 5) sweep parameters
    grid  = np.linspace(0, 3000, 101)
    mu10  = feat_mean['Input 10']
    sig10 = feat_std['Input 10']
    # Position of Input 10 in the ENCODED matrix. Using all_inputs.index(...) here
    # would be wrong: the one-hot step above reorders the columns, so the raw
    # position points at a dummy column instead of Input 10.
    idx10 = feature_names.index('Input 10')
    if idx10 >= in_dim:
        raise ValueError(
            f"'Input 10' is feature column {idx10}, which lies outside the "
            f"{in_dim} input feature(s) expected by the checkpoint"
        )

    preds = np.zeros((len(grid), N), dtype=float)
    with torch.no_grad():
        for i in range(N):
            # One copy of sample i per grid point; only the Input 10 entry varies.
            base = np.repeat(Z_np[i:i+1], len(grid), axis=0)
            base[:, idx10] = (grid - mu10) / sig10
            out_norm = model(torch.from_numpy(base).to(device)).cpu().numpy().ravel()
            # Undo the target normalisation.
            preds[:, i] = out_norm * y_std + y_mean

    # 6) write results to CSV
    with open(output_csv, 'w') as f:
        # Row 0: sample indices, preceded by empty col
        f.write(',' + ','.join(str(i) for i in range(N)) + '\n')
        # Row 1: true values with "|" separator in tuple
        f.write(',' + ','.join(f"({x10_true[i]:.4f}|{y_true[i]:.4f})"
                                for i in range(N)) + '\n')
        # Rows 2+: each starts with grid value, then predictions
        for j, g in enumerate(grid):
            f.write(f"{g:.4f}," +
                    ','.join(f"{preds[j,i]:.6f}" for i in range(N)) + '\n')

    print(f"Sweep data saved to {output_csv}")


In [24]:
# Run the Input 10 sweep with checkpoint 58c0fccc_fold4.pth and write the table
# to sweep_results.csv in the current working directory.
# NOTE(review): the input paths are absolute and machine-specific; adjust them
# before running this cell elsewhere.
sweep_save_samples_mlp(
    # data in / results out
    test_csv="/home/kamiar/chevron/Acid-neural-net/First Data-V3/data/test.csv",
    output_csv="sweep_results.csv",
    # trained weights and the normalisation statistics that go with them
    model_path="/home/kamiar/chevron/Acid-neural-net/First Data-V3/58c0fccc/58c0fccc_fold4.pth",
    norms_json="/home/kamiar/chevron/Acid-neural-net/First Data-V3/58c0fccc/58c0fccc_norms.json",
    # architecture used to rebuild MLPNet before load_state_dict; it has to
    # describe the layers stored in the checkpoint
    hidden_dims=[4, 4],
    activations=['softplus', 'softplus'],
)


Sweep data saved to sweep_results.csv
