In [12]:
# =========================
# 0) Imports + config
# =========================
import pickle, numpy as np, time
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

PKL_PATH = "JUPITER_MASTER_SPECTRA.pkl"

# Speed knobs
SEED = 7
N_RESAMPLE = 1024        # 512 if you want even faster
N_SYNTH = 1500           # 800–2000 is plenty for dry run
EPOCHS = 7               # 5–10
BATCH = 128
LR = 1e-3

device = "cuda" if torch.cuda.is_available() else "cpu"
rng = np.random.default_rng(SEED)
torch.manual_seed(SEED)
if device == "cuda":
    torch.cuda.manual_seed_all(SEED)

# Label space: Jupiter UV -> atmospheric species (not "elements/minerals")
SPECIES = ["CH4", "NH3", "C2H2", "C2H6"]

# Simple band templates (refine later using real line/band libraries)
BANDS = {
    "CH4": [(2350, 220), (2750, 260)],
    "NH3": [(2050,  90), (2150,  90)],
    "C2H2": [(2700, 110), (2810, 100)],
    "C2H6": [(2400, 140), (2550, 120)],
}

def gaussian(x, mu, sigma):
    return np.exp(-0.5 * ((x - mu) / sigma) ** 2)

# =========================
# 1) Load + clean real spectrum
# =========================
with open(PKL_PATH, "rb") as f:
    real = pickle.load(f)

w = np.asarray(real["wavelength"], dtype=float)
f = np.asarray(real["flux"], dtype=float)

mask = np.isfinite(w) & np.isfinite(f)
w, f = w[mask], f[mask]
idx = np.argsort(w)
w, f = w[idx], f[idx]

print("Loaded:", real.get("target", "unknown"), "| points:", len(w), "| device:", device)

# =========================
# 2) Resample to fixed length (fast + consistent)
# =========================
def resample_to_fixed(wave, flux, n=N_RESAMPLE):
    w_new = np.linspace(wave.min(), wave.max(), n)
    f_new = np.interp(w_new, wave, flux)
    return w_new.astype(np.float32), f_new.astype(np.float32)

w_fix, f_fix = resample_to_fixed(w, f, N_RESAMPLE)

# =========================
# 3) Preprocess -> channels (C, N)
# =========================
def robust_norm(x):
    med = np.median(x)
    iqr = np.percentile(x, 75) - np.percentile(x, 25)
    if iqr <= 0:
        iqr = 1.0
    return (x - med) / iqr

def make_channels(wave, flux):
    x = robust_norm(flux)
    d1 = np.gradient(x, wave)
    d2 = np.gradient(d1, wave)
    X = np.stack([x, d1, d2], axis=0).astype(np.float32)  # (3, N)
    return X

X_real = make_channels(w_fix, f_fix)   # (3, N)
X_real_t = torch.tensor(X_real[None, ...], dtype=torch.float32).to(device)  # (1,3,N)

# =========================
# 4) Synthetic data generator
# =========================
from scipy.signal import savgol_filter
import numpy as np

# 1) Build a smooth baseline from the REAL planet spectrum (full range)
# window_length must be odd and < len(w_fix)
WL = 151 if len(w_fix) > 151 else (len(w_fix)//2)*2 - 1
baseline = savgol_filter(f_fix.astype(float), window_length=WL, polyorder=3)

# Prevent weird negatives if any
baseline = np.clip(baseline, np.percentile(baseline, 1), np.percentile(baseline, 99))

def synth_spectrum(wave, labels):
    # 2) Use anchored baseline instead of invented polynomial
    # Add small continuum variation so model doesn't memorize exact baseline
    cont = baseline.copy()

    # small multiplicative drift (keeps shape, varies scale)
    drift = 1.0 + rng.normal(0, 0.01) + rng.normal(0, 0.005) * ((wave - wave.min()) / (wave.max() - wave.min()) - 0.5)
    cont = cont * drift

    spec = cont.copy()

    # 3) Absorption dips (same as before, with center jitter)
    for sp, present in labels.items():
        if not present:
            continue
        for (c, w0) in BANDS[sp]:
            if sp == "CH4":
                depth = rng.uniform(0.08, 0.25)
                width = w0 * rng.uniform(0.9, 1.6)
            else:
                depth = rng.uniform(0.03, 0.15)
                width = w0 * rng.uniform(0.7, 1.3)

            c_jit = c + rng.normal(0, 10)
            width = width * rng.uniform(0.9, 1.1)

            dip = 1.0 - depth * gaussian(wave, c_jit, width)
            spec *= dip

    # 4) Noise: proportional to signal level (more realistic)
    sigma = 0.01 * (np.max(spec) - np.min(spec) + 1e-8)
    noise = rng.normal(0, sigma, size=wave.shape[0])
    noise = np.convolve(noise, np.ones(7)/7, mode="same")
    spec = spec + noise

    return spec.astype(np.float32)


# =========================
# 5) Models: MLP, CNN, GRU
# =========================
K = len(SPECIES)
C = 3
N = N_RESAMPLE

class MLP(nn.Module):
    def __init__(self, c=C, n=N, k=K):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(c*n, 256),
            nn.ReLU(),
            nn.Dropout(0.15),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, k)
        )
    def forward(self, x):  # x: (B,C,N)
        return self.net(x)

class CNN1D(nn.Module):
    def __init__(self, in_ch=C, k=K):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_ch, 32, kernel_size=7, padding=3),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Conv1d(32, 64, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),
            nn.Flatten(),
            nn.Linear(128, k)
        )
    def forward(self, x):
        return self.net(x)

class GRUClassifier(nn.Module):
    def __init__(self, c=C, k=K, hidden=64):
        super().__init__()
        self.gru = nn.GRU(input_size=c, hidden_size=hidden, num_layers=1,
                          batch_first=True, bidirectional=False)
        self.head = nn.Sequential(
            nn.Linear(hidden, k)
        )
    def forward(self, x):  # x: (B,C,N) -> (B,N,C)
        x = x.permute(0, 2, 1)
        _, h = self.gru(x)          # h: (1,B,H)
        h = h[-1]                   # (B,H)
        return self.head(h)

# =========================
# 6) Train + evaluate utilities
# =========================
loss_fn = nn.BCEWithLogitsLoss()

def micro_f1_from_logits(logits, y_true, thr=0.5):
    probs = torch.sigmoid(logits)
    y_hat = (probs >= thr).float()
    tp = (y_hat * y_true).sum()
    fp = (y_hat * (1 - y_true)).sum()
    fn = ((1 - y_hat) * y_true).sum()
    denom = (2*tp + fp + fn).clamp(min=1e-8)
    return (2*tp / denom).item()

@torch.no_grad()
def evaluate(model):
    model.eval()
    total_loss = 0.0
    all_logits, all_y = [], []
    for xb, yb in val_loader:
        xb, yb = xb.to(device), yb.to(device)
        logits = model(xb)
        total_loss += loss_fn(logits, yb).item() * xb.size(0)
        all_logits.append(logits)
        all_y.append(yb)
    total_loss /= len(X_val)
    logits = torch.cat(all_logits, dim=0)
    y_true = torch.cat(all_y, dim=0)
    f1 = micro_f1_from_logits(logits, y_true)
    return total_loss, f1

def train_model(model, name):
    model.to(device)
    opt = torch.optim.Adam(model.parameters(), lr=LR)
    best = {"loss": 1e9, "state": None, "f1": 0.0}
    t0 = time.time()

    for epoch in range(1, EPOCHS + 1):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            logits = model(xb)
            loss = loss_fn(logits, yb)
            loss.backward()
            opt.step()

        vloss, vf1 = evaluate(model)
        if vloss < best["loss"]:
            best = {"loss": vloss, "state": {k: v.detach().cpu() for k, v in model.state_dict().items()}, "f1": vf1}
        print(f"{name} | epoch {epoch:02d} | val_loss={vloss:.4f} | microF1={vf1:.3f}")

    dt = time.time() - t0
    return best, dt

# =========================
# 7) Train all three
# =========================
results = []

mlp = MLP()
best_mlp, t_mlp = train_model(mlp, "MLP")
results.append(("MLP", best_mlp["loss"], best_mlp["f1"], t_mlp, best_mlp))

cnn = CNN1D()
best_cnn, t_cnn = train_model(cnn, "CNN1D")
results.append(("CNN1D", best_cnn["loss"], best_cnn["f1"], t_cnn, best_cnn))

gru = GRUClassifier(hidden=64)
best_gru, t_gru = train_model(gru, "GRU")
results.append(("GRU", best_gru["loss"], best_gru["f1"], t_gru, best_gru))

print("\n=== Model comparison (lower loss better) ===")
for name, lossv, f1v, dt, _ in sorted(results, key=lambda x: x[1]):
    print(f"{name:5s} | val_loss={lossv:.4f} | microF1={f1v:.3f} | train_time={dt:.1f}s")

# pick best by val_loss
best_name, best_loss, best_f1, best_dt, best_blob = sorted(results, key=lambda x: x[1])[0]
print(f"\nBest model: {best_name} | val_loss={best_loss:.4f} | microF1={best_f1:.3f}")

# =========================
# 8) Inference on real Jupiter spectrum
# =========================
def load_best_model(name, state):
    if name == "MLP":
        m = MLP()
    elif name == "CNN1D":
        m = CNN1D()
    else:
        m = GRUClassifier(hidden=64)
    m.load_state_dict(state)
    m.to(device).eval()
    return m

best_model = load_best_model(best_name, best_blob["state"])

with torch.no_grad():
    logits = best_model(X_real_t)[0]
    probs = torch.sigmoid(logits).cpu().numpy()

print("\nPredicted probabilities on REAL Jupiter spectrum:")
for sp, p in sorted(zip(SPECIES, probs), key=lambda x: -x[1]):
    print(f"{sp:>4}: {p:.3f}")

# =========================
# 9) Save best model artifact (optional)
# =========================
# save_path = f"/mnt/data/{real.get('target','TARGET').upper()}_{best_name}_best.pt"
# torch.save({
#     "model_type": best_name,
#     "state_dict": best_blob["state"],
#     "species": SPECIES,
#     "n_resample": N_RESAMPLE,
#     "notes": "Trained on synthetic band-mixtures; dry-run model."
# }, save_path)
# print("\nSaved best model to:", save_path)


Loaded: JUPITER | points: 1024 | device: cpu
MLP | epoch 01 | val_loss=0.2263 | microF1=0.910
MLP | epoch 02 | val_loss=0.1707 | microF1=0.932
MLP | epoch 03 | val_loss=0.1377 | microF1=0.946
MLP | epoch 04 | val_loss=0.1320 | microF1=0.944
MLP | epoch 05 | val_loss=0.1185 | microF1=0.951
MLP | epoch 06 | val_loss=0.1196 | microF1=0.951
MLP | epoch 07 | val_loss=0.1142 | microF1=0.953
CNN1D | epoch 01 | val_loss=0.6899 | microF1=0.644
CNN1D | epoch 02 | val_loss=0.6768 | microF1=0.570
CNN1D | epoch 03 | val_loss=0.6501 | microF1=0.579
CNN1D | epoch 04 | val_loss=0.6132 | microF1=0.654
CNN1D | epoch 05 | val_loss=0.5976 | microF1=0.648
CNN1D | epoch 06 | val_loss=0.5947 | microF1=0.685
CNN1D | epoch 07 | val_loss=0.5853 | microF1=0.660
GRU | epoch 01 | val_loss=0.6920 | microF1=0.604
GRU | epoch 02 | val_loss=0.6886 | microF1=0.568
GRU | epoch 03 | val_loss=0.6819 | microF1=0.622
GRU | epoch 04 | val_loss=0.6609 | microF1=0.644
GRU | epoch 05 | val_loss=0.6591 | microF1=0.646
GRU | epoc

In [19]:
pip install optuna

Collecting optuna
  Downloading optuna-4.7.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.18.3-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.7.0-py3-none-any.whl (413 kB)
Downloading alembic-1.18.3-py3-none-any.whl (262 kB)
Downloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Downloading mako-1.3.10-py3-none-any.whl (78 kB)
Installing collected packages: Mako, colorlog, alembic, optuna

   ---------------------------------------- 0/4 [Mako]
   ---------------------------------------- 0/4 [Mako]
   ---------------------------------------- 0/4 [Mako]
   ---------- ----------------------------- 1/4 [colorlog]
   -------------------- ------------------- 2/4 [alembic]
   -------------------- ------------------- 2/4 [alembic]
  


[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [20]:
import pickle, numpy as np, time
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from scipy.signal import savgol_filter

import optuna

# -------------------------
# Config
# -------------------------
PKL_PATH = "JUPITER_MASTER_SPECTRA.pkl"

SEED = 7
N_RESAMPLE = 1024

N_SYNTH_FINAL = 4000
EPOCHS_FINAL = 25
PATIENCE_FINAL = 7
BATCH = 128

# Optuna tuning budget (keep it small & fast)
N_TRIALS = 25
N_SYNTH_TUNE = 1600
EPOCHS_TUNE = 12
PATIENCE_TUNE = 4

device = "cuda" if torch.cuda.is_available() else "cpu"
rng = np.random.default_rng(SEED)
torch.manual_seed(SEED)
if device == "cuda":
    torch.cuda.manual_seed_all(SEED)

SPECIES = ["CH4", "NH3", "C2H2", "C2H6"]

BANDS = {
    "CH4": [(2350, 260), (2750, 320)],
    "NH3": [(2050, 140), (2150, 140)],
    "C2H2": [(2700, 140), (2810, 130)],
    "C2H6": [(2400, 170), (2550, 150)],
}

def gaussian(x, mu, sigma):
    return np.exp(-0.5 * ((x - mu) / sigma) ** 2)

# -------------------------
# Load + resample
# -------------------------
with open(PKL_PATH, "rb") as f:
    real = pickle.load(f)

w = np.asarray(real["wavelength"], dtype=float)
f = np.asarray(real["flux"], dtype=float)
mask = np.isfinite(w) & np.isfinite(f)
w, f = w[mask], f[mask]
idx = np.argsort(w)
w, f = w[idx], f[idx]

def resample_to_fixed(wave, flux, n=N_RESAMPLE):
    w_new = np.linspace(wave.min(), wave.max(), n)
    f_new = np.interp(w_new, wave, flux)
    return w_new.astype(np.float32), f_new.astype(np.float32)

w_fix, f_fix = resample_to_fixed(w, f, N_RESAMPLE)
print("Loaded:", real.get("target","unknown"), "| points:", len(w_fix), "| device:", device)

# -------------------------
# Baseline + channels (baseline window is tunable)
# -------------------------
def compute_baseline(flux, win=151, poly=3):
    n = len(flux)
    win = int(win)
    if win >= n:
        win = n-1
    if win < 11:
        win = 11
    if win % 2 == 0:
        win += 1
    b = savgol_filter(flux.astype(float), window_length=win, polyorder=poly)
    eps = 1e-12
    b = np.clip(b, np.percentile(b, 1), np.percentile(b, 99)) + eps
    return b.astype(np.float32)

def make_channels(wave, flux, win):
    base = compute_baseline(flux, win=win, poly=3)
    r = (flux / base) - 1.0
    r = (r - np.median(r)) / (np.std(r) + 1e-8)
    d1 = np.gradient(r, wave)
    d2 = np.gradient(d1, wave)
    return np.stack([r, d1, d2], axis=0).astype(np.float32)

# real tensor depends on win, so we compute it inside train functions

# -------------------------
# Synthetic generator (anchored to REAL baseline)
# -------------------------
def sample_labels():
    y = {sp: 0 for sp in SPECIES}
    y["CH4"] = 1 if rng.random() < 0.85 else 0
    y["NH3"] = 1 if (y["CH4"] and rng.random() < 0.25) else 0
    y["C2H2"] = 1 if rng.random() < 0.20 else 0
    y["C2H6"] = 1 if rng.random() < 0.20 else 0
    if sum(y.values()) == 0:
        y["CH4"] = 1
    return y

real_baseline_default = compute_baseline(f_fix, win=151)

def synth_spectrum(wave, labels, baseline):
    cont = baseline.copy()
    x = (wave - wave.min()) / (wave.max() - wave.min())
    drift = 1.0 + rng.normal(0, 0.01) + rng.normal(0, 0.01) * (x - 0.5)
    spec = cont * drift

    for sp, present in labels.items():
        if not present:
            continue
        for (c, w0) in BANDS[sp]:
            if sp == "CH4":
                depth = rng.uniform(0.08, 0.28)
            elif sp == "C2H2":
                depth = rng.uniform(0.02, 0.10)
            else:
                depth = rng.uniform(0.03, 0.18)

            width = w0 * rng.uniform(0.85, 1.25)
            c_jit = c + rng.normal(0, 15)
            dip = 1.0 - depth * gaussian(wave, c_jit, width)
            spec *= dip

    sigma = 0.01 * (np.max(spec) - np.min(spec) + 1e-8)
    noise = rng.normal(0, sigma, size=wave.shape[0])
    noise = np.convolve(noise, np.ones(7)/7, mode="same")
    return (spec + noise).astype(np.float32)

def build_dataset(wave, n, win):
    # baseline used for synthesis: compute once per run
    baseline = compute_baseline(f_fix, win=win, poly=3)
    X_list, Y_list = [], []
    for _ in range(n):
        lab = sample_labels()
        spec = synth_spectrum(wave, lab, baseline)
        X = make_channels(wave, spec, win=win)
        y = np.array([lab[sp] for sp in SPECIES], dtype=np.float32)
        X_list.append(X)
        Y_list.append(y)
    X = np.stack(X_list, axis=0)
    Y = np.stack(Y_list, axis=0)
    perm = rng.permutation(len(X))
    X, Y = X[perm], Y[perm]
    n_train = int(0.85 * len(X))
    return X[:n_train], Y[:n_train], X[n_train:], Y[n_train:]

# -------------------------
# Train function (used by Optuna and final training)
# -------------------------
def train_mlp_once(h1, h2, drop1, drop2, lr, wd, win, n_synth, epochs, patience):
    X_train, Y_train, X_val, Y_val = build_dataset(w_fix, n=n_synth, win=win)

    train_loader = DataLoader(
        TensorDataset(torch.tensor(X_train), torch.tensor(Y_train)),
        batch_size=BATCH, shuffle=True
    )
    val_loader = DataLoader(
        TensorDataset(torch.tensor(X_val), torch.tensor(Y_val)),
        batch_size=BATCH, shuffle=False
    )

    C, N = 3, N_RESAMPLE
    K = len(SPECIES)

    class MLP(nn.Module):
        def __init__(self):
            super().__init__()
            self.net = nn.Sequential(
                nn.Flatten(),
                nn.Linear(C*N, h1),
                nn.ReLU(),
                nn.Dropout(drop1),
                nn.Linear(h1, h2),
                nn.ReLU(),
                nn.Dropout(drop2),
                nn.Linear(h2, K)
            )
        def forward(self, x):
            return self.net(x)

    model = MLP().to(device)
    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    loss_fn = nn.BCEWithLogitsLoss()

    @torch.no_grad()
    def eval_val():
        model.eval()
        tot = 0.0
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            tot += loss_fn(model(xb), yb).item() * xb.size(0)
        return tot / len(X_val)

    best_loss, best_state, bad = 1e9, None, 0

    for _ in range(epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            logits = model(xb)
            loss = loss_fn(logits, yb)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()

        vloss = eval_val()
        if vloss < best_loss - 1e-4:
            best_loss = vloss
            best_state = {k: v.detach().cpu() for k, v in model.state_dict().items()}
            bad = 0
        else:
            bad += 1
            if bad >= patience:
                break

    # reload best
    model.load_state_dict(best_state)
    model.eval()

    # real Jupiter inference (for sanity / to store)
    X_real = make_channels(w_fix, f_fix, win=win)
    X_real_t = torch.tensor(X_real[None, ...], dtype=torch.float32).to(device)

    with torch.no_grad():
        logits = model(X_real_t)[0].cpu().numpy()

    return best_loss, logits, best_state

# -------------------------
# Optuna objective
# -------------------------
def objective(trial):
    h1 = trial.suggest_categorical("h1", [256, 384, 512, 768])
    h2 = trial.suggest_categorical("h2", [128, 192, 256, 384])
    drop1 = trial.suggest_float("drop1", 0.10, 0.35)
    drop2 = trial.suggest_float("drop2", 0.00, 0.25)
    lr = trial.suggest_float("lr", 1e-4, 1e-3, log=True)
    wd = trial.suggest_float("weight_decay", 1e-6, 3e-4, log=True)
    win = trial.suggest_categorical("baseline_win", [101, 151, 201, 251])
    T = trial.suggest_float("temp", 1.0, 1.7)

    vloss, logits, _ = train_mlp_once(
        h1=h1, h2=h2, drop1=drop1, drop2=drop2,
        lr=lr, wd=wd, win=win,
        n_synth=N_SYNTH_TUNE,
        epochs=EPOCHS_TUNE,
        patience=PATIENCE_TUNE
    )

    probs = 1 / (1 + np.exp(-logits / T))
    ch4 = float(probs[SPECIES.index("CH4")])

    # light sanity penalty to avoid "CH4 near zero" configs
    penalty = 0.0
    if ch4 < 0.5:
        penalty += (0.5 - ch4) * 2.0

    trial.set_user_attr("jupiter_probs", {sp: float(p) for sp, p in zip(SPECIES, probs)})
    return float(vloss + penalty)

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=N_TRIALS)

print("\nBest params:", study.best_params)
print("Best objective:", study.best_value)
print("Jupiter probs (best trial):", study.best_trial.user_attrs["jupiter_probs"])

# -------------------------
# Final train with best params on full synth
# -------------------------
bp = study.best_params
final_vloss, final_logits, final_state = train_mlp_once(
    h1=bp["h1"], h2=bp["h2"], drop1=bp["drop1"], drop2=bp["drop2"],
    lr=bp["lr"], wd=bp["weight_decay"], win=bp["baseline_win"],
    n_synth=N_SYNTH_FINAL, epochs=EPOCHS_FINAL, patience=PATIENCE_FINAL
)

T = bp["temp"]
final_probs = 1 / (1 + np.exp(-final_logits / T))

print("\nFINAL (Optuna-tuned) Jupiter probabilities:")
for sp, p in sorted(zip(SPECIES, final_probs), key=lambda x: -x[1]):
    print(f"{sp:>4}: {p:.3f}")

# Optional: save tuned model
save_path = "JUPITER_MLP_OPTUNA.pt"
torch.save({
    "planet": "JUPITER",
    "species": SPECIES,
    "bands": BANDS,
    "best_params": bp,
    "val_loss": final_vloss,
    "state_dict": final_state,
    "n_resample": N_RESAMPLE,
}, save_path)
print("\nSaved:", save_path)


  from .autonotebook import tqdm as notebook_tqdm
[32m[I 2026-02-07 21:36:27,238][0m A new study created in memory with name: no-name-cf917426-0dea-430d-9c0f-60c039201daa[0m


Loaded: JUPITER | points: 1024 | device: cpu


[32m[I 2026-02-07 21:36:30,475][0m Trial 0 finished with value: 0.4400597353776296 and parameters: {'h1': 512, 'h2': 128, 'drop1': 0.26468233448057316, 'drop2': 0.1493883376455455, 'lr': 0.00018237626186339215, 'weight_decay': 0.00015924496426865972, 'baseline_win': 101, 'temp': 1.0116894981161977}. Best is trial 0 with value: 0.4400597353776296.[0m
[32m[I 2026-02-07 21:36:32,797][0m Trial 1 finished with value: 0.44394630988438927 and parameters: {'h1': 384, 'h2': 384, 'drop1': 0.28342670912198886, 'drop2': 0.055662307995513416, 'lr': 0.0004943609370698491, 'weight_decay': 5.824647404001128e-06, 'baseline_win': 201, 'temp': 1.5041641564012336}. Best is trial 0 with value: 0.4400597353776296.[0m
[32m[I 2026-02-07 21:36:34,911][0m Trial 2 finished with value: 0.4720622539520264 and parameters: {'h1': 384, 'h2': 128, 'drop1': 0.2342494931461418, 'drop2': 0.17601930928829373, 'lr': 0.0008280591283567678, 'weight_decay': 2.9449016343017413e-05, 'baseline_win': 251, 'temp': 1.005899


Best params: {'h1': 512, 'h2': 256, 'drop1': 0.21864223114569922, 'drop2': 0.1792407709503364, 'lr': 0.00037684958354375, 'weight_decay': 1.0520115738007038e-05, 'baseline_win': 151, 'temp': 1.607490761746618}
Best objective: 0.41226362784703574
Jupiter probs (best trial): {'CH4': 0.8713372945785522, 'NH3': 0.35000383853912354, 'C2H2': 0.2323499470949173, 'C2H6': 0.2261686474084854}

FINAL (Optuna-tuned) Jupiter probabilities:
 CH4: 0.948
C2H6: 0.034
C2H2: 0.001
 NH3: 0.000

Saved: JUPITER_MLP_OPTUNA.pt
