In [2]:
# ===== XGBoost for unseen MOSFET (25% per device, common Vbus..Ls11 combos) =====
# 1) Setup
import os
import numpy as np
import pandas as pd

# If xgboost isn't available in your environment, uncomment:
# !pip install xgboost -q

from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import r2_score, mean_squared_error

# ---- USER SETTINGS ----
CSV_PATH = r"C:\Users\pc\Desktop\Classical_Models\MERGED_ORIGINAL\Train_5_MOSFETs.csv"     # <-- change to your merged CSV path
DEVICE_COL = "Part_Number"            # or "MOSFET"/"Device" if that's your column
UNSEEN_DEVICE = "C2M0025120D"         # <-- set the unseen MOSFET you want (the "most similar" one)
SAMPLE_FRAC = 0.25                    # 25% per device
RANDOM_STATE = 42

# Core input columns + targets (edit if your names differ)
COMBO_COLS = ["Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11"]  # used to match rows across devices
INPUT_COLS = [
    "Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11",
    "Tp1","Tp2","Toff","Tstart","L1","L2","L3","R1",
    # "Part_Number",  # <- intentionally DROPPED to prevent leakage and unseen label issues
    "VDS_max","ID_max_25C","RDS_on_typ","RDS_on_max","VGS_th_min","VGS_th_typ","VGS_th_max",
    "Qg_total","Qrr_typ","Irrm_typ","Eon_typ","Eoff_typ","Ciss","Coss","Crss","Rth_JC_typ","Rth_JC_max","Tj_max",
]
TARGET_COLS = [
    "voltage_rise_time_pulse1","voltage_rise_time_pulse2",
    "voltage_fall_time_pulse1","voltage_fall_time_pulse2",
    "current_rise_time_pulse1","current_rise_time_pulse2",
    "current_fall_time_pulse1","current_fall_time_pulse2",
    "overshoot_pulse_1","overshoot_pulse_2",
    "undershoot_pulse_1","undershoot_pulse_2",
    "ringing_frequency_MHz"
]

# 2) Load
df = pd.read_csv(CSV_PATH)
assert DEVICE_COL in df.columns, f"Device column '{DEVICE_COL}' not found. Columns: {df.columns.tolist()}"

# Keep only columns that exist
INPUT_COLS = [c for c in INPUT_COLS if c in df.columns]
TARGET_COLS = [c for c in TARGET_COLS if c in df.columns]
missing_targets = [c for c in [
    "overshoot_pulse_1","overshoot_pulse_2","undershoot_pulse_1","undershoot_pulse_2","ringing_frequency_MHz"
] if c not in TARGET_COLS]
if missing_targets:
    print("Warning: Missing targets:", missing_targets)

# 3) Filter to devices of interest (train devices = all except UNSEEN_DEVICE)
devices_all = df[DEVICE_COL].dropna().unique().tolist()
assert UNSEEN_DEVICE in devices_all, f"UNSEEN_DEVICE '{UNSEEN_DEVICE}' not found in data. Found: {devices_all}"
train_devices = [d for d in devices_all if d != UNSEEN_DEVICE]

# 4) Build a combo key for (Vbus..Ls11) and keep ONLY combos shared by train+test
for col in COMBO_COLS:
    if col not in df.columns:
        raise ValueError(f"Combo column '{col}' missing from data. Present: {df.columns.tolist()}")

# Optional: round combo cols to reduce floating-point mismatch (adjust decimals if needed)
df["_combo_key"] = (
    df[COMBO_COLS]
    .apply(lambda row: tuple([float(row[c]) if pd.notna(row[c]) else np.nan for c in COMBO_COLS]), axis=1)
)

# Gather combos present in the unseen device
combos_unseen = set(df.loc[df[DEVICE_COL] == UNSEEN_DEVICE, "_combo_key"].dropna().unique().tolist())

# Gather combos present in ALL train devices
combos_by_train_dev = []
for d in train_devices:
    combos_by_train_dev.append(set(df.loc[df[DEVICE_COL] == d, "_combo_key"].dropna().unique().tolist()))

if combos_by_train_dev:
    common_train = set.intersection(*combos_by_train_dev)
else:
    common_train = set()

# Final common combos must be in BOTH (unseen) AND (all train devices)
common_combos = common_train.intersection(combos_unseen)
print(f"Common combos across train devices and unseen '{UNSEEN_DEVICE}': {len(common_combos)}")

# 5) Filter to only common combos + selected devices
df_common = df[df["_combo_key"].isin(common_combos) & df[DEVICE_COL].isin(train_devices + [UNSEEN_DEVICE])].copy()
print(f"Rows after filtering to common combos & selected devices: {len(df_common)}")

# 6) Sample 25% PER DEVICE
def sample_per_device(g):
    n = int(np.ceil(len(g) * SAMPLE_FRAC))
    return g.sample(n=n, random_state=RANDOM_STATE)

df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(sample_per_device).reset_index(drop=True)
print("Sampled rows per device:")
print(df_sampled[DEVICE_COL].value_counts())

# 7) Train/test split (leave-one-device-out)
df_train = df_sampled[df_sampled[DEVICE_COL] != UNSEEN_DEVICE].copy()
df_test  = df_sampled[df_sampled[DEVICE_COL] == UNSEEN_DEVICE].copy()

X_train = df_train[INPUT_COLS].copy()
y_train = df_train[TARGET_COLS].copy()
X_test  = df_test[INPUT_COLS].copy()
y_test  = df_test[TARGET_COLS].copy()

# Handle any remaining NaNs (simple fill; you can do better imputation if needed)
X_train = X_train.fillna(X_train.median(numeric_only=True))
X_test  = X_test.fillna(X_train.median(numeric_only=True))  # use train medians
y_train = y_train.fillna(y_train.median(numeric_only=True))
y_test  = y_test.fillna(y_train.median(numeric_only=True))

print(f"Train shape: X={X_train.shape}, y={y_train.shape}")
print(f"Test  shape: X={X_test.shape}, y={y_test.shape}")

# 8) Model: Multi-output XGB (one model per target)
xgb = XGBRegressor(
    n_estimators=600,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_lambda=1.0,
    objective="reg:squarederror",
    random_state=RANDOM_STATE,
    n_jobs=-1
)
model = MultiOutputRegressor(xgb)
model.fit(X_train, y_train)

# 9) Evaluate on unseen device
y_pred = pd.DataFrame(model.predict(X_test), columns=TARGET_COLS, index=y_test.index)

def rmse(a,b): return float(np.sqrt(mean_squared_error(a, b)))
def r2(a,b):   return float(r2_score(a, b))

print("\n=== Metrics on unseen device:", UNSEEN_DEVICE, "===")
per_target = []
for col in TARGET_COLS:
    r = {
        "target": col,
        "RMSE": rmse(y_test[col], y_pred[col]),
        "R2": r2(y_test[col], y_pred[col]),
        "test_count": int(y_test[col].notna().sum())
    }
    per_target.append(r)
metrics_df = pd.DataFrame(per_target).sort_values("R2", ascending=False)
print(metrics_df.to_string(index=False))

# Overall (macro) metrics
overall_rmse = rmse(y_test.values, y_pred.values)
overall_r2   = r2(y_test.values, y_pred.values)
print(f"\nOverall RMSE: {overall_rmse:.6g}")
print(f"Overall R2  : {overall_r2:.6g}")

# 10) Save predictions
out_pred = df_test[[DEVICE_COL] + COMBO_COLS].copy()
out_pred = out_pred.join(y_test.reset_index(drop=True), rsuffix="_true")
for c in TARGET_COLS:
    out_pred[c+"_pred"] = y_pred[c].values
OUT_PATH = os.path.join(os.path.dirname(CSV_PATH), f"xgb_unseen_{UNSEEN_DEVICE}_predictions.csv")
out_pred.to_csv(OUT_PATH, index=False)
print(f"\n✅ Saved predictions: {OUT_PATH}")


Common combos across train devices and unseen 'C2M0025120D': 86335
Rows after filtering to common combos & selected devices: 431675


  df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(sample_per_device).reset_index(drop=True)


Sampled rows per device:
Part_Number
C2M0025120D    21584
C2M0040120D    21584
C2M0080120D    21584
C2M0160120D    21584
C2M0280120D    21584
Name: count, dtype: int64
Train shape: X=(86336, 36), y=(86336, 13)
Test  shape: X=(21584, 36), y=(21584, 13)

=== Metrics on unseen device: C2M0025120D ===
                  target         RMSE         R2  test_count
       overshoot_pulse_1 6.664583e+00   0.803265       21584
       overshoot_pulse_2 1.224818e+01   0.552303       21584
      undershoot_pulse_2 5.759954e+00   0.363720       21584
      undershoot_pulse_1 6.194917e+00   0.270770       21584
current_fall_time_pulse1 1.531377e-08  -0.925720       21584
current_fall_time_pulse2 1.529532e-08  -0.928529       21584
voltage_rise_time_pulse2 6.805546e-09  -3.047395       21584
current_rise_time_pulse1 4.850171e-08  -3.954743       21584
current_rise_time_pulse2 3.986886e-08  -7.521805       21584
   ringing_frequency_MHz 7.940080e+00  -7.550750       21584
voltage_rise_time_pulse1 7.003

In [None]:
# ===== XGBoost for unseen MOSFET (25% per device, with Part_Number encoding) =====
import os
import numpy as np
import pandas as pd
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder

# ---- USER SETTINGS ----
CSV_PATH = r"C:\Users\pc\Desktop\Classical_Models\MERGED_ORIGINAL\Train_5_MOSFETs.csv"     # <-- change to your merged CSV path
DEVICE_COL = "Part_Number"            # or "MOSFET"/"Device" if that's your column
UNSEEN_DEVICE = "C2M0025120D"         # <-- set the unseen MOSFET you want
SAMPLE_FRAC = 0.25                    # 25% per device
RANDOM_STATE = 42

# Core input columns + targets
COMBO_COLS = ["Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11"]
INPUT_COLS = [
    "Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11",
    "Tp1","Tp2","Toff","Tstart","L1","L2","L3","R1",
    "Part_Number",  # keep for encoding
    "VDS_max","ID_max_25C","RDS_on_typ","RDS_on_max","VGS_th_min","VGS_th_typ","VGS_th_max",
    "Qg_total","Qrr_typ","Irrm_typ","Eon_typ","Eoff_typ","Ciss","Coss","Crss",
    "Rth_JC_typ","Rth_JC_max","Tj_max"
]
TARGET_COLS = [
    "voltage_rise_time_pulse1","voltage_rise_time_pulse2",
    "voltage_fall_time_pulse1","voltage_fall_time_pulse2",
    "current_rise_time_pulse1","current_rise_time_pulse2",
    "current_fall_time_pulse1","current_fall_time_pulse2",
    "overshoot_pulse_1","overshoot_pulse_2",
    "undershoot_pulse_1","undershoot_pulse_2",
    "ringing_frequency_MHz"
]

# Load data
df = pd.read_csv(CSV_PATH)

# Filter devices
devices_all = df[DEVICE_COL].dropna().unique().tolist()
assert UNSEEN_DEVICE in devices_all, f"{UNSEEN_DEVICE} not in dataset"
train_devices = [d for d in devices_all if d != UNSEEN_DEVICE]

# Create combo key
df["_combo_key"] = df[COMBO_COLS].apply(lambda row: tuple(row.values), axis=1)

# Common combos
combos_unseen = set(df[df[DEVICE_COL] == UNSEEN_DEVICE]["_combo_key"].unique())
combos_train_sets = [set(df[df[DEVICE_COL] == d]["_combo_key"].unique()) for d in train_devices]
common_train = set.intersection(*combos_train_sets)
common_combos = combos_unseen.intersection(common_train)

df_common = df[df["_combo_key"].isin(common_combos) & df[DEVICE_COL].isin(train_devices + [UNSEEN_DEVICE])]

# Sample 25% per device
df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(
    lambda g: g.sample(frac=SAMPLE_FRAC, random_state=RANDOM_STATE)
).reset_index(drop=True)

# Split
df_train = df_sampled[df_sampled[DEVICE_COL] != UNSEEN_DEVICE]
df_test = df_sampled[df_sampled[DEVICE_COL] == UNSEEN_DEVICE]

X_train = df_train[INPUT_COLS].copy()
y_train = df_train[TARGET_COLS].copy()
X_test = df_test[INPUT_COLS].copy()
y_test = df_test[TARGET_COLS].copy()

# Encode Part_Number
le = LabelEncoder()
X_train[DEVICE_COL] = le.fit_transform(X_train[DEVICE_COL])

# For unseen device, assign a new unseen label code
X_test[DEVICE_COL] = X_test[DEVICE_COL].map(lambda x: -1 if x not in le.classes_ else le.transform([x])[0])

# Fill NaNs
X_train = X_train.fillna(X_train.median(numeric_only=True))
X_test = X_test.fillna(X_train.median(numeric_only=True))
y_train = y_train.fillna(y_train.median(numeric_only=True))
y_test = y_test.fillna(y_train.median(numeric_only=True))

# Train model
xgb = XGBRegressor(
    n_estimators=500,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_lambda=1.0,
    objective="reg:squarederror",
    random_state=RANDOM_STATE,
    n_jobs=-1
)
model = MultiOutputRegressor(xgb)
model.fit(X_train, y_train)

# Predict
y_pred = pd.DataFrame(model.predict(X_test), columns=TARGET_COLS, index=y_test.index)

# Metrics
def rmse(a,b): return float(np.sqrt(mean_squared_error(a,b)))
def r2(a,b): return float(r2_score(a,b))

metrics = []
for col in TARGET_COLS:
    metrics.append({
        "Target": col,
        "RMSE": rmse(y_test[col], y_pred[col]),
        "R2": r2(y_test[col], y_pred[col])
    })
metrics_df = pd.DataFrame(metrics)
print(metrics_df)

print("\nOverall RMSE:", rmse(y_test.values, y_pred.values))
print("Overall R2:", r2(y_test.values, y_pred.values))


  df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(


                      Target          RMSE         R2
0   voltage_rise_time_pulse1  7.006509e-09 -12.343755
1   voltage_rise_time_pulse2  6.807252e-09  -3.049424
2   voltage_fall_time_pulse1  1.193986e-08 -15.813116
3   voltage_fall_time_pulse2  1.192467e-08 -15.729264
4   current_rise_time_pulse1  4.851796e-08  -3.958064
5   current_rise_time_pulse2  3.989373e-08  -7.532442
6   current_fall_time_pulse1  1.532225e-08  -0.927853
7   current_fall_time_pulse2  1.530371e-08  -0.930645
8          overshoot_pulse_1  6.765700e+00   0.797249
9          overshoot_pulse_2  1.235287e+01   0.544617
10        undershoot_pulse_1  6.120259e+00   0.288241
11        undershoot_pulse_2  5.850825e+00   0.343485
12     ringing_frequency_MHz  7.760237e+00  -7.167788

Overall RMSE: 5.040444558727504
Overall R2: -5.036827646252018


In [8]:
# ==== XGBoost unseen MOSFET with encoded Part_Number + derived features (25% per device) ====
import os
import numpy as np
import pandas as pd
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder

# --------- USER SETTINGS ---------
CSV_PATH = r"C:\Users\pc\Desktop\Classical_Models\MERGED_ORIGINAL\Train_5_MOSFETs.csv"     # <-- change to your merged CSV path
DEVICE_COL = "Part_Number"            # or "MOSFET"/"Device" if that's your column name
UNSEEN_DEVICE = "C2M0040120D"         # <-- set the unseen MOSFET ID you want
SAMPLE_FRAC = 0.25                    # 25% per device
RANDOM_STATE = 42
# ---------------------------------

# Core columns
COMBO_COLS = ["Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11"]  # used to enforce shared configs
RAW_INPUTS = [
    "Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11",
    "Part_Number",  # will be encoded
    "VDS_max","ID_max_25C","RDS_on_typ","RDS_on_max",
    "VGS_th_min","VGS_th_typ","VGS_th_max",
    "Qg_total","Qrr_typ","Irrm_typ","Eon_typ","Eoff_typ",
    "Ciss","Coss","Crss",
    "Rth_JC_typ","Rth_JC_max"
]
TARGETS = [
    "voltage_rise_time_pulse1","voltage_rise_time_pulse2",
    "voltage_fall_time_pulse1","voltage_fall_time_pulse2",
    "current_rise_time_pulse1","current_rise_time_pulse2",
    "current_fall_time_pulse1","current_fall_time_pulse2",
    "overshoot_pulse_1","overshoot_pulse_2",
    "undershoot_pulse_1","undershoot_pulse_2",
    "ringing_frequency_MHz"
]

# ---- load & sanity
df = pd.read_csv(CSV_PATH)
assert DEVICE_COL in df.columns, f"Device column '{DEVICE_COL}' not found. Columns: {df.columns.tolist()}"

# keep only cols that exist
RAW_INPUTS = [c for c in RAW_INPUTS if c in df.columns]
TARGETS = [c for c in TARGETS if c in df.columns]
for c in COMBO_COLS:
    if c not in df.columns:
        raise ValueError(f"Missing combo column: {c}")

# --------- derived features we discussed ----------
def add_derived_features(frame: pd.DataFrame) -> pd.DataFrame:
    f = frame.copy()
    eps = 1e-12      # numerical safety for divisions
    c_stray = 5e-12  # small estimated stray capacitance (adjust if you have a better estimate)

    # L_loop = sum of switching-loop inductances
    L_pieces = [col for col in ["Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11"] if col in f.columns]
    if L_pieces:
        f["L_loop"] = f[L_pieces].sum(axis=1)
    else:
        f["L_loop"] = np.nan

    # C_eq_est = Coss + small stray
    if "Coss" in f.columns:
        f["C_eq_est"] = f["Coss"].fillna(0) + c_stray
    else:
        f["C_eq_est"] = np.nan

    # f_res_est = 1/(2π√(L_loop*C_eq_est))
    f["f_res_est"] = 1.0 / (2.0 * np.pi * np.sqrt(np.clip(f["L_loop"] * f["C_eq_est"], eps, None)))

    # gate_drive_strength proxy ~ 1 / (Rg + small)
    if "Rg" in f.columns:
        f["gate_drive_strength"] = 1.0 / (f["Rg"].abs() + 1e-3)
    else:
        f["gate_drive_strength"] = np.nan

    # dv/dt proxy ~ gate_drive_strength / Crss
    if "Crss" in f.columns:
        f["dvdt_proxy"] = f["gate_drive_strength"] / (f["Crss"].abs() + eps)
    else:
        f["dvdt_proxy"] = np.nan

    # Miller ratio ~ Crss / Coss
    if "Crss" in f.columns and "Coss" in f.columns:
        f["miller_ratio"] = f["Crss"].abs() / (f["Coss"].abs() + eps)
    else:
        f["miller_ratio"] = np.nan

    return f

# make a safe combo key (round to limit float glitches)
def make_combo_key(frame: pd.DataFrame) -> pd.Series:
    rounded = frame[COMBO_COLS].apply(lambda s: np.round(s.astype(float), 6))
    return rounded.apply(lambda row: tuple(row.values.tolist()), axis=1)

# ---------- filter to devices and common combos ----------
devices_all = df[DEVICE_COL].dropna().unique().tolist()
assert UNSEEN_DEVICE in devices_all, f"UNSEEN_DEVICE '{UNSEEN_DEVICE}' not found. Devices: {devices_all}"
train_devices = [d for d in devices_all if d != UNSEEN_DEVICE]

df["_combo_key"] = make_combo_key(df)

combos_unseen = set(df.loc[df[DEVICE_COL]==UNSEEN_DEVICE, "_combo_key"].dropna().unique())
combos_train_sets = [set(df.loc[df[DEVICE_COL]==d, "_combo_key"].dropna().unique()) for d in train_devices]
common_train = set.intersection(*combos_train_sets) if combos_train_sets else set()
common_combos = combos_unseen.intersection(common_train)

df_common = df[df["_combo_key"].isin(common_combos) & df[DEVICE_COL].isin(train_devices + [UNSEEN_DEVICE])].copy()
print(f"Common combos found: {len(common_combos)} | Rows kept: {len(df_common)}")

# ---------- sample 25% per device ----------
df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(
    lambda g: g.sample(frac=SAMPLE_FRAC, random_state=RANDOM_STATE)
).reset_index(drop=True)

# ---------- build feature matrix with derived ----------
feature_cols = list(RAW_INPUTS)  # start with raw inputs
df_features = add_derived_features(df_sampled)

# Add derived names to the feature list
for new_c in ["L_loop","C_eq_est","f_res_est","gate_drive_strength","dvdt_proxy","miller_ratio"]:
    if new_c in df_features.columns:
        feature_cols.append(new_c)

# ensure uniqueness/order
feature_cols = list(dict.fromkeys(feature_cols))

# ---------- split train/test (leave-one-device-out) ----------
df_train = df_features[df_features[DEVICE_COL] != UNSEEN_DEVICE].copy()
df_test  = df_features[df_features[DEVICE_COL] == UNSEEN_DEVICE].copy()

X_train = df_train[feature_cols].copy()
y_train = df_train[[c for c in TARGETS if c in df_train.columns]].copy()
X_test  = df_test[feature_cols].copy()
y_test  = df_test[[c for c in TARGETS if c in df_test.columns]].copy()

# ---------- encode Part_Number (DEVICE_COL) ----------
le = LabelEncoder()
if DEVICE_COL in X_train.columns:
    X_train[DEVICE_COL] = le.fit_transform(X_train[DEVICE_COL].astype(str))
    # unseen → -1
    def encode_test_label(x):
        x = str(x)
        return le.transform([x])[0] if x in le.classes_ else -1
    X_test[DEVICE_COL] = X_test[DEVICE_COL].astype(str).map(encode_test_label)

# ---------- simple NaN handling ----------
X_train = X_train.fillna(X_train.median(numeric_only=True))
X_test  = X_test.fillna(X_train.median(numeric_only=True))
y_train = y_train.fillna(y_train.median(numeric_only=True))
y_test  = y_test.fillna(y_train.median(numeric_only=True))

print(f"Train: X={X_train.shape}, y={y_train.shape} | Test: X={X_test.shape}, y={y_test.shape}")

# ---------- model ----------
xgb = XGBRegressor(
    n_estimators=600,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_lambda=1.0,
    objective="reg:squarederror",
    random_state=RANDOM_STATE,
    n_jobs=-1,
)
model = MultiOutputRegressor(xgb)
model.fit(X_train, y_train)

# ---------- evaluate ----------
y_pred = pd.DataFrame(model.predict(X_test), columns=y_test.columns, index=y_test.index)

def rmse(a,b): return float(np.sqrt(mean_squared_error(a, b)))
def r2(a,b):   return float(r2_score(a, b))

per_target = []
for col in y_test.columns:
    per_target.append({
        "target": col,
        "RMSE": rmse(y_test[col], y_pred[col]),
        "R2": r2(y_test[col], y_pred[col]),
        "n_test": int(y_test[col].notna().sum())
    })
metrics_df = pd.DataFrame(per_target).sort_values("R2", ascending=False)
print("\n=== Unseen device:", UNSEEN_DEVICE, "===")
print(metrics_df.to_string(index=False))

print("\nOverall RMSE:", rmse(y_test.values, y_pred.values))
print("Overall R2  :", r2(y_test.values, y_pred.values))

# ---------- save predictions ----------
out_pred = df_test[[DEVICE_COL] + COMBO_COLS].copy()
out_pred = out_pred.join(y_test.reset_index(drop=True), rsuffix="_true")
for c in y_test.columns:
    out_pred[c + "_pred"] = y_pred[c].values
OUT_PATH = os.path.join(os.path.dirname(CSV_PATH), f"xgb_unseen_{UNSEEN_DEVICE}_with_derived.csv")
out_pred.to_csv(OUT_PATH, index=False)
print(f"\n✅ Saved predictions: {OUT_PATH}")


Common combos found: 12 | Rows kept: 431675


  df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(


Train: X=(86336, 34), y=(86336, 13) | Test: X=(21584, 34), y=(21584, 13)

=== Unseen device: C2M0040120D ===
                  target         RMSE        R2  n_test
       overshoot_pulse_1 7.326889e+00  0.706456   21584
      undershoot_pulse_2 6.252737e+00  0.588121   21584
       overshoot_pulse_2 1.356814e+01  0.571090   21584
      undershoot_pulse_1 6.535605e+00  0.556091   21584
   ringing_frequency_MHz 3.311002e+00  0.247813   21584
voltage_rise_time_pulse2 6.107509e-09 -0.713808   21584
current_fall_time_pulse1 1.732867e-08 -1.382414   21584
current_fall_time_pulse2 1.736815e-08 -1.426522   21584
current_rise_time_pulse2 3.460742e-08 -3.261195   21584
voltage_rise_time_pulse1 4.370969e-09 -5.443350   21584
current_rise_time_pulse1 4.361475e-08 -6.906631   21584
voltage_fall_time_pulse2 7.242882e-09 -8.668226   21584
voltage_fall_time_pulse1 7.252886e-09 -8.705457   21584

Overall RMSE: 5.04252249172598
Overall R2  : -2.6029256572257866

✅ Saved predictions: C:\Users\pc\Desktop

In [13]:
# ===== Simple ANN for unseen MOSFET (common combos + 25% per device + derived + optional embedding) =====
import os, numpy as np, pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, regularizers, callbacks, Model

# ------------ USER SETTINGS ------------
CSV_PATH = r"C:\Users\pc\Desktop\Classical_Models\MERGED_ORIGINAL\Train_5_MOSFETs.csv"
DEVICE_COL = "Part_Number"
UNSEEN_DEVICE = "C2M0040120D"
SAMPLE_FRAC = 0.25
RANDOM_STATE = 42
USE_EMBEDDING = False   # set True to include Part_Number embedding (an 'UNK' slot is used for unseen)
# --------------------------------------

np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

COMBO_COLS = ["Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11"]
RAW_INPUTS = [
    "Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11",
    # timing/fixture columns if present (optional); add if you have them in this file
    # "Tp1","Tp2","Toff","Tstart",
    # "L1","L2","L3","R1",
    "VDS_max","ID_max_25C","RDS_on_typ","RDS_on_max",
    "VGS_th_min","VGS_th_typ","VGS_th_max",
    "Qg_total","Qrr_typ","Irrm_typ","Eon_typ","Eoff_typ",
    "Ciss","Coss","Crss",
    "Rth_JC_typ","Rth_JC_max"
]
TARGETS = [
    "voltage_rise_time_pulse1","voltage_rise_time_pulse2",
    "voltage_fall_time_pulse1","voltage_fall_time_pulse2",
    "current_rise_time_pulse1","current_rise_time_pulse2",
    "current_fall_time_pulse1","current_fall_time_pulse2",
    "overshoot_pulse_1","overshoot_pulse_2",
    "undershoot_pulse_1","undershoot_pulse_2",
    "ringing_frequency_MHz"
]

# ---- load ----
df = pd.read_csv(CSV_PATH)
assert DEVICE_COL in df.columns, f"'{DEVICE_COL}' not found in columns."

# keep available columns only
RAW_INPUTS = [c for c in RAW_INPUTS if c in df.columns]
TARGETS = [c for c in TARGETS if c in df.columns]
for c in COMBO_COLS:
    if c not in df.columns:
        raise ValueError(f"Missing combo column: {c}")

# ---- derived features ----
def add_derived_features(frame: pd.DataFrame) -> pd.DataFrame:
    f = frame.copy()
    eps = 1e-12
    c_stray = 5e-12
    # L_loop (if L1/L2/L3 not in this file, only sum Ls4..Ls11)
    L_pieces = [c for c in ["Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11","L1","L2","L3"] if c in f.columns]
    f["L_loop"] = f[L_pieces].sum(axis=1) if L_pieces else np.nan
    # C_eq and resonance
    f["C_eq_est"] = (f["Coss"].fillna(0) if "Coss" in f.columns else 0) + c_stray
    f["f_res_est"] = 1.0/(2.0*np.pi*np.sqrt(np.clip(f["L_loop"]*f["C_eq_est"], eps, None)))
    # drive proxies
    f["gate_drive_strength"] = 1.0/(f["Rg"].abs() + 1e-3) if "Rg" in f.columns else np.nan
    f["dvdt_proxy"] = f["gate_drive_strength"]/((f["Crss"].abs() if "Crss" in f.columns else 0)+eps)
    f["miller_ratio"] = (f["Crss"].abs()/(f["Coss"].abs()+eps)) if ("Crss" in f.columns and "Coss" in f.columns) else np.nan
    return f

# ---- make combo key with rounding to avoid float mismatches ----
def make_combo_key(frame: pd.DataFrame) -> pd.Series:
    rounded = frame[COMBO_COLS].apply(lambda s: np.round(s.astype(float), 6))
    return rounded.apply(lambda row: tuple(row.values.tolist()), axis=1)

# ---- filter to common combos & sample 25%/device ----
devices = df[DEVICE_COL].dropna().unique().tolist()
assert UNSEEN_DEVICE in devices, f"{UNSEEN_DEVICE} not in dataset devices {devices}"
train_devices = [d for d in devices if d != UNSEEN_DEVICE]

df["_combo_key"] = make_combo_key(df)
combos_unseen = set(df.loc[df[DEVICE_COL]==UNSEEN_DEVICE, "_combo_key"].dropna().unique())
combos_train_sets = [set(df.loc[df[DEVICE_COL]==d, "_combo_key"].dropna().unique()) for d in train_devices]
common_train = set.intersection(*combos_train_sets) if combos_train_sets else set()
common_combos = combos_unseen.intersection(common_train)
df_common = df[df["_combo_key"].isin(common_combos) & df[DEVICE_COL].isin(train_devices + [UNSEEN_DEVICE])].copy()

# sample 25% per device
df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(
    lambda g: g.sample(frac=SAMPLE_FRAC, random_state=RANDOM_STATE)
).reset_index(drop=True)

# ---- features + derived ----
feature_cols = list(RAW_INPUTS)  # start with your raw inputs
df_feat = add_derived_features(df_sampled)
for c in ["L_loop","C_eq_est","f_res_est","gate_drive_strength","dvdt_proxy","miller_ratio"]:
    if c in df_feat.columns: feature_cols.append(c)
feature_cols = list(dict.fromkeys(feature_cols))  # de-dupe

# ---- split train/test ----
df_train = df_feat[df_feat[DEVICE_COL] != UNSEEN_DEVICE].copy()
df_test  = df_feat[df_feat[DEVICE_COL] == UNSEEN_DEVICE].copy()

# optional device embedding
if USE_EMBEDDING:
    le = LabelEncoder()
    known = df_train[DEVICE_COL].astype(str)
    le.fit(known.unique().tolist())
    # map train to ids
    df_train["_dev_id"] = le.transform(df_train[DEVICE_COL].astype(str))
    # build UNK for unseen
    unk_id = len(le.classes_)  # next index
    df_test["_dev_id"] = df_test[DEVICE_COL].astype(str).map(lambda s: le.transform([s])[0] if s in le.classes_ else unk_id)
else:
    # drop device col from features
    feature_cols = [c for c in feature_cols if c != DEVICE_COL]

# assemble matrices
X_num_train = df_train[feature_cols].copy()
X_num_test  = df_test[feature_cols].copy()
Y_cols = [c for c in TARGETS if c in df_train.columns]
y_train = df_train[Y_cols].copy()
y_test  = df_test[Y_cols].copy()

# ---- scale inputs & outputs ----
Xscaler = StandardScaler()
Yscalers = {c: StandardScaler() for c in Y_cols}

X_train = Xscaler.fit_transform(X_num_train.fillna(X_num_train.median(numeric_only=True)))
X_test  = Xscaler.transform(X_num_test.fillna(X_num_train.median(numeric_only=True)))

y_train_scaled = np.zeros_like(y_train.values, dtype=float)
y_test_scaled  = np.zeros_like(y_test.values, dtype=float)
for i, c in enumerate(Y_cols):
    y_train_scaled[:, i] = Yscalers[c].fit_transform(y_train[[c]].values).ravel()
    y_test_scaled[:, i]  = Yscalers[c].transform(y_test[[c]].values).ravel()

# ---- build model ----
def build_model(n_num_features, n_targets, n_devices=None, emb_dim=4):
    reg = regularizers.l2(1e-4)
    if USE_EMBEDDING:
        # two-input model: numeric + device id
        inp_num = layers.Input(shape=(n_num_features,), name="num")
        inp_dev = layers.Input(shape=(), dtype="int32", name="dev")
        emb = layers.Embedding(input_dim=n_devices, output_dim=emb_dim, name="dev_emb")(inp_dev)
        emb = layers.Flatten()(emb)
        x = layers.Concatenate()([inp_num, emb])
    else:
        inp_num = layers.Input(shape=(n_num_features,), name="num")
        x = inp_num

    x = layers.Dense(128, activation="relu", kernel_regularizer=reg)(x)
    x = layers.Dropout(0.15)(x)
    x = layers.Dense(64, activation="relu", kernel_regularizer=reg)(x)
    x = layers.Dropout(0.1)(x)
    out = layers.Dense(n_targets, activation="linear", name="y")(x)

    if USE_EMBEDDING:
        model = Model(inputs=[inp_num, inp_dev], outputs=out)
    else:
        model = Model(inputs=inp_num, outputs=out)

    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss="mse",
                  metrics=[tf.keras.metrics.MeanAbsoluteError(name="mae")])
    return model

n_devices = (df_train["_dev_id"].max() + 2) if USE_EMBEDDING else None  # +1 for 0-based, +1 for UNK
model = build_model(X_train.shape[1], len(Y_cols), n_devices=n_devices, emb_dim=4)

# ---- train ----
es = callbacks.EarlyStopping(monitor="val_loss", patience=20, restore_best_weights=True)
rlr = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=8, min_lr=1e-5)
if USE_EMBEDDING:
    hist = model.fit(
        {"num": X_train, "dev": df_train["_dev_id"].values},
        y_train_scaled,
        validation_split=0.15,
        epochs=200,
        batch_size=256,
        callbacks=[es, rlr],
        verbose=1
    )
else:
    hist = model.fit(
        X_train, y_train_scaled,
        validation_split=0.15,
        epochs=200,
        batch_size=256,
        callbacks=[es, rlr],
        verbose=1
    )

# ---- predict ----
if USE_EMBEDDING:
    y_pred_scaled = model.predict({"num": X_test, "dev": df_test["_dev_id"].values}, verbose=0)
else:
    y_pred_scaled = model.predict(X_test, verbose=0)

# inverse-scale predictions
y_pred = np.zeros_like(y_pred_scaled)
for i, c in enumerate(Y_cols):
    y_pred[:, i] = Yscalers[c].inverse_transform(y_pred_scaled[:, [i]]).ravel()

y_pred_df = pd.DataFrame(y_pred, columns=Y_cols, index=y_test.index)

# ---- metrics ----
def rmse(a,b): return float(np.sqrt(mean_squared_error(a, b)))
def r2(a,b):   return float(r2_score(a, b))

per_target = []
for c in Y_cols:
    per_target.append({"target": c, "RMSE": rmse(y_test[c], y_pred_df[c]), "R2": r2(y_test[c], y_pred_df[c]), "n_test": int(y_test[c].size)})
metrics_df = pd.DataFrame(per_target).sort_values("R2", ascending=False)
print("\n=== Unseen device:", UNSEEN_DEVICE, "===")
print(metrics_df.to_string(index=False))

print("\nOverall RMSE:", rmse(y_test.values, y_pred_df.values))
print("Overall R2  :", r2(y_test.values, y_pred_df.values))

# ---- save predictions ----
OUT_PATH = os.path.join(os.path.dirname(CSV_PATH), f"ann_unseen_{UNSEEN_DEVICE}_{'withEmb' if USE_EMBEDDING else 'noEmb'}.csv")
save_cols = [DEVICE_COL] + COMBO_COLS
save_cols = [c for c in save_cols if c in df_test.columns]
out = df_test[save_cols].copy()
for c in Y_cols:
    out[c+"_true"] = y_test[c].values
    out[c+"_pred"] = y_pred_df[c].values
out.to_csv(OUT_PATH, index=False)
print(f"\n✅ Saved predictions: {OUT_PATH}")


  df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(


Epoch 1/200
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.3467 - mae: 0.4017 - val_loss: 0.0454 - val_mae: 0.1293 - learning_rate: 0.0010
Epoch 2/200
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.1020 - mae: 0.2082 - val_loss: 0.0377 - val_mae: 0.1129 - learning_rate: 0.0010
Epoch 3/200
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0857 - mae: 0.1867 - val_loss: 0.0372 - val_mae: 0.1150 - learning_rate: 0.0010
Epoch 4/200
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0776 - mae: 0.1767 - val_loss: 0.0352 - val_mae: 0.1097 - learning_rate: 0.0010
Epoch 5/200
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0729 - mae: 0.1710 - val_loss: 0.0313 - val_mae: 0.0989 - learning_rate: 0.0010
Epoch 6/200
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0696 - mae: 0.1663

In [14]:
# ===== Simple ANN for unseen MOSFET (common combos + 25% per device + derived + optional embedding) =====
import os, numpy as np, pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, regularizers, callbacks, Model

# ------------ USER SETTINGS ------------
CSV_PATH = r"C:\Users\pc\Desktop\Classical_Models\MERGED_ORIGINAL\Train_5_MOSFETs.csv"
DEVICE_COL = "Part_Number"
UNSEEN_DEVICE = "C2M0025120D"
SAMPLE_FRAC = 0.25
RANDOM_STATE = 42
USE_EMBEDDING = False   # set True to include Part_Number embedding (an 'UNK' slot is used for unseen)
# --------------------------------------

np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

COMBO_COLS = ["Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11"]
RAW_INPUTS = [
    "Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11",
    # timing/fixture columns if present (optional); add if you have them in this file
    # "Tp1","Tp2","Toff","Tstart",
    # "L1","L2","L3","R1",
    "VDS_max","ID_max_25C","RDS_on_typ","RDS_on_max",
    "VGS_th_min","VGS_th_typ","VGS_th_max",
    "Qg_total","Qrr_typ","Irrm_typ","Eon_typ","Eoff_typ",
    "Ciss","Coss","Crss",
    "Rth_JC_typ","Rth_JC_max"
]
TARGETS = [
    "voltage_rise_time_pulse1","voltage_rise_time_pulse2",
    "voltage_fall_time_pulse1","voltage_fall_time_pulse2",
    "current_rise_time_pulse1","current_rise_time_pulse2",
    "current_fall_time_pulse1","current_fall_time_pulse2",
    "overshoot_pulse_1","overshoot_pulse_2",
    "undershoot_pulse_1","undershoot_pulse_2",
    "ringing_frequency_MHz"
]

# ---- load ----
df = pd.read_csv(CSV_PATH)
assert DEVICE_COL in df.columns, f"'{DEVICE_COL}' not found in columns."

# keep available columns only
RAW_INPUTS = [c for c in RAW_INPUTS if c in df.columns]
TARGETS = [c for c in TARGETS if c in df.columns]
for c in COMBO_COLS:
    if c not in df.columns:
        raise ValueError(f"Missing combo column: {c}")

# ---- derived features ----
def add_derived_features(frame: pd.DataFrame) -> pd.DataFrame:
    f = frame.copy()
    eps = 1e-12
    c_stray = 5e-12
    # L_loop (if L1/L2/L3 not in this file, only sum Ls4..Ls11)
    L_pieces = [c for c in ["Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11","L1","L2","L3"] if c in f.columns]
    f["L_loop"] = f[L_pieces].sum(axis=1) if L_pieces else np.nan
    # C_eq and resonance
    f["C_eq_est"] = (f["Coss"].fillna(0) if "Coss" in f.columns else 0) + c_stray
    f["f_res_est"] = 1.0/(2.0*np.pi*np.sqrt(np.clip(f["L_loop"]*f["C_eq_est"], eps, None)))
    # drive proxies
    f["gate_drive_strength"] = 1.0/(f["Rg"].abs() + 1e-3) if "Rg" in f.columns else np.nan
    f["dvdt_proxy"] = f["gate_drive_strength"]/((f["Crss"].abs() if "Crss" in f.columns else 0)+eps)
    f["miller_ratio"] = (f["Crss"].abs()/(f["Coss"].abs()+eps)) if ("Crss" in f.columns and "Coss" in f.columns) else np.nan
    return f

# ---- make combo key with rounding to avoid float mismatches ----
def make_combo_key(frame: pd.DataFrame) -> pd.Series:
    rounded = frame[COMBO_COLS].apply(lambda s: np.round(s.astype(float), 6))
    return rounded.apply(lambda row: tuple(row.values.tolist()), axis=1)

# ---- filter to common combos & sample 25%/device ----
devices = df[DEVICE_COL].dropna().unique().tolist()
assert UNSEEN_DEVICE in devices, f"{UNSEEN_DEVICE} not in dataset devices {devices}"
train_devices = [d for d in devices if d != UNSEEN_DEVICE]

df["_combo_key"] = make_combo_key(df)
combos_unseen = set(df.loc[df[DEVICE_COL]==UNSEEN_DEVICE, "_combo_key"].dropna().unique())
combos_train_sets = [set(df.loc[df[DEVICE_COL]==d, "_combo_key"].dropna().unique()) for d in train_devices]
common_train = set.intersection(*combos_train_sets) if combos_train_sets else set()
common_combos = combos_unseen.intersection(common_train)
df_common = df[df["_combo_key"].isin(common_combos) & df[DEVICE_COL].isin(train_devices + [UNSEEN_DEVICE])].copy()

# sample 25% per device
df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(
    lambda g: g.sample(frac=SAMPLE_FRAC, random_state=RANDOM_STATE)
).reset_index(drop=True)

# ---- features + derived ----
feature_cols = list(RAW_INPUTS)  # start with your raw inputs
df_feat = add_derived_features(df_sampled)
for c in ["L_loop","C_eq_est","f_res_est","gate_drive_strength","dvdt_proxy","miller_ratio"]:
    if c in df_feat.columns: feature_cols.append(c)
feature_cols = list(dict.fromkeys(feature_cols))  # de-dupe

# ---- split train/test ----
df_train = df_feat[df_feat[DEVICE_COL] != UNSEEN_DEVICE].copy()
df_test  = df_feat[df_feat[DEVICE_COL] == UNSEEN_DEVICE].copy()

# optional device embedding
if USE_EMBEDDING:
    le = LabelEncoder()
    known = df_train[DEVICE_COL].astype(str)
    le.fit(known.unique().tolist())
    # map train to ids
    df_train["_dev_id"] = le.transform(df_train[DEVICE_COL].astype(str))
    # build UNK for unseen
    unk_id = len(le.classes_)  # next index
    df_test["_dev_id"] = df_test[DEVICE_COL].astype(str).map(lambda s: le.transform([s])[0] if s in le.classes_ else unk_id)
else:
    # drop device col from features
    feature_cols = [c for c in feature_cols if c != DEVICE_COL]

# assemble matrices
X_num_train = df_train[feature_cols].copy()
X_num_test  = df_test[feature_cols].copy()
Y_cols = [c for c in TARGETS if c in df_train.columns]
y_train = df_train[Y_cols].copy()
y_test  = df_test[Y_cols].copy()

# ---- scale inputs & outputs ----
Xscaler = StandardScaler()
Yscalers = {c: StandardScaler() for c in Y_cols}

X_train = Xscaler.fit_transform(X_num_train.fillna(X_num_train.median(numeric_only=True)))
X_test  = Xscaler.transform(X_num_test.fillna(X_num_train.median(numeric_only=True)))

y_train_scaled = np.zeros_like(y_train.values, dtype=float)
y_test_scaled  = np.zeros_like(y_test.values, dtype=float)
for i, c in enumerate(Y_cols):
    y_train_scaled[:, i] = Yscalers[c].fit_transform(y_train[[c]].values).ravel()
    y_test_scaled[:, i]  = Yscalers[c].transform(y_test[[c]].values).ravel()

# ---- build model ----
def build_model(n_num_features, n_targets, n_devices=None, emb_dim=4):
    reg = regularizers.l2(1e-4)
    if USE_EMBEDDING:
        # two-input model: numeric + device id
        inp_num = layers.Input(shape=(n_num_features,), name="num")
        inp_dev = layers.Input(shape=(), dtype="int32", name="dev")
        emb = layers.Embedding(input_dim=n_devices, output_dim=emb_dim, name="dev_emb")(inp_dev)
        emb = layers.Flatten()(emb)
        x = layers.Concatenate()([inp_num, emb])
    else:
        inp_num = layers.Input(shape=(n_num_features,), name="num")
        x = inp_num

    x = layers.Dense(128, activation="relu", kernel_regularizer=reg)(x)
    x = layers.Dropout(0.15)(x)
    x = layers.Dense(64, activation="relu", kernel_regularizer=reg)(x)
    x = layers.Dropout(0.1)(x)
    out = layers.Dense(n_targets, activation="linear", name="y")(x)

    if USE_EMBEDDING:
        model = Model(inputs=[inp_num, inp_dev], outputs=out)
    else:
        model = Model(inputs=inp_num, outputs=out)

    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss="mse",
                  metrics=[tf.keras.metrics.MeanAbsoluteError(name="mae")])
    return model

n_devices = (df_train["_dev_id"].max() + 2) if USE_EMBEDDING else None  # +1 for 0-based, +1 for UNK
model = build_model(X_train.shape[1], len(Y_cols), n_devices=n_devices, emb_dim=4)

# ---- train ----
es = callbacks.EarlyStopping(monitor="val_loss", patience=20, restore_best_weights=True)
rlr = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=8, min_lr=1e-5)
if USE_EMBEDDING:
    hist = model.fit(
        {"num": X_train, "dev": df_train["_dev_id"].values},
        y_train_scaled,
        validation_split=0.15,
        epochs=200,
        batch_size=256,
        callbacks=[es, rlr],
        verbose=1
    )
else:
    hist = model.fit(
        X_train, y_train_scaled,
        validation_split=0.15,
        epochs=200,
        batch_size=256,
        callbacks=[es, rlr],
        verbose=1
    )

# ---- predict ----
if USE_EMBEDDING:
    y_pred_scaled = model.predict({"num": X_test, "dev": df_test["_dev_id"].values}, verbose=0)
else:
    y_pred_scaled = model.predict(X_test, verbose=0)

# inverse-scale predictions
y_pred = np.zeros_like(y_pred_scaled)
for i, c in enumerate(Y_cols):
    y_pred[:, i] = Yscalers[c].inverse_transform(y_pred_scaled[:, [i]]).ravel()

y_pred_df = pd.DataFrame(y_pred, columns=Y_cols, index=y_test.index)

# ---- metrics ----
def rmse(a,b): return float(np.sqrt(mean_squared_error(a, b)))
def r2(a,b):   return float(r2_score(a, b))

per_target = []
for c in Y_cols:
    per_target.append({"target": c, "RMSE": rmse(y_test[c], y_pred_df[c]), "R2": r2(y_test[c], y_pred_df[c]), "n_test": int(y_test[c].size)})
metrics_df = pd.DataFrame(per_target).sort_values("R2", ascending=False)
print("\n=== Unseen device:", UNSEEN_DEVICE, "===")
print(metrics_df.to_string(index=False))

print("\nOverall RMSE:", rmse(y_test.values, y_pred_df.values))
print("Overall R2  :", r2(y_test.values, y_pred_df.values))

# ---- save predictions ----
OUT_PATH = os.path.join(os.path.dirname(CSV_PATH), f"ann_unseen_{UNSEEN_DEVICE}_{'withEmb' if USE_EMBEDDING else 'noEmb'}.csv")
save_cols = [DEVICE_COL] + COMBO_COLS
save_cols = [c for c in save_cols if c in df_test.columns]
out = df_test[save_cols].copy()
for c in Y_cols:
    out[c+"_true"] = y_test[c].values
    out[c+"_pred"] = y_pred_df[c].values
out.to_csv(OUT_PATH, index=False)
print(f"\n✅ Saved predictions: {OUT_PATH}")


  df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(


Epoch 1/200
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.3249 - mae: 0.3925 - val_loss: 0.0442 - val_mae: 0.1247 - learning_rate: 0.0010
Epoch 2/200
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.1106 - mae: 0.2114 - val_loss: 0.0398 - val_mae: 0.1186 - learning_rate: 0.0010
Epoch 3/200
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0931 - mae: 0.1900 - val_loss: 0.0353 - val_mae: 0.1071 - learning_rate: 0.0010
Epoch 4/200
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0854 - mae: 0.1799 - val_loss: 0.0343 - val_mae: 0.1055 - learning_rate: 0.0010
Epoch 5/200
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0812 - mae: 0.1740 - val_loss: 0.0335 - val_mae: 0.1057 - learning_rate: 0.0010
Epoch 6/200
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0772 - mae: 0.1690

In [1]:
# ===== ANN for unseen MOSFET (common combos + 25% per device + rich derived + optional embedding + log targets + weighted loss) =====
import os, numpy as np, pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, regularizers, callbacks, Model

# ------------ USER SETTINGS ------------
CSV_PATH = r"C:\Users\pc\Desktop\Classical_Models\MERGED_ORIGINAL\Train_5_MOSFETs.csv"
DEVICE_COL = "Part_Number"
UNSEEN_DEVICE = "C2M0025120D"
SAMPLE_FRAC = 0.25
RANDOM_STATE = 42
USE_EMBEDDING = False     # True -> include device embedding with UNK for unseen
APPLY_LOG1P = True        # log1p transform for skewed time targets + ringing
# Per-target weights (emphasize the hard ones)
TARGET_WEIGHTS = {
    "voltage_rise_time_pulse1": 2.0,
    "voltage_rise_time_pulse2": 2.0,
    "voltage_fall_time_pulse1": 2.0,
    "voltage_fall_time_pulse2": 2.0,
    "current_rise_time_pulse1": 1.5,
    "current_rise_time_pulse2": 1.5,
    "current_fall_time_pulse1": 1.0,
    "current_fall_time_pulse2": 1.0,
    "overshoot_pulse_1": 1.0,
    "overshoot_pulse_2": 1.0,
    "undershoot_pulse_1": 1.0,
    "undershoot_pulse_2": 1.0,
    "ringing_frequency_MHz": 2.5
}
# --------------------------------------

np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

COMBO_COLS = ["Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11"]

RAW_INPUTS = [
    "Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11",
    # include if present
    "Tp1","Tp2","Toff","Tstart","L1","L2","L3","R1",
    "VDS_max","ID_max_25C","RDS_on_typ","RDS_on_max",
    "VGS_th_min","VGS_th_typ","VGS_th_max",
    "Qg_total","Qrr_typ","Irrm_typ","Eon_typ","Eoff_typ",
    "Ciss","Coss","Crss",
    "Rth_JC_typ","Rth_JC_max","Tj_max",
    DEVICE_COL
]

TARGETS = [
    "voltage_rise_time_pulse1","voltage_rise_time_pulse2",
    "voltage_fall_time_pulse1","voltage_fall_time_pulse2",
    "current_rise_time_pulse1","current_rise_time_pulse2",
    "current_fall_time_pulse1","current_fall_time_pulse2",
    "overshoot_pulse_1","overshoot_pulse_2",
    "undershoot_pulse_1","undershoot_pulse_2",
    "ringing_frequency_MHz"
]

# ---- load ----
df = pd.read_csv(CSV_PATH)
assert DEVICE_COL in df.columns, f"'{DEVICE_COL}' not found."

RAW_INPUTS = [c for c in RAW_INPUTS if c in df.columns]
TARGETS = [c for c in TARGETS if c in df.columns]
for c in COMBO_COLS:
    if c not in df.columns:
        raise ValueError(f"Missing combo column: {c}")

# ---- derived features ----
def add_derived_features(frame: pd.DataFrame) -> pd.DataFrame:
    f = frame.copy()
    eps = 1e-18
    c_stray = 5e-12
    # L_loop
    L_pieces = [c for c in ["Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11","L1","L2","L3"] if c in f.columns]
    f["L_loop"] = f[L_pieces].sum(axis=1) if L_pieces else np.nan
    # C_eq, resonance
    f["C_eq_est"] = (f["Coss"] if "Coss" in f.columns else 0).fillna(0) + c_stray
    f["f_res_est"] = 1.0/(2.0*np.pi*np.sqrt(np.clip(f["L_loop"]*f["C_eq_est"], eps, None)))
    # drive proxies
    f["gate_drive_strength"] = 1.0/((f["Rg"].abs() if "Rg" in f.columns else 0)+1e-3)
    f["dvdt_proxy"] = f["gate_drive_strength"]/((f["Crss"].abs() if "Crss" in f.columns else 0)+1e-15)
    f["miller_ratio"] = (f["Crss"].abs()/(f["Coss"].abs()+1e-15)) if ("Crss" in f.columns and "Coss" in f.columns) else np.nan
    # energy/cap ratios
    f["E_total"] = (f.get("Eon_typ", 0)).fillna(0) + (f.get("Eoff_typ", 0)).fillna(0)
    f["Cgd_ratio"] = (f["Crss"].abs()/(f["Ciss"].abs()+1e-15)) if ("Crss" in f.columns and "Ciss" in f.columns) else np.nan
    f["Qrr_over_Qg"] = (f["Qrr_typ"].abs()/(f["Qg_total"].abs()+1e-15)) if ("Qrr_typ" in f.columns and "Qg_total" in f.columns) else np.nan
    # damping proxy zeta ~ 0.5*R*sqrt(C/L)
    if "R1" in f.columns:
        f["zeta_proxy"] = 0.5*(f["R1"].abs())*np.sqrt(np.clip(f["C_eq_est"]/(f["L_loop"]+eps), eps, None))
    else:
        f["zeta_proxy"] = np.nan
    return f

def make_combo_key(frame: pd.DataFrame) -> pd.Series:
    rounded = frame[COMBO_COLS].apply(lambda s: np.round(s.astype(float), 6))
    return rounded.apply(lambda row: tuple(row.values.tolist()), axis=1)

# ---- filter to common combos & sample 25%/device ----
devices = df[DEVICE_COL].dropna().unique().tolist()
assert UNSEEN_DEVICE in devices, f"{UNSEEN_DEVICE} not in dataset {devices}"
train_devices = [d for d in devices if d != UNSEEN_DEVICE]

df["_combo_key"] = make_combo_key(df)
combos_unseen = set(df.loc[df[DEVICE_COL]==UNSEEN_DEVICE, "_combo_key"].dropna().unique())
combos_train_sets = [set(df.loc[df[DEVICE_COL]==d, "_combo_key"].dropna().unique()) for d in train_devices]
common_train = set.intersection(*combos_train_sets) if combos_train_sets else set()
common_combos = combos_unseen.intersection(common_train)
df_common = df[df["_combo_key"].isin(common_combos) & df[DEVICE_COL].isin(train_devices + [UNSEEN_DEVICE])].copy()

df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(
    lambda g: g.sample(frac=SAMPLE_FRAC, random_state=RANDOM_STATE)
).reset_index(drop=True)

# ---- features + derived ----
feature_cols = list(RAW_INPUTS)
df_feat = add_derived_features(df_sampled)
for c in ["L_loop","C_eq_est","f_res_est","gate_drive_strength","dvdt_proxy","miller_ratio",
          "E_total","Cgd_ratio","Qrr_over_Qg","zeta_proxy"]:
    if c in df_feat.columns: feature_cols.append(c)
feature_cols = list(dict.fromkeys(feature_cols))

# ---- split ----
df_train = df_feat[df_feat[DEVICE_COL] != UNSEEN_DEVICE].copy()
df_test  = df_feat[df_feat[DEVICE_COL] == UNSEEN_DEVICE].copy()

# optional embedding
if USE_EMBEDDING:
    le = LabelEncoder()
    known = df_train[DEVICE_COL].astype(str)
    le.fit(known.unique().tolist())
    df_train["_dev_id"] = le.transform(df_train[DEVICE_COL].astype(str))
    unk_id = len(le.classes_)
    df_test["_dev_id"] = df_test[DEVICE_COL].astype(str).map(lambda s: le.transform([s])[0] if s in le.classes_ else unk_id)
else:
    feature_cols = [c for c in feature_cols if c != DEVICE_COL]

# assemble matrices
X_num_train = df_train[feature_cols].copy()
X_num_test  = df_test[feature_cols].copy()
Y_cols = [c for c in TARGETS if c in df_train.columns]
y_train = df_train[Y_cols].copy()
y_test  = df_test[Y_cols].copy()

# ---- optional log1p on selected targets ----
log_targets = set()
if APPLY_LOG1P:
    for c in Y_cols:
        if ("rise_time" in c) or ("fall_time" in c) or ("ringing_frequency" in c):
            log_targets.add(c)
def log1p_df(df_, cols):
    out = df_.copy()
    for c in cols:
        out[c] = np.log1p(np.clip(out[c].values, a_min=0, a_max=None))
    return out
def expm1_df(df_, cols):
    out = df_.copy()
    for c in cols:
        out[c] = np.expm1(out[c].values)
    return out

y_train_for_scale = log1p_df(y_train, log_targets) if log_targets else y_train.copy()
y_test_for_scale  = log1p_df(y_test, log_targets)  if log_targets else y_test.copy()

# ---- scale inputs & outputs ----
Xscaler = StandardScaler()
Yscalers = {c: StandardScaler() for c in Y_cols}

X_train = Xscaler.fit_transform(X_num_train.fillna(X_num_train.median(numeric_only=True)))
X_test  = Xscaler.transform(X_num_test.fillna(X_num_train.median(numeric_only=True)))

y_train_scaled = np.zeros_like(y_train_for_scale.values, dtype=float)
y_test_scaled  = np.zeros_like(y_test_for_scale.values, dtype=float)
for i, c in enumerate(Y_cols):
    y_train_scaled[:, i] = Yscalers[c].fit_transform(y_train_for_scale[[c]].values).ravel()
    y_test_scaled[:, i]  = Yscalers[c].transform(y_test_for_scale[[c]].values).ravel()

# ---- weighted loss ----
# MSE with per-output weights: loss = mean(sum(w_i * (y_i - yhat_i)^2))
weights_vec = np.array([TARGET_WEIGHTS.get(c, 1.0) for c in Y_cols], dtype="float32")
weights_tf = tf.constant(weights_vec, dtype=tf.float32)
def weighted_mse(y_true, y_pred):
    err = y_true - y_pred
    return tf.reduce_mean(tf.reduce_sum(weights_tf * tf.square(err), axis=-1))

# ---- build model ----
def build_model(n_num_features, n_targets, n_devices=None, emb_dim=4):
    reg = regularizers.l2(1e-4)
    if USE_EMBEDDING:
        inp_num = layers.Input(shape=(n_num_features,), name="num")
        inp_dev = layers.Input(shape=(), dtype="int32", name="dev")
        emb = layers.Embedding(input_dim=n_devices, output_dim=emb_dim, name="dev_emb")(inp_dev)
        emb = layers.Flatten()(emb)
        x = layers.Concatenate()([inp_num, emb])
    else:
        inp_num = layers.Input(shape=(n_num_features,), name="num")
        x = inp_num

    x = layers.Dense(192, activation="relu", kernel_regularizer=reg)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(96, activation="relu", kernel_regularizer=reg)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.15)(x)
    x = layers.Dense(64, activation="relu", kernel_regularizer=reg)(x)
    out = layers.Dense(n_targets, activation="linear", name="y")(x)

    model = Model(inputs=[inp_num, inp_dev], outputs=out) if USE_EMBEDDING else Model(inputs=inp_num, outputs=out)
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss=weighted_mse, metrics=[tf.keras.metrics.MeanAbsoluteError(name="mae")])
    return model

n_devices = (df_train["_dev_id"].max() + 2) if USE_EMBEDDING else None
model = build_model(X_train.shape[1], len(Y_cols), n_devices=n_devices, emb_dim=4)

# ---- train ----
es = callbacks.EarlyStopping(monitor="val_loss", patience=25, restore_best_weights=True)
rlr = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=10, min_lr=1e-5)

if USE_EMBEDDING:
    hist = model.fit({"num": X_train, "dev": df_train["_dev_id"].values}, y_train_scaled,
                     validation_split=0.15, epochs=300, batch_size=256, callbacks=[es, rlr], verbose=1)
else:
    hist = model.fit(X_train, y_train_scaled,
                     validation_split=0.15, epochs=300, batch_size=256, callbacks=[es, rlr], verbose=1)

# ---- predict ----
if USE_EMBEDDING:
    y_pred_scaled = model.predict({"num": X_test, "dev": df_test["_dev_id"].values}, verbose=0)
else:
    y_pred_scaled = model.predict(X_test, verbose=0)

# inverse scale + inverse log
y_pred_df = pd.DataFrame(y_pred_scaled, columns=Y_cols, index=y_test.index)
for i, c in enumerate(Y_cols):
    y_pred_df[c] = Yscalers[c].inverse_transform(y_pred_df[[c]].values).ravel()
if log_targets:
    y_pred_df = expm1_df(y_pred_df, log_targets)

# ---- metrics ----
def rmse(a,b): return float(np.sqrt(mean_squared_error(a, b)))
def r2(a,b):   return float(r2_score(a, b))

per_target = []
y_true_eval = y_test.copy()
if log_targets:
    # y_test_for_scale was log+scaled; we want to evaluate in the original space
    y_true_eval = y_test.copy()  # already original space

for c in Y_cols:
    per_target.append({
        "target": c,
        "RMSE": rmse(y_true_eval[c], y_pred_df[c]),
        "R2": r2(y_true_eval[c], y_pred_df[c]),
        "n_test": int(y_true_eval[c].size)
    })
metrics_df = pd.DataFrame(per_target).sort_values("R2", ascending=False)
print("\n=== Unseen device:", UNSEEN_DEVICE, "===")
print(metrics_df.to_string(index=False))

print("\nOverall RMSE:", rmse(y_true_eval.values, y_pred_df.values))
print("Overall R2  :", r2(y_true_eval.values, y_pred_df.values))

# ---- save predictions ----
OUT_PATH = os.path.join(os.path.dirname(CSV_PATH), f"ann_unseen_{UNSEEN_DEVICE}_{'withEmb' if USE_EMBEDDING else 'noEmb'}_log{int(APPLY_LOG1P)}_wloss.csv")
save_cols = [DEVICE_COL] + COMBO_COLS
save_cols = [c for c in save_cols if c in df_test.columns]
out = df_test[save_cols].copy()
for c in Y_cols:
    out[c+"_true"] = y_true_eval[c].values
    out[c+"_pred"] = y_pred_df[c].values
out.to_csv(OUT_PATH, index=False)
print(f"\n✅ Saved predictions: {OUT_PATH}")


  df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(


Epoch 1/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 11.0115 - mae: 0.5217 - val_loss: 2.6975 - val_mae: 0.3049 - learning_rate: 0.0010
Epoch 2/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 2.1330 - mae: 0.2348 - val_loss: 0.8424 - val_mae: 0.1487 - learning_rate: 0.0010
Epoch 3/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 1.6640 - mae: 0.1996 - val_loss: 0.6652 - val_mae: 0.1251 - learning_rate: 0.0010
Epoch 4/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 1.4690 - mae: 0.1829 - val_loss: 0.6180 - val_mae: 0.1207 - learning_rate: 0.0010
Epoch 5/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 1.3683 - mae: 0.1733 - val_loss: 0.6567 - val_mae: 0.1295 - learning_rate: 0.0010
Epoch 6/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 1.3007 - mae: 0.166

In [7]:
# ===== ANN for unseen MOSFET (common combos + 25% per device + rich derived + optional embedding + log targets + weighted loss) =====
import os, numpy as np, pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, regularizers, callbacks, Model

# ------------ USER SETTINGS ------------
CSV_PATH = r"C:\Users\pc\Desktop\Classical_Models\MERGED_ORIGINAL\Train_5_MOSFETs.csv"
DEVICE_COL = "Part_Number"
UNSEEN_DEVICE = "C2M0040120D"
SAMPLE_FRAC = 0.25
RANDOM_STATE = 42
USE_EMBEDDING = False     # True -> include device embedding with UNK for unseen
APPLY_LOG1P = True        # log1p transform for skewed time targets + ringing
# Per-target weights (emphasize the hard ones)
TARGET_WEIGHTS = {
    "voltage_rise_time_pulse1": 2.0,
    "voltage_rise_time_pulse2": 2.0,
    "voltage_fall_time_pulse1": 2.0,
    "voltage_fall_time_pulse2": 2.0,
    "current_rise_time_pulse1": 1.5,
    "current_rise_time_pulse2": 1.5,
    "current_fall_time_pulse1": 1.0,
    "current_fall_time_pulse2": 1.0,
    "overshoot_pulse_1": 1.0,
    "overshoot_pulse_2": 1.0,
    "undershoot_pulse_1": 1.0,
    "undershoot_pulse_2": 1.0,
    "ringing_frequency_MHz": 2.5
}
# --------------------------------------

np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

COMBO_COLS = ["Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11"]

RAW_INPUTS = [
    "Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11",
    # include if present
    "Tp1","Tp2","Toff","Tstart","L1","L2","L3","R1",
    "VDS_max","ID_max_25C","RDS_on_typ","RDS_on_max",
    "VGS_th_min","VGS_th_typ","VGS_th_max",
    "Qg_total","Qrr_typ","Irrm_typ","Eon_typ","Eoff_typ",
    "Ciss","Coss","Crss",
    "Rth_JC_typ","Rth_JC_max","Tj_max",
    DEVICE_COL
]

TARGETS = [
    "voltage_rise_time_pulse1","voltage_rise_time_pulse2",
    "voltage_fall_time_pulse1","voltage_fall_time_pulse2",
    "current_rise_time_pulse1","current_rise_time_pulse2",
    "current_fall_time_pulse1","current_fall_time_pulse2",
    "overshoot_pulse_1","overshoot_pulse_2",
    "undershoot_pulse_1","undershoot_pulse_2",
    "ringing_frequency_MHz"
]

# ---- load ----
df = pd.read_csv(CSV_PATH)
assert DEVICE_COL in df.columns, f"'{DEVICE_COL}' not found."

RAW_INPUTS = [c for c in RAW_INPUTS if c in df.columns]
TARGETS = [c for c in TARGETS if c in df.columns]
for c in COMBO_COLS:
    if c not in df.columns:
        raise ValueError(f"Missing combo column: {c}")

# ---- derived features ----
def add_derived_features(frame: pd.DataFrame) -> pd.DataFrame:
    f = frame.copy()
    eps = 1e-18
    c_stray = 5e-12
    # L_loop
    L_pieces = [c for c in ["Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11","L1","L2","L3"] if c in f.columns]
    f["L_loop"] = f[L_pieces].sum(axis=1) if L_pieces else np.nan
    # C_eq, resonance
    f["C_eq_est"] = (f["Coss"] if "Coss" in f.columns else 0).fillna(0) + c_stray
    f["f_res_est"] = 1.0/(2.0*np.pi*np.sqrt(np.clip(f["L_loop"]*f["C_eq_est"], eps, None)))
    # drive proxies
    f["gate_drive_strength"] = 1.0/((f["Rg"].abs() if "Rg" in f.columns else 0)+1e-3)
    f["dvdt_proxy"] = f["gate_drive_strength"]/((f["Crss"].abs() if "Crss" in f.columns else 0)+1e-15)
    f["miller_ratio"] = (f["Crss"].abs()/(f["Coss"].abs()+1e-15)) if ("Crss" in f.columns and "Coss" in f.columns) else np.nan
    # energy/cap ratios
    f["E_total"] = (f.get("Eon_typ", 0)).fillna(0) + (f.get("Eoff_typ", 0)).fillna(0)
    f["Cgd_ratio"] = (f["Crss"].abs()/(f["Ciss"].abs()+1e-15)) if ("Crss" in f.columns and "Ciss" in f.columns) else np.nan
    f["Qrr_over_Qg"] = (f["Qrr_typ"].abs()/(f["Qg_total"].abs()+1e-15)) if ("Qrr_typ" in f.columns and "Qg_total" in f.columns) else np.nan
    # damping proxy zeta ~ 0.5*R*sqrt(C/L)
    if "R1" in f.columns:
        f["zeta_proxy"] = 0.5*(f["R1"].abs())*np.sqrt(np.clip(f["C_eq_est"]/(f["L_loop"]+eps), eps, None))
    else:
        f["zeta_proxy"] = np.nan
    return f

def make_combo_key(frame: pd.DataFrame) -> pd.Series:
    rounded = frame[COMBO_COLS].apply(lambda s: np.round(s.astype(float), 6))
    return rounded.apply(lambda row: tuple(row.values.tolist()), axis=1)

# ---- filter to common combos & sample 25%/device ----
devices = df[DEVICE_COL].dropna().unique().tolist()
assert UNSEEN_DEVICE in devices, f"{UNSEEN_DEVICE} not in dataset {devices}"
train_devices = [d for d in devices if d != UNSEEN_DEVICE]

df["_combo_key"] = make_combo_key(df)
combos_unseen = set(df.loc[df[DEVICE_COL]==UNSEEN_DEVICE, "_combo_key"].dropna().unique())
combos_train_sets = [set(df.loc[df[DEVICE_COL]==d, "_combo_key"].dropna().unique()) for d in train_devices]
common_train = set.intersection(*combos_train_sets) if combos_train_sets else set()
common_combos = combos_unseen.intersection(common_train)
df_common = df[df["_combo_key"].isin(common_combos) & df[DEVICE_COL].isin(train_devices + [UNSEEN_DEVICE])].copy()

df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(
    lambda g: g.sample(frac=SAMPLE_FRAC, random_state=RANDOM_STATE)
).reset_index(drop=True)

# ---- features + derived ----
feature_cols = list(RAW_INPUTS)
df_feat = add_derived_features(df_sampled)
for c in ["L_loop","C_eq_est","f_res_est","gate_drive_strength","dvdt_proxy","miller_ratio",
          "E_total","Cgd_ratio","Qrr_over_Qg","zeta_proxy"]:
    if c in df_feat.columns: feature_cols.append(c)
feature_cols = list(dict.fromkeys(feature_cols))

# ---- split ----
df_train = df_feat[df_feat[DEVICE_COL] != UNSEEN_DEVICE].copy()
df_test  = df_feat[df_feat[DEVICE_COL] == UNSEEN_DEVICE].copy()

# optional embedding
if USE_EMBEDDING:
    le = LabelEncoder()
    known = df_train[DEVICE_COL].astype(str)
    le.fit(known.unique().tolist())
    df_train["_dev_id"] = le.transform(df_train[DEVICE_COL].astype(str))
    unk_id = len(le.classes_)
    df_test["_dev_id"] = df_test[DEVICE_COL].astype(str).map(lambda s: le.transform([s])[0] if s in le.classes_ else unk_id)
else:
    feature_cols = [c for c in feature_cols if c != DEVICE_COL]

# assemble matrices
X_num_train = df_train[feature_cols].copy()
X_num_test  = df_test[feature_cols].copy()
Y_cols = [c for c in TARGETS if c in df_train.columns]
y_train = df_train[Y_cols].copy()
y_test  = df_test[Y_cols].copy()

# ---- optional log1p on selected targets ----
log_targets = set()
if APPLY_LOG1P:
    for c in Y_cols:
        if ("rise_time" in c) or ("fall_time" in c) or ("ringing_frequency" in c):
            log_targets.add(c)
def log1p_df(df_, cols):
    out = df_.copy()
    for c in cols:
        out[c] = np.log1p(np.clip(out[c].values, a_min=0, a_max=None))
    return out
def expm1_df(df_, cols):
    out = df_.copy()
    for c in cols:
        out[c] = np.expm1(out[c].values)
    return out

y_train_for_scale = log1p_df(y_train, log_targets) if log_targets else y_train.copy()
y_test_for_scale  = log1p_df(y_test, log_targets)  if log_targets else y_test.copy()

# ---- scale inputs & outputs ----
Xscaler = StandardScaler()
Yscalers = {c: StandardScaler() for c in Y_cols}

X_train = Xscaler.fit_transform(X_num_train.fillna(X_num_train.median(numeric_only=True)))
X_test  = Xscaler.transform(X_num_test.fillna(X_num_train.median(numeric_only=True)))

y_train_scaled = np.zeros_like(y_train_for_scale.values, dtype=float)
y_test_scaled  = np.zeros_like(y_test_for_scale.values, dtype=float)
for i, c in enumerate(Y_cols):
    y_train_scaled[:, i] = Yscalers[c].fit_transform(y_train_for_scale[[c]].values).ravel()
    y_test_scaled[:, i]  = Yscalers[c].transform(y_test_for_scale[[c]].values).ravel()

# ---- weighted loss ----
# MSE with per-output weights: loss = mean(sum(w_i * (y_i - yhat_i)^2))
weights_vec = np.array([TARGET_WEIGHTS.get(c, 1.0) for c in Y_cols], dtype="float32")
weights_tf = tf.constant(weights_vec, dtype=tf.float32)
def weighted_mse(y_true, y_pred):
    err = y_true - y_pred
    return tf.reduce_mean(tf.reduce_sum(weights_tf * tf.square(err), axis=-1))

# ---- build model ----
def build_model(n_num_features, n_targets, n_devices=None, emb_dim=4):
    reg = regularizers.l2(1e-4)
    if USE_EMBEDDING:
        inp_num = layers.Input(shape=(n_num_features,), name="num")
        inp_dev = layers.Input(shape=(), dtype="int32", name="dev")
        emb = layers.Embedding(input_dim=n_devices, output_dim=emb_dim, name="dev_emb")(inp_dev)
        emb = layers.Flatten()(emb)
        x = layers.Concatenate()([inp_num, emb])
    else:
        inp_num = layers.Input(shape=(n_num_features,), name="num")
        x = inp_num

    x = layers.Dense(192, activation="relu", kernel_regularizer=reg)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(96, activation="relu", kernel_regularizer=reg)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.15)(x)
    x = layers.Dense(64, activation="relu", kernel_regularizer=reg)(x)
    out = layers.Dense(n_targets, activation="linear", name="y")(x)

    model = Model(inputs=[inp_num, inp_dev], outputs=out) if USE_EMBEDDING else Model(inputs=inp_num, outputs=out)
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss=weighted_mse, metrics=[tf.keras.metrics.MeanAbsoluteError(name="mae")])
    return model

n_devices = (df_train["_dev_id"].max() + 2) if USE_EMBEDDING else None
model = build_model(X_train.shape[1], len(Y_cols), n_devices=n_devices, emb_dim=4)

# ---- train ----
es = callbacks.EarlyStopping(monitor="val_loss", patience=25, restore_best_weights=True)
rlr = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=10, min_lr=1e-5)

if USE_EMBEDDING:
    hist = model.fit({"num": X_train, "dev": df_train["_dev_id"].values}, y_train_scaled,
                     validation_split=0.15, epochs=300, batch_size=256, callbacks=[es, rlr], verbose=1)
else:
    hist = model.fit(X_train, y_train_scaled,
                     validation_split=0.15, epochs=300, batch_size=256, callbacks=[es, rlr], verbose=1)

# ---- predict ----
if USE_EMBEDDING:
    y_pred_scaled = model.predict({"num": X_test, "dev": df_test["_dev_id"].values}, verbose=0)
else:
    y_pred_scaled = model.predict(X_test, verbose=0)

# inverse scale + inverse log
y_pred_df = pd.DataFrame(y_pred_scaled, columns=Y_cols, index=y_test.index)
for i, c in enumerate(Y_cols):
    y_pred_df[c] = Yscalers[c].inverse_transform(y_pred_df[[c]].values).ravel()
if log_targets:
    y_pred_df = expm1_df(y_pred_df, log_targets)

# ---- metrics ----
def rmse(a,b): return float(np.sqrt(mean_squared_error(a, b)))
def r2(a,b):   return float(r2_score(a, b))

per_target = []
y_true_eval = y_test.copy()
if log_targets:
    # y_test_for_scale was log+scaled; we want to evaluate in the original space
    y_true_eval = y_test.copy()  # already original space

for c in Y_cols:
    per_target.append({
        "target": c,
        "RMSE": rmse(y_true_eval[c], y_pred_df[c]),
        "R2": r2(y_true_eval[c], y_pred_df[c]),
        "n_test": int(y_true_eval[c].size)
    })
metrics_df = pd.DataFrame(per_target).sort_values("R2", ascending=False)
print("\n=== Unseen device:", UNSEEN_DEVICE, "===")
print(metrics_df.to_string(index=False))

print("\nOverall RMSE:", rmse(y_true_eval.values, y_pred_df.values))
print("Overall R2  :", r2(y_true_eval.values, y_pred_df.values))

# ---- save predictions ----
OUT_PATH = os.path.join(os.path.dirname(CSV_PATH), f"ann_unseen_{UNSEEN_DEVICE}_{'withEmb' if USE_EMBEDDING else 'noEmb'}_log{int(APPLY_LOG1P)}_wloss.csv")
save_cols = [DEVICE_COL] + COMBO_COLS
save_cols = [c for c in save_cols if c in df_test.columns]
out = df_test[save_cols].copy()
for c in Y_cols:
    out[c+"_true"] = y_true_eval[c].values
    out[c+"_pred"] = y_pred_df[c].values
out.to_csv(OUT_PATH, index=False)
print(f"\n✅ Saved predictions: {OUT_PATH}")


  df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(


Epoch 1/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - loss: 8.0405 - mae: 0.4583 - val_loss: 1.7299 - val_mae: 0.2382 - learning_rate: 0.0010
Epoch 2/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - loss: 1.8268 - mae: 0.2232 - val_loss: 0.6465 - val_mae: 0.1267 - learning_rate: 0.0010
Epoch 3/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 1.4268 - mae: 0.1918 - val_loss: 0.5974 - val_mae: 0.1207 - learning_rate: 0.0010
Epoch 4/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 1.2547 - mae: 0.1777 - val_loss: 0.5659 - val_mae: 0.1159 - learning_rate: 0.0010
Epoch 5/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 1.1728 - mae: 0.1690 - val_loss: 0.5484 - val_mae: 0.1138 - learning_rate: 0.0010
Epoch 6/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 1.1125 - mae: 0.1

In [2]:
# ===== ANN for unseen MOSFET (common combos + 25% per device + rich derived + optional embedding + log targets + weighted loss) =====
import os, numpy as np, pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, regularizers, callbacks, Model

# ------------ USER SETTINGS ------------
CSV_PATH = r"C:\Users\pc\Desktop\Classical_Models\Train_5_MOSFETs_engineered.csv"
DEVICE_COL = "Part_Number"
UNSEEN_DEVICE = "C2M0040120D"
SAMPLE_FRAC = 0.25
RANDOM_STATE = 42
USE_EMBEDDING = False     # True -> include device embedding with UNK for unseen
APPLY_LOG1P = True        # log1p transform for skewed time targets + ringing
# Per-target weights (emphasize the hard ones)
TARGET_WEIGHTS = {
    "voltage_rise_time_pulse1": 2.0,
    "voltage_rise_time_pulse2": 2.0,
    "voltage_fall_time_pulse1": 2.0,
    "voltage_fall_time_pulse2": 2.0,
    "current_rise_time_pulse1": 1.5,
    "current_rise_time_pulse2": 1.5,
    "current_fall_time_pulse1": 1.0,
    "current_fall_time_pulse2": 1.0,
    "overshoot_pulse_1": 1.0,
    "overshoot_pulse_2": 1.0,
    "undershoot_pulse_1": 1.0,
    "undershoot_pulse_2": 1.0,
    "ringing_frequency_MHz": 2.5
}
# --------------------------------------

np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

COMBO_COLS = ["Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11"]

RAW_INPUTS = [
    "Vbus","Rg","Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11",
    # include if present
    "Tp1","Tp2","Toff","Tstart","L1","L2","L3","R1",
    "VDS_max","ID_max_25C","RDS_on_typ","RDS_on_max",
    "VGS_th_min","VGS_th_typ","VGS_th_max",
    "Qg_total","Qrr_typ","Irrm_typ","Eon_typ","Eoff_typ",
    "Ciss","Coss","Crss",
    "Rth_JC_typ","Rth_JC_max","Tj_max",
    DEVICE_COL
]

TARGETS = [
    "voltage_rise_time_pulse1","voltage_rise_time_pulse2",
    "voltage_fall_time_pulse1","voltage_fall_time_pulse2",
    "current_rise_time_pulse1","current_rise_time_pulse2",
    "current_fall_time_pulse1","current_fall_time_pulse2",
    "overshoot_pulse_1","overshoot_pulse_2",
    "undershoot_pulse_1","undershoot_pulse_2",
    "ringing_frequency_MHz"
]

# ---- load ----
df = pd.read_csv(CSV_PATH)
assert DEVICE_COL in df.columns, f"'{DEVICE_COL}' not found."

RAW_INPUTS = [c for c in RAW_INPUTS if c in df.columns]
TARGETS = [c for c in TARGETS if c in df.columns]
for c in COMBO_COLS:
    if c not in df.columns:
        raise ValueError(f"Missing combo column: {c}")

# ---- derived features ----
def add_derived_features(frame: pd.DataFrame) -> pd.DataFrame:
    f = frame.copy()
    eps = 1e-18
    c_stray = 5e-12
    # L_loop
    L_pieces = [c for c in ["Ls4","Ls5","Ls6","Ls7","Ls8","Ls9","Ls10","Ls11","L1","L2","L3"] if c in f.columns]
    f["L_loop"] = f[L_pieces].sum(axis=1) if L_pieces else np.nan
    # C_eq, resonance
    f["C_eq_est"] = (f["Coss"] if "Coss" in f.columns else 0).fillna(0) + c_stray
    f["f_res_est"] = 1.0/(2.0*np.pi*np.sqrt(np.clip(f["L_loop"]*f["C_eq_est"], eps, None)))
    # drive proxies
    f["gate_drive_strength"] = 1.0/((f["Rg"].abs() if "Rg" in f.columns else 0)+1e-3)
    f["dvdt_proxy"] = f["gate_drive_strength"]/((f["Crss"].abs() if "Crss" in f.columns else 0)+1e-15)
    f["miller_ratio"] = (f["Crss"].abs()/(f["Coss"].abs()+1e-15)) if ("Crss" in f.columns and "Coss" in f.columns) else np.nan
    # energy/cap ratios
    f["E_total"] = (f.get("Eon_typ", 0)).fillna(0) + (f.get("Eoff_typ", 0)).fillna(0)
    f["Cgd_ratio"] = (f["Crss"].abs()/(f["Ciss"].abs()+1e-15)) if ("Crss" in f.columns and "Ciss" in f.columns) else np.nan
    f["Qrr_over_Qg"] = (f["Qrr_typ"].abs()/(f["Qg_total"].abs()+1e-15)) if ("Qrr_typ" in f.columns and "Qg_total" in f.columns) else np.nan
    # damping proxy zeta ~ 0.5*R*sqrt(C/L)
    if "R1" in f.columns:
        f["zeta_proxy"] = 0.5*(f["R1"].abs())*np.sqrt(np.clip(f["C_eq_est"]/(f["L_loop"]+eps), eps, None))
    else:
        f["zeta_proxy"] = np.nan
    return f

def make_combo_key(frame: pd.DataFrame) -> pd.Series:
    rounded = frame[COMBO_COLS].apply(lambda s: np.round(s.astype(float), 6))
    return rounded.apply(lambda row: tuple(row.values.tolist()), axis=1)

# ---- filter to common combos & sample 25%/device ----
devices = df[DEVICE_COL].dropna().unique().tolist()
assert UNSEEN_DEVICE in devices, f"{UNSEEN_DEVICE} not in dataset {devices}"
train_devices = [d for d in devices if d != UNSEEN_DEVICE]

df["_combo_key"] = make_combo_key(df)
combos_unseen = set(df.loc[df[DEVICE_COL]==UNSEEN_DEVICE, "_combo_key"].dropna().unique())
combos_train_sets = [set(df.loc[df[DEVICE_COL]==d, "_combo_key"].dropna().unique()) for d in train_devices]
common_train = set.intersection(*combos_train_sets) if combos_train_sets else set()
common_combos = combos_unseen.intersection(common_train)
df_common = df[df["_combo_key"].isin(common_combos) & df[DEVICE_COL].isin(train_devices + [UNSEEN_DEVICE])].copy()

df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(
    lambda g: g.sample(frac=SAMPLE_FRAC, random_state=RANDOM_STATE)
).reset_index(drop=True)

# ---- features + derived ----
feature_cols = list(RAW_INPUTS)
df_feat = add_derived_features(df_sampled)
for c in ["L_loop","C_eq_est","f_res_est","gate_drive_strength","dvdt_proxy","miller_ratio",
          "E_total","Cgd_ratio","Qrr_over_Qg","zeta_proxy"]:
    if c in df_feat.columns: feature_cols.append(c)
feature_cols = list(dict.fromkeys(feature_cols))

# ---- split ----
df_train = df_feat[df_feat[DEVICE_COL] != UNSEEN_DEVICE].copy()
df_test  = df_feat[df_feat[DEVICE_COL] == UNSEEN_DEVICE].copy()

# optional embedding
if USE_EMBEDDING:
    le = LabelEncoder()
    known = df_train[DEVICE_COL].astype(str)
    le.fit(known.unique().tolist())
    df_train["_dev_id"] = le.transform(df_train[DEVICE_COL].astype(str))
    unk_id = len(le.classes_)
    df_test["_dev_id"] = df_test[DEVICE_COL].astype(str).map(lambda s: le.transform([s])[0] if s in le.classes_ else unk_id)
else:
    feature_cols = [c for c in feature_cols if c != DEVICE_COL]

# assemble matrices
X_num_train = df_train[feature_cols].copy()
X_num_test  = df_test[feature_cols].copy()
Y_cols = [c for c in TARGETS if c in df_train.columns]
y_train = df_train[Y_cols].copy()
y_test  = df_test[Y_cols].copy()

# ---- optional log1p on selected targets ----
log_targets = set()
if APPLY_LOG1P:
    for c in Y_cols:
        if ("rise_time" in c) or ("fall_time" in c) or ("ringing_frequency" in c):
            log_targets.add(c)
def log1p_df(df_, cols):
    out = df_.copy()
    for c in cols:
        out[c] = np.log1p(np.clip(out[c].values, a_min=0, a_max=None))
    return out
def expm1_df(df_, cols):
    out = df_.copy()
    for c in cols:
        out[c] = np.expm1(out[c].values)
    return out

y_train_for_scale = log1p_df(y_train, log_targets) if log_targets else y_train.copy()
y_test_for_scale  = log1p_df(y_test, log_targets)  if log_targets else y_test.copy()

# ---- scale inputs & outputs ----
Xscaler = StandardScaler()
Yscalers = {c: StandardScaler() for c in Y_cols}

X_train = Xscaler.fit_transform(X_num_train.fillna(X_num_train.median(numeric_only=True)))
X_test  = Xscaler.transform(X_num_test.fillna(X_num_train.median(numeric_only=True)))

y_train_scaled = np.zeros_like(y_train_for_scale.values, dtype=float)
y_test_scaled  = np.zeros_like(y_test_for_scale.values, dtype=float)
for i, c in enumerate(Y_cols):
    y_train_scaled[:, i] = Yscalers[c].fit_transform(y_train_for_scale[[c]].values).ravel()
    y_test_scaled[:, i]  = Yscalers[c].transform(y_test_for_scale[[c]].values).ravel()

# ---- weighted loss ----
# MSE with per-output weights: loss = mean(sum(w_i * (y_i - yhat_i)^2))
weights_vec = np.array([TARGET_WEIGHTS.get(c, 1.0) for c in Y_cols], dtype="float32")
weights_tf = tf.constant(weights_vec, dtype=tf.float32)
def weighted_mse(y_true, y_pred):
    err = y_true - y_pred
    return tf.reduce_mean(tf.reduce_sum(weights_tf * tf.square(err), axis=-1))

# ---- build model ----
def build_model(n_num_features, n_targets, n_devices=None, emb_dim=4):
    reg = regularizers.l2(1e-4)
    if USE_EMBEDDING:
        inp_num = layers.Input(shape=(n_num_features,), name="num")
        inp_dev = layers.Input(shape=(), dtype="int32", name="dev")
        emb = layers.Embedding(input_dim=n_devices, output_dim=emb_dim, name="dev_emb")(inp_dev)
        emb = layers.Flatten()(emb)
        x = layers.Concatenate()([inp_num, emb])
    else:
        inp_num = layers.Input(shape=(n_num_features,), name="num")
        x = inp_num

    x = layers.Dense(192, activation="relu", kernel_regularizer=reg)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(96, activation="relu", kernel_regularizer=reg)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.15)(x)
    x = layers.Dense(64, activation="relu", kernel_regularizer=reg)(x)
    out = layers.Dense(n_targets, activation="linear", name="y")(x)

    model = Model(inputs=[inp_num, inp_dev], outputs=out) if USE_EMBEDDING else Model(inputs=inp_num, outputs=out)
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss=weighted_mse, metrics=[tf.keras.metrics.MeanAbsoluteError(name="mae")])
    return model

n_devices = (df_train["_dev_id"].max() + 2) if USE_EMBEDDING else None
model = build_model(X_train.shape[1], len(Y_cols), n_devices=n_devices, emb_dim=4)

# ---- train ----
es = callbacks.EarlyStopping(monitor="val_loss", patience=25, restore_best_weights=True)
rlr = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=10, min_lr=1e-5)

if USE_EMBEDDING:
    hist = model.fit({"num": X_train, "dev": df_train["_dev_id"].values}, y_train_scaled,
                     validation_split=0.15, epochs=300, batch_size=256, callbacks=[es, rlr], verbose=1)
else:
    hist = model.fit(X_train, y_train_scaled,
                     validation_split=0.15, epochs=300, batch_size=256, callbacks=[es, rlr], verbose=1)

# ---- predict ----
if USE_EMBEDDING:
    y_pred_scaled = model.predict({"num": X_test, "dev": df_test["_dev_id"].values}, verbose=0)
else:
    y_pred_scaled = model.predict(X_test, verbose=0)

# inverse scale + inverse log
y_pred_df = pd.DataFrame(y_pred_scaled, columns=Y_cols, index=y_test.index)
for i, c in enumerate(Y_cols):
    y_pred_df[c] = Yscalers[c].inverse_transform(y_pred_df[[c]].values).ravel()
if log_targets:
    y_pred_df = expm1_df(y_pred_df, log_targets)

# ---- metrics ----
def rmse(a,b): return float(np.sqrt(mean_squared_error(a, b)))
def r2(a,b):   return float(r2_score(a, b))

per_target = []
y_true_eval = y_test.copy()
if log_targets:
    # y_test_for_scale was log+scaled; we want to evaluate in the original space
    y_true_eval = y_test.copy()  # already original space

for c in Y_cols:
    per_target.append({
        "target": c,
        "RMSE": rmse(y_true_eval[c], y_pred_df[c]),
        "R2": r2(y_true_eval[c], y_pred_df[c]),
        "n_test": int(y_true_eval[c].size)
    })
metrics_df = pd.DataFrame(per_target).sort_values("R2", ascending=False)
print("\n=== Unseen device:", UNSEEN_DEVICE, "===")
print(metrics_df.to_string(index=False))

print("\nOverall RMSE:", rmse(y_true_eval.values, y_pred_df.values))
print("Overall R2  :", r2(y_true_eval.values, y_pred_df.values))

# ---- save predictions ----
OUT_PATH = os.path.join(os.path.dirname(CSV_PATH), f"ann_unseen_{UNSEEN_DEVICE}_{'withEmb' if USE_EMBEDDING else 'noEmb'}_log{int(APPLY_LOG1P)}_wloss.csv")
save_cols = [DEVICE_COL] + COMBO_COLS
save_cols = [c for c in save_cols if c in df_test.columns]
out = df_test[save_cols].copy()
for c in Y_cols:
    out[c+"_true"] = y_true_eval[c].values
    out[c+"_pred"] = y_pred_df[c].values
out.to_csv(OUT_PATH, index=False)
print(f"\n✅ Saved predictions: {OUT_PATH}")


  df_sampled = df_common.groupby(DEVICE_COL, group_keys=False).apply(


Epoch 1/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 7.8233 - mae: 0.4598 - val_loss: 2.0370 - val_mae: 0.2715 - learning_rate: 0.0010
Epoch 2/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 1.8095 - mae: 0.2237 - val_loss: 0.7437 - val_mae: 0.1405 - learning_rate: 0.0010
Epoch 3/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 1.4149 - mae: 0.1929 - val_loss: 0.5781 - val_mae: 0.1147 - learning_rate: 0.0010
Epoch 4/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 1.2480 - mae: 0.1781 - val_loss: 0.5683 - val_mae: 0.1123 - learning_rate: 0.0010
Epoch 5/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 1.1541 - mae: 0.1684 - val_loss: 0.5322 - val_mae: 0.1064 - learning_rate: 0.0010
Epoch 6/300
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 1.0862 - mae: 0.1619