## MULTI-HEAD ANN

| Iteration    | Dropout | L2 Reg. | BatchNorm | EarlyStopping | LR Scheduler         | Scaling                     | Physics Features | Notes                          |
| ------------ | ------- | ------- | --------- | ------------- | -------------------- | --------------------------- | ---------------- | ------------------------------ |
| **Fast (1)** | —       | 1e-4    | Yes       | Yes (p=7)     | ReduceLRO (0.5, p=3) | StdScaler (X), per-target y | Yes              | Light backbone, quick training |
| **2**        | 0.2     | 1e-4    | Yes       | Yes (p=10)    | ReduceLRO (0.3, p=5) | StdScaler (X), per-target y | Yes              | Deeper heads, stronger gen.    |
| **3**        | 0.3     | 1e-4    | Yes       | Yes (p=15)    | ReduceLRO (0.5, p=7) | StdScaler (X), per-target y | Yes              | Even more dropout, robust      |


In [7]:
# ==================== MULTI-HEAD ANN (FAST MODE) ====================
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers
import matplotlib.pyplot as plt

# ============== SETTINGS ==============
SEED = 42
UNSEEN_PART = 'C2M0040120D'
BASE_DIR = "multihead_ann_fast"
os.makedirs(f"{BASE_DIR}/r2_rmse_tables", exist_ok=True)
os.makedirs(f"{BASE_DIR}/train_val_loss_curves", exist_ok=True)
os.makedirs(f"{BASE_DIR}/predicted_vs_actual", exist_ok=True)
os.makedirs(f"{BASE_DIR}/models", exist_ok=True)

# ============== LOAD DATA ==============
df = pd.read_csv(
    r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs_10percent_balanced.csv"
)

# ============== TARGETS + DROPS ==============
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
DROP_COLUMNS = ['DeviceID', 'MOSFET', 'Part_Number']

# ============== SPLIT SEEN/UNSEEN DEVICES ==============
seen_parts = [p for p in df['Part_Number'].unique().tolist() if p != UNSEEN_PART]
train_df = df[df['Part_Number'].isin(seen_parts)].copy()
test_df  = df[df['Part_Number'] == UNSEEN_PART].copy()

# ============== PHYSICS FEATURES ==============
def compute_physics_features(row):
    L_eq = row[['Ls4','Ls5','Ls6','Ls7','Ls8','Ls9','Ls10','Ls11']].sum()
    C_eq = row.get("Coss", 1e-12)
    f_res = 1/(2*np.pi*np.sqrt(L_eq*C_eq))/1e6 if L_eq>0 and C_eq>0 else 0
    overshoot_est = row.get("VDS_max",0) - row.get("Vbus",0)
    undershoot_est = - row.get("VGS_th_min",0)
    dVdt_est = row.get("VDS_max",0)/row.get("Tp1",1e-9)
    dIdt_est = row.get("ID_max_25C",0)/row.get("Tp1",1e-9)
    return pd.Series([f_res, overshoot_est, undershoot_est, dVdt_est, dIdt_est])

for df_ in [train_df, test_df]:
    df_[['f_resonance','overshoot_est','undershoot_est','dVdt_est','dIdt_est']] = \
        df_.apply(compute_physics_features, axis=1)

physics_features = ['f_resonance','overshoot_est','undershoot_est','dVdt_est','dIdt_est']
INPUT_COLUMNS = [c for c in df.columns if c not in TARGET_COLUMNS + DROP_COLUMNS] + physics_features

# ============== SCALE INPUTS ==============
input_scaler = StandardScaler()
input_scaler.fit(pd.concat([train_df[INPUT_COLUMNS], test_df[INPUT_COLUMNS]]))
X_train_all = input_scaler.transform(train_df[INPUT_COLUMNS])
X_test_all  = input_scaler.transform(test_df[INPUT_COLUMNS])

# Separate physics cols
phys_idx = [INPUT_COLUMNS.index(c) for c in physics_features]
X_train_phys = X_train_all[:, phys_idx]
X_test_phys  = X_test_all[:, phys_idx]
X_train_main = np.delete(X_train_all, phys_idx, axis=1)
X_test_main  = np.delete(X_test_all, phys_idx, axis=1)

# ============== SCALE OUTPUTS (per target) ==============
output_scalers, y_train_scaled, y_test_scaled = {}, [], []
for col in TARGET_COLUMNS:
    scaler = MinMaxScaler() if col == 'ringing_frequency_MHz' else StandardScaler()
    y_train_scaled.append(scaler.fit_transform(train_df[[col]]).flatten())
    y_test_scaled.append(scaler.transform(test_df[[col]]).flatten())
    output_scalers[col] = scaler
y_train_scaled = np.array(y_train_scaled).T
y_test_scaled  = np.array(y_test_scaled).T

# ============== SPLIT TRAIN/VAL ==============
Xtr_main, Xval_main, Xtr_phys, Xval_phys, ytr, yval = train_test_split(
    X_train_main, X_train_phys, y_train_scaled,
    test_size=0.15, random_state=SEED
)

# ============== BUILD MULTI-HEAD ANN (FAST BACKBONE) ==============
def build_multihead_ann(input_dim_main, input_dim_phys, output_dim, l2_reg=1e-4):
    inp_main = layers.Input(shape=(input_dim_main,), name="main_inputs")
    inp_phys = layers.Input(shape=(input_dim_phys,), name="physics_inputs")

    # Smaller backbone
    x_main = layers.Dense(128, kernel_regularizer=regularizers.l2(l2_reg))(inp_main)
    x_main = layers.BatchNormalization()(x_main); x_main = layers.ReLU()(x_main)
    x_main = layers.Dense(64, kernel_regularizer=regularizers.l2(l2_reg))(x_main)
    x_main = layers.BatchNormalization()(x_main); x_main = layers.ReLU()(x_main)

    x_phys = layers.Dense(32, activation="relu")(inp_phys)

    shared = layers.concatenate([x_main, x_phys])

    outputs = []
    for i, col in enumerate(TARGET_COLUMNS):
        h = layers.Dense(32, activation="relu")(shared)  # per-head reduced
        out = layers.Dense(1, activation="linear", name=col)(h)
        outputs.append(out)

    model = models.Model(inputs=[inp_main, inp_phys], outputs=outputs)
    return model

model = build_multihead_ann(Xtr_main.shape[1], Xtr_phys.shape[1], len(TARGET_COLUMNS))

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=["mse"]*len(TARGET_COLUMNS),
    metrics=["mae"]*len(TARGET_COLUMNS)
)

early_stop = callbacks.EarlyStopping(monitor="val_loss", patience=7, restore_best_weights=True)
reduce_lr  = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3)

# ============== TRAIN MODEL (FAST MODE) ==============
history = model.fit(
    [Xtr_main, Xtr_phys],
    [ytr[:,i] for i in range(len(TARGET_COLUMNS))],
    validation_data=([Xval_main,Xval_phys],[yval[:,i] for i in range(len(TARGET_COLUMNS))]),
    epochs=100, batch_size=256,
    callbacks=[early_stop, reduce_lr], verbose=1
)

model.save(f"{BASE_DIR}/models/multihead_ann.h5")

# ============== LOSS CURVE ==============
plt.figure()
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel("Epoch"); plt.ylabel("MSE Loss")
plt.title("Train vs Validation Loss")
plt.legend(); plt.savefig(f"{BASE_DIR}/train_val_loss_curves/loss.png"); plt.close()

# ============== EVALUATION ==============
def evaluate_and_save(X_main, X_phys, y_scaled, name, positive_only=False):
    y_pred_scaled_list = model.predict([X_main, X_phys])
    results = []
    for i, col in enumerate(TARGET_COLUMNS):
        y_true = output_scalers[col].inverse_transform(y_scaled[:,i].reshape(-1,1)).flatten()
        y_pred = output_scalers[col].inverse_transform(y_pred_scaled_list[i].reshape(-1,1)).flatten()
        r2 = r2_score(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        results.append((col, r2, rmse))
    df_results = pd.DataFrame(results, columns=["Target","R2","RMSE"]).sort_values("R2",ascending=False)
    if positive_only:
        df_results = df_results[df_results["R2"]>0]
    print(f"\n{name}:\n",df_results)
    df_results.to_csv(f"{BASE_DIR}/r2_rmse_tables/{name}.csv", index=False)
    return df_results

# Run evals
evaluate_and_save(Xtr_main,Xtr_phys,ytr,"train")
evaluate_and_save(Xval_main,Xval_phys,yval,"val")
evaluate_and_save(X_train_main,X_train_phys,y_train_scaled,"test")
evaluate_and_save(X_test_main,X_test_phys,y_test_scaled,"unseen",positive_only=True)


Epoch 1/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 55ms/step - current_fall_time_pulse1_loss: 0.9631 - current_fall_time_pulse1_mae: 0.7438 - current_fall_time_pulse2_loss: 0.6239 - current_fall_time_pulse2_mae: 0.6005 - current_rise_time_pulse1_loss: 1.1404 - current_rise_time_pulse1_mae: 0.7334 - current_rise_time_pulse2_loss: 0.6831 - current_rise_time_pulse2_mae: 0.6702 - loss: 10.2992 - overshoot_pulse_1_loss: 0.9733 - overshoot_pulse_1_mae: 0.7399 - overshoot_pulse_2_loss: 0.9423 - overshoot_pulse_2_mae: 0.6924 - ringing_frequency_MHz_loss: 0.3735 - ringing_frequency_MHz_mae: 0.4427 - undershoot_pulse_1_loss: 0.9582 - undershoot_pulse_1_mae: 0.7879 - undershoot_pulse_2_loss: 0.8307 - undershoot_pulse_2_mae: 0.6643 - voltage_fall_time_pulse1_loss: 0.5286 - voltage_fall_time_pulse1_mae: 0.5607 - voltage_fall_time_pulse2_loss: 0.6468 - voltage_fall_time_pulse2_mae: 0.5839 - voltage_rise_time_pulse1_loss: 0.7268 - voltage_rise_time_pulse1_mae: 0.5984 - voltag



[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step

train:
                       Target        R2          RMSE
3   voltage_fall_time_pulse2  0.998909  2.260635e-10
2   voltage_fall_time_pulse1  0.998872  2.301607e-10
6   current_fall_time_pulse1  0.997892  5.391787e-10
7   current_fall_time_pulse2  0.997673  5.643914e-10
12     ringing_frequency_MHz  0.993974  2.550473e+00
0   voltage_rise_time_pulse1  0.991178  3.784552e-10
11        undershoot_pulse_2  0.988818  1.467432e+00
10        undershoot_pulse_1  0.988205  1.504162e+00
8          overshoot_pulse_1  0.975838  1.789765e+00
4   current_rise_time_pulse1  0.944083  1.154686e-08
9          overshoot_pulse_2  0.942570  5.930406e+00
1   voltage_rise_time_pulse2  0.939287  9.467240e-10
5   current_rise_time_pulse2  0.928856  6.578344e-09
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

val:
                       Target        R2          RMSE
2   voltage_fall_time_pulse1  0.998578  2

Unnamed: 0,Target,R2,RMSE
7,current_fall_time_pulse2,0.976606,1.70582e-09
6,current_fall_time_pulse1,0.916178,3.251214e-09
2,voltage_fall_time_pulse1,0.766286,1.125194e-09
9,overshoot_pulse_2,0.708183,10.73223
3,voltage_fall_time_pulse2,0.698288,1.279096e-09
8,overshoot_pulse_1,0.668247,7.316996
11,undershoot_pulse_2,0.549278,6.39389
0,voltage_rise_time_pulse1,0.427653,1.318961e-09
10,undershoot_pulse_1,0.308612,7.976464
5,current_rise_time_pulse2,0.022475,1.641188e-08


In [8]:
# ==================== MULTI-HEAD ANN (FAST MODE) ====================
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers
import matplotlib.pyplot as plt

# ============== SETTINGS ==============
SEED = 42
UNSEEN_PART = 'C2M0040120D'
BASE_DIR = "iteration_2"
os.makedirs(f"{BASE_DIR}/r2_rmse_tables", exist_ok=True)
os.makedirs(f"{BASE_DIR}/train_val_loss_curves", exist_ok=True)
os.makedirs(f"{BASE_DIR}/predicted_vs_actual", exist_ok=True)
os.makedirs(f"{BASE_DIR}/models", exist_ok=True)

# ============== LOAD DATA ==============
df = pd.read_csv(
    r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs_10percent_balanced.csv"
)

# ============== TARGETS + DROPS ==============
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
DROP_COLUMNS = ['DeviceID', 'MOSFET', 'Part_Number']

# ============== SPLIT SEEN/UNSEEN DEVICES ==============
seen_parts = [p for p in df['Part_Number'].unique().tolist() if p != UNSEEN_PART]
train_df = df[df['Part_Number'].isin(seen_parts)].copy()
test_df  = df[df['Part_Number'] == UNSEEN_PART].copy()

# ============== PHYSICS FEATURES ==============
def compute_physics_features(row):
    L_eq = row[['Ls4','Ls5','Ls6','Ls7','Ls8','Ls9','Ls10','Ls11']].sum()
    C_eq = row.get("Coss", 1e-12)
    f_res = 1/(2*np.pi*np.sqrt(L_eq*C_eq))/1e6 if L_eq>0 and C_eq>0 else 0
    overshoot_est = row.get("VDS_max",0) - row.get("Vbus",0)
    undershoot_est = - row.get("VGS_th_min",0)
    dVdt_est = row.get("VDS_max",0)/row.get("Tp1",1e-9)
    dIdt_est = row.get("ID_max_25C",0)/row.get("Tp1",1e-9)
    return pd.Series([f_res, overshoot_est, undershoot_est, dVdt_est, dIdt_est])

for df_ in [train_df, test_df]:
    df_[['f_resonance','overshoot_est','undershoot_est','dVdt_est','dIdt_est']] = \
        df_.apply(compute_physics_features, axis=1)

physics_features = ['f_resonance','overshoot_est','undershoot_est','dVdt_est','dIdt_est']
INPUT_COLUMNS = [c for c in df.columns if c not in TARGET_COLUMNS + DROP_COLUMNS] + physics_features

# ============== SCALE INPUTS ==============
input_scaler = StandardScaler()
input_scaler.fit(pd.concat([train_df[INPUT_COLUMNS], test_df[INPUT_COLUMNS]]))
X_train_all = input_scaler.transform(train_df[INPUT_COLUMNS])
X_test_all  = input_scaler.transform(test_df[INPUT_COLUMNS])

# Separate physics cols
phys_idx = [INPUT_COLUMNS.index(c) for c in physics_features]
X_train_phys = X_train_all[:, phys_idx]
X_test_phys  = X_test_all[:, phys_idx]
X_train_main = np.delete(X_train_all, phys_idx, axis=1)
X_test_main  = np.delete(X_test_all, phys_idx, axis=1)

# ============== SCALE OUTPUTS (per target) ==============
output_scalers, y_train_scaled, y_test_scaled = {}, [], []
for col in TARGET_COLUMNS:
    scaler = MinMaxScaler() if col == 'ringing_frequency_MHz' else StandardScaler()
    y_train_scaled.append(scaler.fit_transform(train_df[[col]]).flatten())
    y_test_scaled.append(scaler.transform(test_df[[col]]).flatten())
    output_scalers[col] = scaler
y_train_scaled = np.array(y_train_scaled).T
y_test_scaled  = np.array(y_test_scaled).T

# ============== SPLIT TRAIN/VAL ==============
Xtr_main, Xval_main, Xtr_phys, Xval_phys, ytr, yval = train_test_split(
    X_train_main, X_train_phys, y_train_scaled,
    test_size=0.15, random_state=SEED
)

# ============== BUILD MULTI-HEAD ANN (FAST BACKBONE) ==============
def build_multihead_ann(input_dim_main, input_dim_phys, output_dim, l2_reg=1e-4, dropout_rate=0.2):
    inp_main = layers.Input(shape=(input_dim_main,), name="main_inputs")
    inp_phys = layers.Input(shape=(input_dim_phys,), name="physics_inputs")

    # Shared backbone
    x_main = layers.Dense(128, kernel_regularizer=regularizers.l2(l2_reg))(inp_main)
    x_main = layers.BatchNormalization()(x_main); x_main = layers.ReLU()(x_main)
    x_main = layers.Dropout(dropout_rate)(x_main)

    x_main = layers.Dense(64, kernel_regularizer=regularizers.l2(l2_reg))(x_main)
    x_main = layers.BatchNormalization()(x_main); x_main = layers.ReLU()(x_main)
    x_main = layers.Dropout(dropout_rate)(x_main)

    x_phys = layers.Dense(32, activation="relu")(inp_phys)

    shared = layers.concatenate([x_main, x_phys])

    # Multi-head outputs (deeper per-head)
    outputs = []
    for i, col in enumerate(TARGET_COLUMNS):
        h = layers.Dense(64, activation="relu")(shared)
        h = layers.Dropout(dropout_rate)(h)
        h = layers.Dense(32, activation="relu")(h)
        out = layers.Dense(1, activation="linear", name=col)(h)
        outputs.append(out)

    model = models.Model(inputs=[inp_main, inp_phys], outputs=outputs)
    return model

model = build_multihead_ann(Xtr_main.shape[1], Xtr_phys.shape[1], len(TARGET_COLUMNS))

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-4),
    loss=["mse"]*len(TARGET_COLUMNS),
    metrics=["mae"]*len(TARGET_COLUMNS)
)

early_stop = callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
reduce_lr  = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.3, patience=5)

history = model.fit(
    [Xtr_main, Xtr_phys],
    [ytr[:,i] for i in range(len(TARGET_COLUMNS))],
    validation_data=([Xval_main,Xval_phys],[yval[:,i] for i in range(len(TARGET_COLUMNS))]),
    epochs=120, batch_size=256, callbacks=[early_stop, reduce_lr], verbose=1
)


model.save(f"{BASE_DIR}/models/multihead_ann.h5")

# ============== LOSS CURVE ==============
plt.figure()
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel("Epoch"); plt.ylabel("MSE Loss")
plt.title("Train vs Validation Loss")
plt.legend(); plt.savefig(f"{BASE_DIR}/train_val_loss_curves/loss.png"); plt.close()

# ============== EVALUATION ==============
def evaluate_and_save(X_main, X_phys, y_scaled, name, positive_only=False):
    y_pred_scaled_list = model.predict([X_main, X_phys])
    results = []
    for i, col in enumerate(TARGET_COLUMNS):
        y_true = output_scalers[col].inverse_transform(y_scaled[:,i].reshape(-1,1)).flatten()
        y_pred = output_scalers[col].inverse_transform(y_pred_scaled_list[i].reshape(-1,1)).flatten()
        r2 = r2_score(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        results.append((col, r2, rmse))
    df_results = pd.DataFrame(results, columns=["Target","R2","RMSE"]).sort_values("R2",ascending=False)
    if positive_only:
        df_results = df_results[df_results["R2"]>0]
    print(f"\n{name}:\n",df_results)
    df_results.to_csv(f"{BASE_DIR}/r2_rmse_tables/{name}.csv", index=False)
    return df_results

# Run evals
evaluate_and_save(Xtr_main,Xtr_phys,ytr,"train")
evaluate_and_save(Xval_main,Xval_phys,yval,"val")
evaluate_and_save(X_train_main,X_train_phys,y_train_scaled,"test")
evaluate_and_save(X_test_main,X_test_phys,y_test_scaled,"unseen",positive_only=True)


Epoch 1/120
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 44ms/step - current_fall_time_pulse1_loss: 1.8545 - current_fall_time_pulse1_mae: 1.0631 - current_fall_time_pulse2_loss: 0.9453 - current_fall_time_pulse2_mae: 0.7743 - current_rise_time_pulse1_loss: 0.7775 - current_rise_time_pulse1_mae: 0.6624 - current_rise_time_pulse2_loss: 0.9183 - current_rise_time_pulse2_mae: 0.7812 - loss: 11.8151 - overshoot_pulse_1_loss: 0.9212 - overshoot_pulse_1_mae: 0.7356 - overshoot_pulse_2_loss: 1.0398 - overshoot_pulse_2_mae: 0.8038 - ringing_frequency_MHz_loss: 0.1362 - ringing_frequency_MHz_mae: 0.2772 - undershoot_pulse_1_loss: 0.9903 - undershoot_pulse_1_mae: 0.7538 - undershoot_pulse_2_loss: 1.1218 - undershoot_pulse_2_mae: 0.8164 - voltage_fall_time_pulse1_loss: 0.7839 - voltage_fall_time_pulse1_mae: 0.6971 - voltage_fall_time_pulse2_loss: 0.7918 - voltage_fall_time_pulse2_mae: 0.7050 - voltage_rise_time_pulse1_loss: 0.8400 - voltage_rise_time_pulse1_mae: 0.6760 - volta



[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step

train:
                       Target        R2          RMSE
3   voltage_fall_time_pulse2  0.983004  8.922442e-10
6   current_fall_time_pulse1  0.979457  1.683065e-09
2   voltage_fall_time_pulse1  0.979013  9.926192e-10
7   current_fall_time_pulse2  0.977487  1.755638e-09
0   voltage_rise_time_pulse1  0.965301  7.505865e-10
10        undershoot_pulse_1  0.962893  2.667893e+00
11        undershoot_pulse_2  0.961716  2.715258e+00
12     ringing_frequency_MHz  0.956772  6.831207e+00
8          overshoot_pulse_1  0.928850  3.071269e+00
4   current_rise_time_pulse1  0.925359  1.334075e-08
9          overshoot_pulse_2  0.920271  6.987537e+00
1   voltage_rise_time_pulse2  0.885704  1.298965e-09
5   current_rise_time_pulse2  0.876248  8.676113e-09
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

val:
                       Target        R2          RMSE
3   voltage_fall_time_pulse2  0.983327  9

Unnamed: 0,Target,R2,RMSE
2,voltage_fall_time_pulse1,0.917634,6.679715e-10
3,voltage_fall_time_pulse2,0.909963,6.987443e-10
6,current_fall_time_pulse1,0.804093,4.970398e-09
7,current_fall_time_pulse2,0.766219,5.392481e-09
8,overshoot_pulse_1,0.68533,7.12612
0,voltage_rise_time_pulse1,0.64081,1.044874e-09
9,overshoot_pulse_2,0.600541,12.55656
11,undershoot_pulse_2,0.564682,6.283679
10,undershoot_pulse_1,0.511808,6.70262
1,voltage_rise_time_pulse2,0.470281,3.206266e-09


In [None]:
# ==================== MULTI-HEAD ANN (FAST MODE) ====================
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers
import matplotlib.pyplot as plt

# ============== SETTINGS ==============
SEED = 42
UNSEEN_PART = 'C2M0040120D'
BASE_DIR = "iteration_3"
os.makedirs(f"{BASE_DIR}/r2_rmse_tables", exist_ok=True)
os.makedirs(f"{BASE_DIR}/train_val_loss_curves", exist_ok=True)
os.makedirs(f"{BASE_DIR}/predicted_vs_actual", exist_ok=True)
os.makedirs(f"{BASE_DIR}/models", exist_ok=True)

# ============== LOAD DATA ==============
df = pd.read_csv(
    r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs_25percent_balanced.csv"
)

# ============== TARGETS + DROPS ==============
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
DROP_COLUMNS = ['DeviceID', 'MOSFET', 'Part_Number']

# ============== SPLIT SEEN/UNSEEN DEVICES ==============
seen_parts = [p for p in df['Part_Number'].unique().tolist() if p != UNSEEN_PART]
train_df = df[df['Part_Number'].isin(seen_parts)].copy()
test_df  = df[df['Part_Number'] == UNSEEN_PART].copy()

# ============== PHYSICS FEATURES ==============
def compute_physics_features(row):
    L_eq = row[['Ls4','Ls5','Ls6','Ls7','Ls8','Ls9','Ls10','Ls11']].sum()
    C_eq = row.get("Coss", 1e-12)
    f_res = 1/(2*np.pi*np.sqrt(L_eq*C_eq))/1e6 if L_eq>0 and C_eq>0 else 0
    overshoot_est = row.get("VDS_max",0) - row.get("Vbus",0)
    undershoot_est = - row.get("VGS_th_min",0)
    dVdt_est = row.get("VDS_max",0)/row.get("Tp1",1e-9)
    dIdt_est = row.get("ID_max_25C",0)/row.get("Tp1",1e-9)
    return pd.Series([f_res, overshoot_est, undershoot_est, dVdt_est, dIdt_est])

for df_ in [train_df, test_df]:
    df_[['f_resonance','overshoot_est','undershoot_est','dVdt_est','dIdt_est']] = \
        df_.apply(compute_physics_features, axis=1)

physics_features = ['f_resonance','overshoot_est','undershoot_est','dVdt_est','dIdt_est']
INPUT_COLUMNS = [c for c in df.columns if c not in TARGET_COLUMNS + DROP_COLUMNS] + physics_features

# ============== SCALE INPUTS ==============
input_scaler = StandardScaler()
input_scaler.fit(pd.concat([train_df[INPUT_COLUMNS], test_df[INPUT_COLUMNS]]))
X_train_all = input_scaler.transform(train_df[INPUT_COLUMNS])
X_test_all  = input_scaler.transform(test_df[INPUT_COLUMNS])

# Separate physics cols
phys_idx = [INPUT_COLUMNS.index(c) for c in physics_features]
X_train_phys = X_train_all[:, phys_idx]
X_test_phys  = X_test_all[:, phys_idx]
X_train_main = np.delete(X_train_all, phys_idx, axis=1)
X_test_main  = np.delete(X_test_all, phys_idx, axis=1)

# ============== SCALE OUTPUTS (per target) ==============
output_scalers, y_train_scaled, y_test_scaled = {}, [], []
for col in TARGET_COLUMNS:
    scaler = MinMaxScaler() if col == 'ringing_frequency_MHz' else StandardScaler()
    y_train_scaled.append(scaler.fit_transform(train_df[[col]]).flatten())
    y_test_scaled.append(scaler.transform(test_df[[col]]).flatten())
    output_scalers[col] = scaler
y_train_scaled = np.array(y_train_scaled).T
y_test_scaled  = np.array(y_test_scaled).T

# ============== SPLIT TRAIN/VAL ==============
Xtr_main, Xval_main, Xtr_phys, Xval_phys, ytr, yval = train_test_split(
    X_train_main, X_train_phys, y_train_scaled,
    test_size=0.15, random_state=SEED
)

# ============== BUILD MULTI-HEAD ANN (FAST BACKBONE) ==============
def build_multihead_ann(input_dim_main, input_dim_phys, output_dim, l2_reg=1e-4, dropout_rate=0.2):
    inp_main = layers.Input(shape=(input_dim_main,), name="main_inputs")
    inp_phys = layers.Input(shape=(input_dim_phys,), name="physics_inputs")

    # Shared backbone
    x_main = layers.Dense(128, kernel_regularizer=regularizers.l2(l2_reg))(inp_main)
    x_main = layers.BatchNormalization()(x_main); x_main = layers.ReLU()(x_main)
    x_main = layers.Dropout(dropout_rate)(x_main)

    x_main = layers.Dense(64, kernel_regularizer=regularizers.l2(l2_reg))(x_main)
    x_main = layers.BatchNormalization()(x_main); x_main = layers.ReLU()(x_main)
    x_main = layers.Dropout(dropout_rate)(x_main)

    x_phys = layers.Dense(32, activation="relu")(inp_phys)

    shared = layers.concatenate([x_main, x_phys])

    # Multi-head outputs (deeper per-head)
    outputs = []
    for i, col in enumerate(TARGET_COLUMNS):
        h = layers.Dense(64, activation="relu")(shared)
        h = layers.Dropout(dropout_rate)(h)
        h = layers.Dense(32, activation="relu")(h)
        out = layers.Dense(1, activation="linear", name=col)(h)
        outputs.append(out)

    model = models.Model(inputs=[inp_main, inp_phys], outputs=outputs)
    return model

model = build_multihead_ann(Xtr_main.shape[1], Xtr_phys.shape[1], len(TARGET_COLUMNS))

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-4),
    loss=["mse"]*len(TARGET_COLUMNS),
    metrics=["mae"]*len(TARGET_COLUMNS)
)

early_stop = callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
reduce_lr  = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.3, patience=5)

history = model.fit(
    [Xtr_main, Xtr_phys],
    [ytr[:,i] for i in range(len(TARGET_COLUMNS))],
    validation_data=([Xval_main,Xval_phys],[yval[:,i] for i in range(len(TARGET_COLUMNS))]),
    epochs=120, batch_size=256, callbacks=[early_stop, reduce_lr], verbose=1
)


model.save(f"{BASE_DIR}/models/multihead_ann.h5")

# ============== LOSS CURVE ==============
plt.figure()
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel("Epoch"); plt.ylabel("MSE Loss")
plt.title("Train vs Validation Loss")
plt.legend(); plt.savefig(f"{BASE_DIR}/train_val_loss_curves/loss.png"); plt.close()

# ============== EVALUATION ==============
def evaluate_and_save(X_main, X_phys, y_scaled, name, positive_only=False):
    y_pred_scaled_list = model.predict([X_main, X_phys])
    results = []
    for i, col in enumerate(TARGET_COLUMNS):
        y_true = output_scalers[col].inverse_transform(y_scaled[:,i].reshape(-1,1)).flatten()
        y_pred = output_scalers[col].inverse_transform(y_pred_scaled_list[i].reshape(-1,1)).flatten()
        r2 = r2_score(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        results.append((col, r2, rmse))
    df_results = pd.DataFrame(results, columns=["Target","R2","RMSE"]).sort_values("R2",ascending=False)
    if positive_only:
        df_results = df_results[df_results["R2"]>0]
    print(f"\n{name}:\n",df_results)
    df_results.to_csv(f"{BASE_DIR}/r2_rmse_tables/{name}.csv", index=False)
    return df_results

# Run evals
evaluate_and_save(Xtr_main,Xtr_phys,ytr,"train")
evaluate_and_save(Xval_main,Xval_phys,yval,"val")
evaluate_and_save(X_train_main,X_train_phys,y_train_scaled,"test")
evaluate_and_save(X_test_main,X_test_phys,y_test_scaled,"unseen",positive_only=True)


Epoch 1/120
[1m287/287[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 13ms/step - current_fall_time_pulse1_loss: 0.3721 - current_fall_time_pulse1_mae: 0.4432 - current_fall_time_pulse2_loss: 0.3732 - current_fall_time_pulse2_mae: 0.4416 - current_rise_time_pulse1_loss: 0.3775 - current_rise_time_pulse1_mae: 0.4446 - current_rise_time_pulse2_loss: 0.4416 - current_rise_time_pulse2_mae: 0.5135 - loss: 5.3318 - overshoot_pulse_1_loss: 0.5704 - overshoot_pulse_1_mae: 0.5558 - overshoot_pulse_2_loss: 0.5662 - overshoot_pulse_2_mae: 0.5493 - ringing_frequency_MHz_loss: 0.1719 - ringing_frequency_MHz_mae: 0.2796 - undershoot_pulse_1_loss: 0.4616 - undershoot_pulse_1_mae: 0.4957 - undershoot_pulse_2_loss: 0.5062 - undershoot_pulse_2_mae: 0.5051 - voltage_fall_time_pulse1_loss: 0.2384 - voltage_fall_time_pulse1_mae: 0.3428 - voltage_fall_time_pulse2_loss: 0.3590 - voltage_fall_time_pulse2_mae: 0.4276 - voltage_rise_time_pulse1_loss: 0.3986 - voltage_rise_time_pulse1_mae: 0.4199 - volt



[1m2294/2294[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step

train:
                       Target        R2          RMSE
7   current_fall_time_pulse2  0.972783  1.940674e-09
3   voltage_fall_time_pulse2  0.972357  1.141393e-09
2   voltage_fall_time_pulse1  0.971768  1.154872e-09
6   current_fall_time_pulse1  0.964456  2.224513e-09
0   voltage_rise_time_pulse1  0.963585  7.731276e-10
12     ringing_frequency_MHz  0.952935  7.164022e+00
11        undershoot_pulse_2  0.952548  2.986254e+00
10        undershoot_pulse_1  0.949866  3.067763e+00
8          overshoot_pulse_1  0.944745  2.874877e+00
4   current_rise_time_pulse1  0.911510  1.461749e-08
9          overshoot_pulse_2  0.910612  7.561471e+00
1   voltage_rise_time_pulse2  0.883570  1.374071e-09
5   current_rise_time_pulse2  0.883250  8.581928e-09
[1m405/405[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step

val:
                       Target        R2          RMSE
7   current_fall_time_pulse2  0.97297

Unnamed: 0,Target,R2,RMSE
2,voltage_fall_time_pulse1,0.917023,6.706297e-10
7,current_fall_time_pulse2,0.914252,3.264926e-09
3,voltage_fall_time_pulse2,0.907579,7.081463e-10
6,current_fall_time_pulse1,0.883044,3.839445e-09
8,overshoot_pulse_1,0.708823,7.297281
11,undershoot_pulse_2,0.490496,6.954391
1,voltage_rise_time_pulse2,0.474846,3.380852e-09
10,undershoot_pulse_1,0.425873,7.432636
9,overshoot_pulse_2,0.394913,16.11558
5,current_rise_time_pulse2,0.122039,1.570873e-08
