# Baseline ANN Model 

Here we are Sampling from the complete dataset to 25% from each unique simulation combination of:


['Vbus', 'Ls4', 'Ls5', 'Ls6', 'Ls7', 'Ls8', 'Ls9', 'Ls10', 'Ls11']

In [21]:
import pandas as pd
SEED = 42

df = pd.read_csv(r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs.csv")

# Select simulation columns that define unique simulation setups
sim_cols = ['Vbus', 'Ls4', 'Ls5', 'Ls6', 'Ls7', 'Ls8', 'Ls9', 'Ls10', 'Ls11']

# Sample 25% from each unique simulation config
df_25 = (
    df.groupby(sim_cols, group_keys=False)
      .apply(lambda x: x.sample(frac=0.25, random_state=SEED))
      .reset_index(drop=True)
)

# Save to disk for reuse
df_25.to_csv("merged_train_5_MOSFETs_25percent.csv", index=False)
print("Saved 25% sampled file to: merged_train_5_MOSFETs_25percent.csv")


  .apply(lambda x: x.sample(frac=0.25, random_state=SEED))


Saved 25% sampled file to: merged_train_5_MOSFETs_25percent.csv


## ITERATION - 1

| **Technique**                            | **In this Iteration**                                                               |
| ---------------------------------------- | -------------------------------------------------------------------------------------------- |
| **Dropout**                            | Two `Dropout(0.2)` layers after Dense                                                        |
| **L2 Regularization**                  | `kernel_regularizer=regularizers.l2(1e-4)` in Dense layers                                   |
| **Early Stopping**                     | `callbacks.EarlyStopping(monitor='val_loss', patience=10)`                                   |
| **Batch Normalization**                | —                                                                                            |
| **Learning Rate Scheduling**           | Fixed learning rate (`Adam`) without decay/scheduler                                         |
| **Scaling (Inputs)**                   | Inputs scaled using `StandardScaler()`                                                       |
| **Scaling (Outputs)**                  | All targets scaled using `StandardScaler`, but `ringing_frequency_MHz` uses `MinMaxScaler()` |
| **Hyperparameter Tuning**              | Manual hyperparameters set in `build_ann()`                                                  |
| **Evaluation (Train/Val/Test/Unseen)** | Evaluates on 4 splits: `train`, `val`, `test`, `unseen MOSFET`                               |
| **Generalization to Unseen Device**    | Uses `Part_Number` encoding, and unseen part (`C2M0280120D`) is evaluated separately         |


In [23]:
# ==================== ITERATION 1: BASELINE ANN WITH PART_NUMBER GENERALIZATION ====================
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers
import matplotlib.pyplot as plt

# ============== SETTINGS ==============
SEED = 42
UNSEEN_PART = 'C2M0280120D'

BASE_DIR = "first_iteration"
os.makedirs(f"{BASE_DIR}/r2_rmse_tables", exist_ok=True)
os.makedirs(f"{BASE_DIR}/train_val_loss_curves", exist_ok=True)
os.makedirs(f"{BASE_DIR}/predicted_vs_actual", exist_ok=True)
os.makedirs(f"{BASE_DIR}/models", exist_ok=True)

# ============== LOAD DATA ==============
df = pd.read_csv(r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs_25percent.csv")

# ============== TARGETS + DROPS ==============
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
DROP_COLUMNS = ['DeviceID', 'MOSFET', 'Part_Number']

# ============== SPLIT SEEN/UNSEEN DEVICES ==============
parts = df['Part_Number'].unique().tolist()
seen_parts = [p for p in parts if p != UNSEEN_PART]
train_df = df[df['Part_Number'].isin(seen_parts)].copy()
test_df = df[df['Part_Number'] == UNSEEN_PART].copy()

# ============== DERIVED FEATURE ==============
def compute_ringing_frequency(row):
    L_eq = row[['Ls4', 'Ls5', 'Ls6', 'Ls7', 'Ls8', 'Ls9', 'Ls10', 'Ls11']].sum()
    C_eq = row.get("Coss", 1e-12)
    if L_eq > 0 and C_eq > 0:
        return 1 / (2 * np.pi * np.sqrt(L_eq * C_eq)) / 1e6
    return np.nan

for df_ in [train_df, test_df]:
    df_['ringing_frequency_MHz'] = df_.apply(compute_ringing_frequency, axis=1)

# ============== ENCODE DEVICE =================
encoder = LabelEncoder()
all_parts = pd.concat([train_df['Part_Number'], test_df['Part_Number']])
encoder.fit(all_parts)

train_df['Part_encoded'] = encoder.transform(train_df['Part_Number'])
test_df['Part_encoded'] = encoder.transform(test_df['Part_Number'])

# ============== INPUT FEATURES ==============
INPUT_COLUMNS = [
    col for col in df.columns if col not in TARGET_COLUMNS + DROP_COLUMNS
]
INPUT_COLUMNS.append("Part_encoded")  # Add encoded device as feature

# ============== SCALE INPUTS ==============
input_scaler = StandardScaler()
input_scaler.fit(pd.concat([train_df[INPUT_COLUMNS], test_df[INPUT_COLUMNS]]))
X_train_all = input_scaler.transform(train_df[INPUT_COLUMNS])
X_test_all = input_scaler.transform(test_df[INPUT_COLUMNS])

# ============== SCALE OUTPUTS ==============
output_scalers = {}
y_train_scaled = pd.DataFrame()
y_test_scaled = pd.DataFrame()

for col in TARGET_COLUMNS:
    scaler = MinMaxScaler() if col == 'ringing_frequency_MHz' else StandardScaler()
    y_train_scaled[col] = scaler.fit_transform(train_df[[col]]).flatten()
    y_test_scaled[col] = scaler.transform(test_df[[col]]).flatten()
    output_scalers[col] = scaler

# ============== SPLIT TRAIN/VAL (70/15/15) ==============
X_train, X_val, y_train, y_val = train_test_split(
    X_train_all, y_train_scaled.values, test_size=0.15, random_state=SEED
)

# ============== DEFINE BASELINE ANN ==============
def build_ann(input_dim, output_dim, dropout=0.2, l2_reg=1e-4):
    model = models.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(l2_reg)),
        layers.Dropout(dropout),
        layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(l2_reg)),
        layers.Dropout(dropout),
        layers.Dense(output_dim, activation='linear')
    ])
    return model

model = build_ann(X_train.shape[1], y_train.shape[1])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# ============== TRAIN MODEL ==============
early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                    epochs=200, batch_size=64, callbacks=[early_stop], verbose=1)

model.save(f"{BASE_DIR}/models/baseline_ann.h5")

# ============== SAVE LOSS CURVE ==============
plt.figure()
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.title("Train vs Validation Loss")
plt.legend()
plt.savefig(f"{BASE_DIR}/train_val_loss_curves/loss.png")
plt.close()

# ============== EVALUATION FUNCTION ==============
def evaluate_and_save(X, y_scaled, name, filter_positive=False):
    y_pred_scaled = model.predict(X)
    results = []
    for i, col in enumerate(TARGET_COLUMNS):
        y_true = output_scalers[col].inverse_transform(y_scaled[:, i].reshape(-1, 1)).flatten()
        y_pred = output_scalers[col].inverse_transform(y_pred_scaled[:, i].reshape(-1, 1)).flatten()
        r2 = r2_score(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        results.append((col, r2, rmse))

    df_results = pd.DataFrame(results, columns=["Target", "R2", "RMSE"])
    df_results = df_results.sort_values("R2", ascending=False)

    if filter_positive:
        df_results = df_results[df_results["R2"] > 0]
    else:
        print(f"\nEvaluation on {name}:")

    print(df_results.to_string(index=False))
    df_results.to_csv(f"{BASE_DIR}/r2_rmse_tables/{name}.csv", index=False)
    return df_results


# ============== RUN EVALUATIONS ==============
evaluate_and_save(X_train, y_train, "train")
evaluate_and_save(X_val, y_val, "val")
evaluate_and_save(X_train_all, y_train_scaled.values, "test")
evaluate_and_save(X_test_all, y_test_scaled.values, "unseen", filter_positive=True)

# ============== PREDICTED vs ACTUAL (INTERNAL TEST) SCATTER PLOTS ==============
y_pred_scaled_test = model.predict(X_train_all)

fig, axes = plt.subplots(5, 3, figsize=(15, 20))
axes = axes.flatten()

for i, col in enumerate(TARGET_COLUMNS):
    y_true = output_scalers[col].inverse_transform(y_train_scaled[col].values.reshape(-1, 1)).flatten()
    y_pred = output_scalers[col].inverse_transform(y_pred_scaled_test[:, i].reshape(-1, 1)).flatten()
    axes[i].scatter(y_true, y_pred, s=10, alpha=0.6, color='lightblue')
    axes[i].plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--')
    axes[i].set_title(f"{col}")
    axes[i].set_xlabel("Actual")
    axes[i].set_ylabel("Predicted")

plt.tight_layout()
plt.savefig(f"{BASE_DIR}/predicted_vs_actual/scatter_internal_test.png")
plt.close()



Epoch 1/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.2397 - mae: 0.3097 - val_loss: 0.0843 - val_mae: 0.1451
Epoch 2/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.1087 - mae: 0.1966 - val_loss: 0.0721 - val_mae: 0.1366
Epoch 3/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.0944 - mae: 0.1830 - val_loss: 0.0668 - val_mae: 0.1301
Epoch 4/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0899 - mae: 0.1773 - val_loss: 0.0663 - val_mae: 0.1288
Epoch 5/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0872 - mae: 0.1741 - val_loss: 0.0640 - val_mae: 0.1259
Epoch 6/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0861 - mae: 0.1727 - val_loss: 0.0633 - val_mae: 0.1267
Epoch 7/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 



[1m2238/2238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step

Evaluation on train:
                  Target       R2         RMSE
voltage_fall_time_pulse2 0.998224 2.482687e-10
voltage_fall_time_pulse1 0.998206 2.498879e-10
current_fall_time_pulse2 0.993574 8.118418e-10
current_fall_time_pulse1 0.993474 8.199246e-10
voltage_rise_time_pulse1 0.981386 5.419773e-10
   ringing_frequency_MHz 0.977980 2.963775e-06
       overshoot_pulse_1 0.973717 2.503170e+00
      undershoot_pulse_2 0.964102 2.172334e+00
      undershoot_pulse_1 0.962148 2.260896e+00
current_rise_time_pulse2 0.949044 7.116870e-09
       overshoot_pulse_2 0.915609 6.622726e+00
voltage_rise_time_pulse2 0.885372 1.645811e-09
current_rise_time_pulse1 0.712010 1.443695e-08
[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 894us/step

Evaluation on val:
                  Target       R2         RMSE
voltage_fall_time_pulse1 0.998275 2.447454e-10
voltage_fall_time_pulse2 0.998255 2.457510e-10
curre

## ITERATION - 2

In [25]:
# ==================== ITERATION 2: NO REGULARIZATION ANN ====================
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
import matplotlib.pyplot as plt

# ============== SETTINGS ==============
SEED = 42
UNSEEN_PART = 'C2M0280120D'

BASE_DIR = "second_iteration"
os.makedirs(f"{BASE_DIR}/r2_rmse_tables", exist_ok=True)
os.makedirs(f"{BASE_DIR}/train_val_loss_curves", exist_ok=True)
os.makedirs(f"{BASE_DIR}/predicted_vs_actual", exist_ok=True)
os.makedirs(f"{BASE_DIR}/models", exist_ok=True)

# ============== LOAD DATA ==============
df = pd.read_csv(r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs_25percent.csv")

# ============== TARGETS + DROPS ==============
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
DROP_COLUMNS = ['DeviceID', 'MOSFET', 'Part_Number']

# ============== SPLIT SEEN/UNSEEN DEVICES ==============
parts = df['Part_Number'].unique().tolist()
seen_parts = [p for p in parts if p != UNSEEN_PART]
train_df = df[df['Part_Number'].isin(seen_parts)].copy()
test_df = df[df['Part_Number'] == UNSEEN_PART].copy()

# ============== DERIVED FEATURE ==============
def compute_ringing_frequency(row):
    L_eq = row[['Ls4', 'Ls5', 'Ls6', 'Ls7', 'Ls8', 'Ls9', 'Ls10', 'Ls11']].sum()
    C_eq = row.get("Coss", 1e-12)
    if L_eq > 0 and C_eq > 0:
        return 1 / (2 * np.pi * np.sqrt(L_eq * C_eq)) / 1e6
    return np.nan

for df_ in [train_df, test_df]:
    df_['ringing_frequency_MHz'] = df_.apply(compute_ringing_frequency, axis=1)

# ============== ENCODE DEVICE =================
encoder = LabelEncoder()
all_parts = pd.concat([train_df['Part_Number'], test_df['Part_Number']])
encoder.fit(all_parts)

train_df['Part_encoded'] = encoder.transform(train_df['Part_Number'])
test_df['Part_encoded'] = encoder.transform(test_df['Part_Number'])

# ============== INPUT FEATURES ==============
INPUT_COLUMNS = [
    col for col in df.columns if col not in TARGET_COLUMNS + DROP_COLUMNS
]
INPUT_COLUMNS.append("Part_encoded")  # Add encoded device as feature

# ============== SCALE INPUTS ==============
input_scaler = StandardScaler()
input_scaler.fit(pd.concat([train_df[INPUT_COLUMNS], test_df[INPUT_COLUMNS]]))
X_train_all = input_scaler.transform(train_df[INPUT_COLUMNS])
X_test_all = input_scaler.transform(test_df[INPUT_COLUMNS])

# ============== SCALE OUTPUTS ==============
output_scalers = {}
y_train_scaled = pd.DataFrame()
y_test_scaled = pd.DataFrame()

for col in TARGET_COLUMNS:
    scaler = MinMaxScaler() if col == 'ringing_frequency_MHz' else StandardScaler()
    y_train_scaled[col] = scaler.fit_transform(train_df[[col]]).flatten()
    y_test_scaled[col] = scaler.transform(test_df[[col]]).flatten()
    output_scalers[col] = scaler

# ============== SPLIT TRAIN/VAL (70/15/15) ==============
X_train, X_val, y_train, y_val = train_test_split(
    X_train_all, y_train_scaled.values, test_size=0.15, random_state=SEED
)

# ============== DEFINE NO REGULARIZATION ANN ==============
def build_ann(input_dim, output_dim):
    model = models.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(128, activation='relu'),
        layers.Dense(128, activation='relu'),
        layers.Dense(output_dim, activation='linear')
    ])
    return model

model = build_ann(X_train.shape[1], y_train.shape[1])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# ============== TRAIN MODEL ==============
early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                    epochs=200, batch_size=64, callbacks=[early_stop], verbose=1)

model.save(f"{BASE_DIR}/models/no_regularization_ann.h5")

# ============== SAVE LOSS CURVE ==============
plt.figure()
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.title("Train vs Validation Loss")
plt.legend()
plt.savefig(f"{BASE_DIR}/train_val_loss_curves/loss.png")
plt.close()

# ============== EVALUATION FUNCTION ==============
def evaluate_and_save(X, y_scaled, name, filter_positive=False):
    y_pred_scaled = model.predict(X)
    results = []
    for i, col in enumerate(TARGET_COLUMNS):
        y_true = output_scalers[col].inverse_transform(y_scaled[:, i].reshape(-1, 1)).flatten()
        y_pred = output_scalers[col].inverse_transform(y_pred_scaled[:, i].reshape(-1, 1)).flatten()
        r2 = r2_score(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        results.append((col, r2, rmse))

    df_results = pd.DataFrame(results, columns=["Target", "R2", "RMSE"])
    df_results = df_results.sort_values("R2", ascending=False)

    if filter_positive:
        df_results = df_results[df_results["R2"] > 0]
    else:
        print(f"\nEvaluation on {name}:")

    print(df_results.to_string(index=False))
    df_results.to_csv(f"{BASE_DIR}/r2_rmse_tables/{name}.csv", index=False)
    return df_results

# ============== RUN EVALUATIONS ==============
evaluate_and_save(X_train, y_train, "train")
evaluate_and_save(X_val, y_val, "val")
evaluate_and_save(X_train_all, y_train_scaled.values, "test")
evaluate_and_save(X_test_all, y_test_scaled.values, "unseen", filter_positive=True)

# ============== PREDICTED vs ACTUAL (INTERNAL TEST) SCATTER PLOTS ==============
y_pred_scaled_test = model.predict(X_train_all)

fig, axes = plt.subplots(5, 3, figsize=(15, 20))
axes = axes.flatten()

for i, col in enumerate(TARGET_COLUMNS):
    y_true = output_scalers[col].inverse_transform(y_train_scaled[col].values.reshape(-1, 1)).flatten()
    y_pred = output_scalers[col].inverse_transform(y_pred_scaled_test[:, i].reshape(-1, 1)).flatten()
    axes[i].scatter(y_true, y_pred, s=10, alpha=0.6, color='orange')
    axes[i].plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--')
    axes[i].set_title(f"{col}")
    axes[i].set_xlabel("Actual")
    axes[i].set_ylabel("Predicted")

plt.tight_layout()
plt.savefig(f"{BASE_DIR}/predicted_vs_actual/scatter_internal_test.png")
plt.close()


Epoch 1/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 0.1528 - mae: 0.2224 - val_loss: 0.0620 - val_mae: 0.1333
Epoch 2/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0593 - mae: 0.1273 - val_loss: 0.0555 - val_mae: 0.1231
Epoch 3/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0533 - mae: 0.1175 - val_loss: 0.0530 - val_mae: 0.1171
Epoch 4/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0509 - mae: 0.1130 - val_loss: 0.0508 - val_mae: 0.1119
Epoch 5/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0495 - mae: 0.1101 - val_loss: 0.0499 - val_mae: 0.1095
Epoch 6/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0486 - mae: 0.1081 - val_loss: 0.0491 - val_mae: 0.1079
Epoch 7/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 



[1m2238/2238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 899us/step

Evaluation on train:
                  Target       R2         RMSE
voltage_fall_time_pulse2 0.999214 1.651387e-10
voltage_fall_time_pulse1 0.999211 1.656969e-10
current_fall_time_pulse2 0.998619 3.763129e-10
current_fall_time_pulse1 0.998474 3.964882e-10
   ringing_frequency_MHz 0.998188 8.502798e-07
voltage_rise_time_pulse1 0.991424 3.678801e-10
       overshoot_pulse_1 0.987109 1.753050e+00
      undershoot_pulse_2 0.976192 1.769095e+00
      undershoot_pulse_1 0.973869 1.878530e+00
current_rise_time_pulse2 0.967089 5.719536e-09
       overshoot_pulse_2 0.925020 6.242556e+00
voltage_rise_time_pulse2 0.907220 1.480680e-09
current_rise_time_pulse1 0.746301 1.355021e-08
[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 907us/step

Evaluation on val:
                  Target       R2         RMSE
voltage_fall_time_pulse2 0.999245 1.616906e-10
voltage_fall_time_pulse1 0.999237 1.627953e-10
cur

## ITERATION - 3

In [26]:
# ==================== ITERATION 3: BATCHNORM + L2, NO DROPOUT ====================
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers
import matplotlib.pyplot as plt

# ============== SETTINGS ==============
SEED = 42
UNSEEN_PART = 'C2M0280120D'

BASE_DIR = "third_iteration"
os.makedirs(f"{BASE_DIR}/r2_rmse_tables", exist_ok=True)
os.makedirs(f"{BASE_DIR}/train_val_loss_curves", exist_ok=True)
os.makedirs(f"{BASE_DIR}/predicted_vs_actual", exist_ok=True)
os.makedirs(f"{BASE_DIR}/models", exist_ok=True)

# ============== LOAD DATA ==============
df = pd.read_csv(r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs_25percent.csv")

# ============== TARGETS + DROPS ==============
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
DROP_COLUMNS = ['DeviceID', 'MOSFET', 'Part_Number']

# ============== SPLIT SEEN/UNSEEN DEVICES ==============
parts = df['Part_Number'].unique().tolist()
seen_parts = [p for p in parts if p != UNSEEN_PART]
train_df = df[df['Part_Number'].isin(seen_parts)].copy()
test_df = df[df['Part_Number'] == UNSEEN_PART].copy()

# ============== DERIVED FEATURE ==============
def compute_ringing_frequency(row):
    L_eq = row[['Ls4', 'Ls5', 'Ls6', 'Ls7', 'Ls8', 'Ls9', 'Ls10', 'Ls11']].sum()
    C_eq = row.get("Coss", 1e-12)
    if L_eq > 0 and C_eq > 0:
        return 1 / (2 * np.pi * np.sqrt(L_eq * C_eq)) / 1e6
    return np.nan

for df_ in [train_df, test_df]:
    df_['ringing_frequency_MHz'] = df_.apply(compute_ringing_frequency, axis=1)

# ============== ENCODE DEVICE =================
encoder = LabelEncoder()
all_parts = pd.concat([train_df['Part_Number'], test_df['Part_Number']])
encoder.fit(all_parts)

train_df['Part_encoded'] = encoder.transform(train_df['Part_Number'])
test_df['Part_encoded'] = encoder.transform(test_df['Part_Number'])

# ============== INPUT FEATURES ==============
INPUT_COLUMNS = [
    col for col in df.columns if col not in TARGET_COLUMNS + DROP_COLUMNS
]
INPUT_COLUMNS.append("Part_encoded")

# ============== SCALE INPUTS ==============
input_scaler = StandardScaler()
input_scaler.fit(pd.concat([train_df[INPUT_COLUMNS], test_df[INPUT_COLUMNS]]))
X_train_all = input_scaler.transform(train_df[INPUT_COLUMNS])
X_test_all = input_scaler.transform(test_df[INPUT_COLUMNS])

# ============== SCALE OUTPUTS ==============
output_scalers = {}
y_train_scaled = pd.DataFrame()
y_test_scaled = pd.DataFrame()

for col in TARGET_COLUMNS:
    scaler = MinMaxScaler() if col == 'ringing_frequency_MHz' else StandardScaler()
    y_train_scaled[col] = scaler.fit_transform(train_df[[col]]).flatten()
    y_test_scaled[col] = scaler.transform(test_df[[col]]).flatten()
    output_scalers[col] = scaler

# ============== SPLIT TRAIN/VAL (70/15/15) ==============
X_train, X_val, y_train, y_val = train_test_split(
    X_train_all, y_train_scaled.values, test_size=0.15, random_state=SEED
)

# ============== DEFINE ANN with BATCHNORM + L2 =================
def build_ann(input_dim, output_dim, l2_reg=1e-4):
    model = models.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(128, activation=None, kernel_regularizer=regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dense(128, activation=None, kernel_regularizer=regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dense(output_dim, activation='linear')
    ])
    return model

model = build_ann(X_train.shape[1], y_train.shape[1])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# ============== TRAIN MODEL ==============
early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                    epochs=200, batch_size=64, callbacks=[early_stop], verbose=1)

model.save(f"{BASE_DIR}/models/bn_l2_ann.h5")

# ============== SAVE LOSS CURVE ==============
plt.figure()
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.title("Train vs Validation Loss")
plt.legend()
plt.savefig(f"{BASE_DIR}/train_val_loss_curves/loss.png")
plt.close()

# ============== EVALUATION FUNCTION ==============
def evaluate_and_save(X, y_scaled, name, filter_positive=False):
    y_pred_scaled = model.predict(X)
    results = []
    for i, col in enumerate(TARGET_COLUMNS):
        y_true = output_scalers[col].inverse_transform(y_scaled[:, i].reshape(-1, 1)).flatten()
        y_pred = output_scalers[col].inverse_transform(y_pred_scaled[:, i].reshape(-1, 1)).flatten()
        r2 = r2_score(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        results.append((col, r2, rmse))

    df_results = pd.DataFrame(results, columns=["Target", "R2", "RMSE"])
    df_results = df_results.sort_values("R2", ascending=False)

    if filter_positive:
        df_results = df_results[df_results["R2"] > 0]
    else:
        print(f"\nEvaluation on {name}:")

    print(df_results.to_string(index=False))
    df_results.to_csv(f"{BASE_DIR}/r2_rmse_tables/{name}.csv", index=False)
    return df_results

# ============== RUN EVALUATIONS ==============
evaluate_and_save(X_train, y_train, "train")
evaluate_and_save(X_val, y_val, "val")
evaluate_and_save(X_train_all, y_train_scaled.values, "test")
evaluate_and_save(X_test_all, y_test_scaled.values, "unseen", filter_positive=True)

# ============== PREDICTED vs ACTUAL SCATTER PLOTS (TEST) ==============
y_pred_scaled_test = model.predict(X_train_all)

fig, axes = plt.subplots(5, 3, figsize=(15, 20))
axes = axes.flatten()

for i, col in enumerate(TARGET_COLUMNS):
    y_true = output_scalers[col].inverse_transform(y_train_scaled[col].values.reshape(-1, 1)).flatten()
    y_pred = output_scalers[col].inverse_transform(y_pred_scaled_test[:, i].reshape(-1, 1)).flatten()
    axes[i].scatter(y_true, y_pred, s=10, alpha=0.6, color='purple')
    axes[i].plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--')
    axes[i].set_title(f"{col}")
    axes[i].set_xlabel("Actual")
    axes[i].set_ylabel("Predicted")

plt.tight_layout()
plt.savefig(f"{BASE_DIR}/predicted_vs_actual/scatter_internal_test.png")
plt.close()


Epoch 1/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - loss: 0.2020 - mae: 0.2713 - val_loss: 0.0858 - val_mae: 0.1573
Epoch 2/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 0.0942 - mae: 0.1784 - val_loss: 0.0779 - val_mae: 0.1507
Epoch 3/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0834 - mae: 0.1650 - val_loss: 0.0747 - val_mae: 0.1509
Epoch 4/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0777 - mae: 0.1572 - val_loss: 0.0713 - val_mae: 0.1467
Epoch 5/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.0744 - mae: 0.1529 - val_loss: 0.0693 - val_mae: 0.1434
Epoch 6/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 15ms/step - loss: 0.0723 - mae: 0.1501 - val_loss: 0.0664 - val_mae: 0.1381
Epoch 7/200
[1m1119/1119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m



[1m2238/2238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step

Evaluation on train:
                  Target       R2         RMSE
voltage_fall_time_pulse2 0.998576 2.223389e-10
voltage_fall_time_pulse1 0.998481 2.299636e-10
current_fall_time_pulse2 0.997048 5.502547e-10
current_fall_time_pulse1 0.996792 5.748204e-10
   ringing_frequency_MHz 0.990809 1.914749e-06
       overshoot_pulse_1 0.980332 2.165375e+00
voltage_rise_time_pulse1 0.976791 6.051882e-10
      undershoot_pulse_2 0.969839 1.991184e+00
      undershoot_pulse_1 0.965770 2.150019e+00
current_rise_time_pulse2 0.958849 6.395643e-09
       overshoot_pulse_2 0.918994 6.488559e+00
voltage_rise_time_pulse2 0.881075 1.676374e-09
current_rise_time_pulse1 0.724313 1.412521e-08
[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step

Evaluation on val:
                  Target       R2         RMSE
voltage_fall_time_pulse2 0.998593 2.206554e-10
voltage_fall_time_pulse1 0.998452 2.318317e-10
current

## ITERATION - 4

In [None]:
# ==================== ITERATION 4: DROPOUT + L2 + LR SCHEDULER ====================
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers
import matplotlib.pyplot as plt

# ============== SETTINGS ==============
SEED = 42
UNSEEN_PART = 'C2M0025120D'

BASE_DIR = "fourth_iteration"
os.makedirs(f"{BASE_DIR}/r2_rmse_tables", exist_ok=True)
os.makedirs(f"{BASE_DIR}/train_val_loss_curves", exist_ok=True)
os.makedirs(f"{BASE_DIR}/predicted_vs_actual", exist_ok=True)
os.makedirs(f"{BASE_DIR}/models", exist_ok=True)

# ============== LOAD DATA ==============
df = pd.read_csv(r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs_25percent.csv")

# ============== TARGETS + DROPS ==============
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
DROP_COLUMNS = ['DeviceID', 'MOSFET', 'Part_Number']

# ============== SPLIT SEEN/UNSEEN DEVICES ==============
parts = df['Part_Number'].unique().tolist()
seen_parts = [p for p in parts if p != UNSEEN_PART]
train_df = df[df['Part_Number'].isin(seen_parts)].copy()
test_df = df[df['Part_Number'] == UNSEEN_PART].copy()

# ============== DERIVED FEATURE ==============
def compute_ringing_frequency(row):
    L_eq = row[['Ls4', 'Ls5', 'Ls6', 'Ls7', 'Ls8', 'Ls9', 'Ls10', 'Ls11']].sum()
    C_eq = row.get("Coss", 1e-12)
    if L_eq > 0 and C_eq > 0:
        return 1 / (2 * np.pi * np.sqrt(L_eq * C_eq)) / 1e6
    return np.nan

for df_ in [train_df, test_df]:
    df_['ringing_frequency_MHz'] = df_.apply(compute_ringing_frequency, axis=1)

# ============== ENCODE DEVICE =================
encoder = LabelEncoder()
all_parts = pd.concat([train_df['Part_Number'], test_df['Part_Number']])
encoder.fit(all_parts)

train_df['Part_encoded'] = encoder.transform(train_df['Part_Number'])
test_df['Part_encoded'] = encoder.transform(test_df['Part_Number'])

# ============== INPUT FEATURES ==============
INPUT_COLUMNS = [
    col for col in df.columns if col not in TARGET_COLUMNS + DROP_COLUMNS
]
INPUT_COLUMNS.append("Part_encoded")

# ============== SCALE INPUTS ==============
input_scaler = StandardScaler()
input_scaler.fit(pd.concat([train_df[INPUT_COLUMNS], test_df[INPUT_COLUMNS]]))
X_train_all = input_scaler.transform(train_df[INPUT_COLUMNS])
X_test_all = input_scaler.transform(test_df[INPUT_COLUMNS])

# ============== SCALE OUTPUTS ==============
output_scalers = {}
y_train_scaled = pd.DataFrame()
y_test_scaled = pd.DataFrame()

for col in TARGET_COLUMNS:
    scaler = MinMaxScaler() if col == 'ringing_frequency_MHz' else StandardScaler()
    y_train_scaled[col] = scaler.fit_transform(train_df[[col]]).flatten()
    y_test_scaled[col] = scaler.transform(test_df[[col]]).flatten()
    output_scalers[col] = scaler

# ============== SPLIT TRAIN/VAL (70/15/15) ==============
X_train, X_val, y_train, y_val = train_test_split(
    X_train_all, y_train_scaled.values, test_size=0.15, random_state=SEED
)

# ============== DEFINE ANN with DROPOUT + BN + L2 =================
def build_ann(input_dim, output_dim, dropout=0.2, l2_reg=1e-4):
    model = models.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(128, activation=None, kernel_regularizer=regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(dropout),
        layers.Dense(128, activation=None, kernel_regularizer=regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(dropout),
        layers.Dense(output_dim, activation='linear')
    ])
    return model

model = build_ann(X_train.shape[1], y_train.shape[1])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# ============== TRAIN MODEL ==============
early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
lr_schedule = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)

history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                    epochs=200, batch_size=64, callbacks=[early_stop, lr_schedule], verbose=1)

model.save(f"{BASE_DIR}/models/dropout_l2_scheduler_ann.h5")

# ============== SAVE LOSS CURVE ==============
plt.figure()
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.title("Train vs Validation Loss")
plt.legend()
plt.savefig(f"{BASE_DIR}/train_val_loss_curves/loss.png")
plt.close()

# ============== EVALUATION FUNCTION ==============
def evaluate_and_save(X, y_scaled, name, filter_positive=False):
    y_pred_scaled = model.predict(X)
    results = []
    for i, col in enumerate(TARGET_COLUMNS):
        y_true = output_scalers[col].inverse_transform(y_scaled[:, i].reshape(-1, 1)).flatten()
        y_pred = output_scalers[col].inverse_transform(y_pred_scaled[:, i].reshape(-1, 1)).flatten()
        r2 = r2_score(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        results.append((col, r2, rmse))

    df_results = pd.DataFrame(results, columns=["Target", "R2", "RMSE"])
    df_results = df_results.sort_values("R2", ascending=False)

    if filter_positive:
        df_results = df_results[df_results["R2"] > 0]
    else:
        print(f"\nEvaluation on {name}:")

    print(df_results.to_string(index=False))
    df_results.to_csv(f"{BASE_DIR}/r2_rmse_tables/{name}.csv", index=False)
    return df_results

# ============== RUN EVALUATIONS ==============
evaluate_and_save(X_train, y_train, "train")
evaluate_and_save(X_val, y_val, "val")
evaluate_and_save(X_train_all, y_train_scaled.values, "test")
evaluate_and_save(X_test_all, y_test_scaled.values, "unseen", filter_positive=True)

# ============== PREDICTED vs ACTUAL SCATTER PLOTS (TEST) ==============
y_pred_scaled_test = model.predict(X_train_all)

fig, axes = plt.subplots(5, 3, figsize=(15, 20))
axes = axes.flatten()

for i, col in enumerate(TARGET_COLUMNS):
    y_true = output_scalers[col].inverse_transform(y_train_scaled[col].values.reshape(-1, 1)).flatten()
    y_pred = output_scalers[col].inverse_transform(y_pred_scaled_test[:, i].reshape(-1, 1)).flatten()
    axes[i].scatter(y_true, y_pred, s=10, alpha=0.6, color='teal')
    axes[i].plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--')
    axes[i].set_title(f"{col}")
    axes[i].set_xlabel("Actual")
    axes[i].set_ylabel("Predicted")

plt.tight_layout()
plt.savefig(f"{BASE_DIR}/predicted_vs_actual/scatter_internal_test.png")
plt.close()


Epoch 1/200
[1m1117/1117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.3939 - mae: 0.4335 - val_loss: 0.0870 - val_mae: 0.1555 - learning_rate: 0.0010
Epoch 2/200
[1m1117/1117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.1338 - mae: 0.2378 - val_loss: 0.0813 - val_mae: 0.1540 - learning_rate: 0.0010
Epoch 3/200
[1m1117/1117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 0.1207 - mae: 0.2251 - val_loss: 0.0759 - val_mae: 0.1512 - learning_rate: 0.0010
Epoch 4/200
[1m1117/1117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 0.1140 - mae: 0.2199 - val_loss: 0.0716 - val_mae: 0.1456 - learning_rate: 0.0010
Epoch 5/200
[1m1117/1117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 0.1101 - mae: 0.2167 - val_loss: 0.0659 - val_mae: 0.1325 - learning_rate: 0.0010
Epoch 6/200
[1m1117/1117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - loss: 0.1086 



[1m2233/2233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step

Evaluation on train:
                  Target       R2         RMSE
current_fall_time_pulse1 0.998024 3.403266e-10
current_fall_time_pulse2 0.997805 3.562515e-10
voltage_fall_time_pulse1 0.995632 1.181826e-10
voltage_fall_time_pulse2 0.995628 1.182210e-10
      undershoot_pulse_2 0.990147 1.210193e+00
      undershoot_pulse_1 0.990016 1.214965e+00
voltage_rise_time_pulse1 0.987355 1.593718e-10
   ringing_frequency_MHz 0.970072 4.113790e-06
       overshoot_pulse_1 0.962650 1.809418e+00
current_rise_time_pulse1 0.947605 9.359465e-09
       overshoot_pulse_2 0.925005 5.859627e+00
current_rise_time_pulse2 0.813601 5.189486e-09
voltage_rise_time_pulse2 0.727201 1.065312e-09
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step

Evaluation on val:
                  Target       R2         RMSE
current_fall_time_pulse1 0.997900 3.517334e-10
current_fall_time_pulse2 0.997690 3.662144e-10
voltage

## ITERATION - 5

In [31]:
# ==================== ITERATION 5: PHYSICS FEATURES + ALL REGULARIZATION ====================
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers
import matplotlib.pyplot as plt

# ============== SETTINGS ==============
SEED = 42
UNSEEN_PART = 'C2M0025120D'
BASE_DIR = "fifth_iteration"

os.makedirs(f"{BASE_DIR}/r2_rmse_tables", exist_ok=True)
os.makedirs(f"{BASE_DIR}/train_val_loss_curves", exist_ok=True)
os.makedirs(f"{BASE_DIR}/predicted_vs_actual", exist_ok=True)
os.makedirs(f"{BASE_DIR}/models", exist_ok=True)

# ============== LOAD DATA ==============
df = pd.read_csv(r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs_25percent.csv")

# ============== TARGETS + DROPS ==============
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
DROP_COLUMNS = ['DeviceID', 'MOSFET', 'Part_Number']

# ============== SPLIT SEEN/UNSEEN DEVICES ==============
seen_parts = df['Part_Number'].unique().tolist()
seen_parts = [p for p in seen_parts if p != UNSEEN_PART]
train_df = df[df['Part_Number'].isin(seen_parts)].copy()
test_df = df[df['Part_Number'] == UNSEEN_PART].copy()

# ============== DERIVED PHYSICS FEATURES ==============
def compute_physics_features(row):
    L_eq = row[['Ls4', 'Ls5', 'Ls6', 'Ls7', 'Ls8', 'Ls9', 'Ls10', 'Ls11']].sum()
    C_eq = row.get("Coss", 1e-12)
    f_resonance = 1 / (2 * np.pi * np.sqrt(L_eq * C_eq)) / 1e6 if L_eq > 0 and C_eq > 0 else 0
    overshoot_est = row["VDS_max"] - row["Vbus"]
    undershoot_est = 0 - row["VGS_th_min"]
    dVdt_est = row["VDS_max"] / row["Tp1"] if row["Tp1"] != 0 else 0
    dIdt_est = row["ID_max_25C"] / row["Tp1"] if row["Tp1"] != 0 else 0
    return pd.Series([f_resonance, overshoot_est, undershoot_est, dVdt_est, dIdt_est])

for df_ in [train_df, test_df]:
    df_[['f_resonance', 'overshoot_est', 'undershoot_est', 'dVdt_est', 'dIdt_est']] = df_.apply(compute_physics_features, axis=1)

# ============== ENCODE DEVICE ==============
encoder = LabelEncoder()
encoder.fit(pd.concat([train_df['Part_Number'], test_df['Part_Number']]))
train_df['Part_encoded'] = encoder.transform(train_df['Part_Number'])
test_df['Part_encoded'] = encoder.transform(test_df['Part_Number'])

# ============== INPUT FEATURES ==============
INPUT_COLUMNS = [
    col for col in df.columns if col not in TARGET_COLUMNS + DROP_COLUMNS
] + ['f_resonance', 'overshoot_est', 'undershoot_est', 'dVdt_est', 'dIdt_est', 'Part_encoded']

# ============== SCALE INPUTS ==============
input_scaler = StandardScaler()
input_scaler.fit(pd.concat([train_df[INPUT_COLUMNS], test_df[INPUT_COLUMNS]]))
X_train_all = input_scaler.transform(train_df[INPUT_COLUMNS])
X_test_all = input_scaler.transform(test_df[INPUT_COLUMNS])

# ============== SCALE OUTPUTS ==============
output_scalers = {}
y_train_scaled = pd.DataFrame()
y_test_scaled = pd.DataFrame()

for col in TARGET_COLUMNS:
    scaler = MinMaxScaler() if col == 'ringing_frequency_MHz' else StandardScaler()
    y_train_scaled[col] = scaler.fit_transform(train_df[[col]]).flatten()
    y_test_scaled[col] = scaler.transform(test_df[[col]]).flatten()
    output_scalers[col] = scaler

# ============== SPLIT TRAIN/VAL (70/15/15) ==============
X_train, X_val, y_train, y_val = train_test_split(
    X_train_all, y_train_scaled.values, test_size=0.15, random_state=SEED
)

# ============== DEFINE ANN =================
def build_ann(input_dim, output_dim, dropout=0.2, l2_reg=1e-4):
    model = models.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(128, kernel_regularizer=regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(dropout),
        layers.Dense(128, kernel_regularizer=regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(dropout),
        layers.Dense(output_dim, activation='linear')
    ])
    return model

model = build_ann(X_train.shape[1], y_train.shape[1])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# ============== TRAINING ==============
early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
lr_schedule = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)

history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                    epochs=200, batch_size=64, callbacks=[early_stop, lr_schedule], verbose=1)

model.save(f"{BASE_DIR}/models/iteration5_final_ann.h5")

# ============== SAVE LOSS CURVE ==============
plt.figure()
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.title("Train vs Validation Loss")
plt.legend()
plt.savefig(f"{BASE_DIR}/train_val_loss_curves/loss.png")
plt.close()

# ============== EVALUATION FUNCTION ==============
def evaluate_and_save(X, y_scaled, name, filter_positive=False):
    y_pred_scaled = model.predict(X)
    results = []
    for i, col in enumerate(TARGET_COLUMNS):
        y_true = output_scalers[col].inverse_transform(y_scaled[:, i].reshape(-1, 1)).flatten()
        y_pred = output_scalers[col].inverse_transform(y_pred_scaled[:, i].reshape(-1, 1)).flatten()
        r2 = r2_score(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        results.append((col, r2, rmse))

    df_results = pd.DataFrame(results, columns=["Target", "R2", "RMSE"])
    df_results = df_results.sort_values("R2", ascending=False)
    if filter_positive:
        df_results = df_results[df_results["R2"] > 0]
    else:
        print(f"\nEvaluation on {name}:")

    print(df_results.to_string(index=False))
    df_results.to_csv(f"{BASE_DIR}/r2_rmse_tables/{name}.csv", index=False)
    return df_results

# ============== RUN EVALUATIONS ==============
evaluate_and_save(X_train, y_train, "train")
evaluate_and_save(X_val, y_val, "val")
evaluate_and_save(X_train_all, y_train_scaled.values, "test")
evaluate_and_save(X_test_all, y_test_scaled.values, "unseen", filter_positive=True)

# ============== PREDICTED vs ACTUAL SCATTER PLOTS ==============
y_pred_scaled_test = model.predict(X_train_all)

fig, axes = plt.subplots(5, 3, figsize=(15, 20))
axes = axes.flatten()

for i, col in enumerate(TARGET_COLUMNS):
    y_true = output_scalers[col].inverse_transform(y_train_scaled[col].values.reshape(-1, 1)).flatten()
    y_pred = output_scalers[col].inverse_transform(y_pred_scaled_test[:, i].reshape(-1, 1)).flatten()
    axes[i].scatter(y_true, y_pred, s=10, alpha=0.6, color='darkblue')
    axes[i].plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--')
    axes[i].set_title(f"{col}")
    axes[i].set_xlabel("Actual")
    axes[i].set_ylabel("Predicted")

plt.tight_layout()
plt.savefig(f"{BASE_DIR}/predicted_vs_actual/scatter_internal_test.png")
plt.close()


Epoch 1/200
[1m1117/1117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.3994 - mae: 0.4354 - val_loss: 0.0878 - val_mae: 0.1569 - learning_rate: 0.0010
Epoch 2/200
[1m1117/1117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.1347 - mae: 0.2395 - val_loss: 0.0784 - val_mae: 0.1466 - learning_rate: 0.0010
Epoch 3/200
[1m1117/1117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.1203 - mae: 0.2246 - val_loss: 0.0721 - val_mae: 0.1408 - learning_rate: 0.0010
Epoch 4/200
[1m1117/1117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.1137 - mae: 0.2193 - val_loss: 0.0706 - val_mae: 0.1430 - learning_rate: 0.0010
Epoch 5/200
[1m1117/1117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.1101 - mae: 0.2163 - val_loss: 0.0688 - val_mae: 0.1393 - learning_rate: 0.0010
Epoch 6/200
[1m1117/1117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.1081 -



[1m2233/2233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step  

Evaluation on train:
                  Target       R2         RMSE
current_fall_time_pulse1 0.998211 3.238791e-10
current_fall_time_pulse2 0.998126 3.291592e-10
voltage_fall_time_pulse1 0.995686 1.174541e-10
voltage_fall_time_pulse2 0.995679 1.175339e-10
      undershoot_pulse_2 0.990060 1.215536e+00
      undershoot_pulse_1 0.989934 1.219964e+00
   ringing_frequency_MHz 0.989704 2.231612e+00
voltage_rise_time_pulse1 0.987426 1.589193e-10
       overshoot_pulse_1 0.962610 1.810392e+00
current_rise_time_pulse1 0.947821 9.340172e-09
       overshoot_pulse_2 0.925218 5.851290e+00
current_rise_time_pulse2 0.812061 5.210873e-09
voltage_rise_time_pulse2 0.726974 1.065753e-09
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 937us/step

Evaluation on val:
                  Target       R2         RMSE
current_fall_time_pulse1 0.998138 3.312043e-10
current_fall_time_pulse2 0.998070 3.347482e-10
vol