# Baseline ANN Model 

Here we sample 25% of the complete dataset from each unique simulation combination of the following parameters:


['Vbus', 'Ls4', 'Ls5', 'Ls6', 'Ls7', 'Ls8', 'Ls9', 'Ls10', 'Ls11']

| Iteration | Dropout | L2 Reg. | BatchNorm | EarlyStopping | LR Scheduler      | Scaling                              | Physics Features | Notes                               |
| --------- | ------- | ------- | --------- | ------------- | ----------------- | ------------------------------------ | ---------------- | ----------------------------------- |
| **1**     | 0.2     | 1e-4    | —         | Yes           | —                 | StdScaler (X, y) + Ringing→MinMax    | No               | Baseline: dropout + L2 + early stop |
| **2**     | —       | —       | —         | Yes           | —                 | StdScaler (X, y) + Ringing→MinMax    | No               | No regularisation (early stop only) |
| **3**     | —       | 1e-4    | Yes       | Yes           | —                 | StdScaler (X, y) + Ringing→MinMax    | No               | Best baseline (good generalisation) |
| **4**     | 0.2     | 1e-4    | Yes       | Yes           | ReduceLROnPlateau | StdScaler (X, y) + Ringing→MinMax    | No               | LR scheduling added                 |
| **5**     | 0.2     | 1e-4    | Yes       | Yes           | ReduceLROnPlateau | StdScaler (X) + Ringing→MinMax       | Yes              | Physics features added              |


## ITERATION - 1

In [1]:
# ==================== ITERATION 1: BASELINE ANN WITH PART_NUMBER GENERALIZATION ====================
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers
import matplotlib.pyplot as plt

# ============== SETTINGS ==============
SEED = 42
UNSEEN_PART = 'C2M0280120D'  # part number held out of training (the "unseen" device)

# Seed the Python, NumPy and TensorFlow RNGs. Without this, SEED only reaches
# train_test_split, so weight initialisation, dropout masks and batch
# shuffling change from run to run.
tf.keras.utils.set_random_seed(SEED)

BASE_DIR = "first_iteration"
# One output folder per artefact type; exist_ok keeps cell re-runs safe.
for _subdir in ("r2_rmse_tables", "train_val_loss_curves", "predicted_vs_actual", "models"):
    os.makedirs(f"{BASE_DIR}/{_subdir}", exist_ok=True)

# ============== LOAD DATA ==============
# NOTE(review): absolute, machine-specific path; the cell only runs where this file exists.
df = pd.read_csv(r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs_25percent_balanced.csv")

# ============== TARGETS + DROPS ==============
# Columns the network predicts.
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
# Identifier columns that are never used as model inputs.
DROP_COLUMNS = ['DeviceID', 'MOSFET', 'Part_Number']

# ============== SPLIT SEEN/UNSEEN DEVICES ==============
parts = df['Part_Number'].unique().tolist()
# Fail early: a mistyped part number would otherwise produce an empty
# unseen set and an obscure error much later in the pipeline.
if UNSEEN_PART not in parts:
    raise ValueError(
        f"UNSEEN_PART {UNSEEN_PART!r} not found in 'Part_Number'; available parts: {parts}"
    )
seen_parts = [p for p in parts if p != UNSEEN_PART]
train_df = df[df['Part_Number'].isin(seen_parts)].copy()
test_df = df[df['Part_Number'] == UNSEEN_PART].copy()

# ============== DERIVED FEATURE ==============
def compute_ringing_frequency(row):
    """Return the LC resonance frequency 1 / (2*pi*sqrt(L*C)) divided by 1e6.

    L is the sum of the row's 'Ls4' ... 'Ls11' entries and C is the row's
    'Coss' entry (1e-12 is used when the row has no 'Coss' key).
    NOTE(review): the result is in MHz only if the inductances are in henries
    and Coss is in farads; confirm the units of the CSV columns.

    Args:
        row: a pandas Series (one DataFrame row) holding the columns above.

    Returns:
        The scaled frequency, or NaN when L or C is not strictly positive.
    """
    inductance_cols = ['Ls4', 'Ls5', 'Ls6', 'Ls7', 'Ls8', 'Ls9', 'Ls10', 'Ls11']
    total_inductance = row[inductance_cols].sum()
    capacitance = row.get("Coss", 1e-12)

    # Guard clause: the square root below needs a strictly positive product.
    if not (total_inductance > 0 and capacitance > 0):
        return np.nan

    denominator = 2 * np.pi * np.sqrt(total_inductance * capacitance)
    return 1 / denominator / 1e6

# Add the derived ringing-frequency column to both splits (row-wise apply).
for df_ in [train_df, test_df]:
    df_['ringing_frequency_MHz'] = df_.apply(compute_ringing_frequency, axis=1)

# ============== ENCODE DEVICE =================
# The encoder is fitted on seen + unseen part numbers so transform() accepts
# the held-out device.
# NOTE(review): the unseen part's integer code never occurs during training;
# confirm that feeding this ordinal code to the network is intended.
encoder = LabelEncoder()
all_parts = pd.concat([train_df['Part_Number'], test_df['Part_Number']])
encoder.fit(all_parts)

train_df['Part_encoded'] = encoder.transform(train_df['Part_Number'])
test_df['Part_encoded'] = encoder.transform(test_df['Part_Number'])

# ============== INPUT FEATURES ==============
# Build the exclusion set once instead of concatenating two lists per column.
_excluded_columns = set(TARGET_COLUMNS + DROP_COLUMNS)
INPUT_COLUMNS = [col for col in df.columns if col not in _excluded_columns]
INPUT_COLUMNS.append("Part_encoded")  # Add encoded device as feature

# ============== SCALE INPUTS ==============
# Fit on the seen devices only. Fitting on train + unseen rows (as before)
# leaks the held-out device's feature statistics into preprocessing and
# makes the "unseen" evaluation optimistic.
input_scaler = StandardScaler()
input_scaler.fit(train_df[INPUT_COLUMNS])
X_train_all = input_scaler.transform(train_df[INPUT_COLUMNS])
X_test_all = input_scaler.transform(test_df[INPUT_COLUMNS])

# ============== SCALE OUTPUTS ==============
# One scaler per target, fitted on the seen devices only and kept in
# output_scalers so predictions can be mapped back to original units.
output_scalers = {}
y_train_scaled = pd.DataFrame()
y_test_scaled = pd.DataFrame()

for col in TARGET_COLUMNS:
    scaler = MinMaxScaler() if col == 'ringing_frequency_MHz' else StandardScaler()
    y_train_scaled[col] = scaler.fit_transform(train_df[[col]]).flatten()
    y_test_scaled[col] = scaler.transform(test_df[[col]]).flatten()
    output_scalers[col] = scaler

# ============== SPLIT TRAIN/VAL (85/15) ==============
# NOTE(review): the original header advertised 70/15/15, but only a single
# 85/15 train/validation split is made; no internal test set is held out.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_all, y_train_scaled.values, test_size=0.15, random_state=SEED
)

# ============== DEFINE BASELINE ANN ==============
def build_ann(input_dim, output_dim, dropout=0.2, l2_reg=1e-4):
    """Assemble the uncompiled feed-forward regressor for this iteration.

    Layout: input -> 2 x [Dense(128, ReLU, L2 kernel penalty) -> Dropout]
    -> linear Dense output.

    Args:
        input_dim: number of input features.
        output_dim: number of regression targets.
        dropout: rate passed to each Dropout layer.
        l2_reg: factor passed to the L2 kernel regularizer of each hidden layer.

    Returns:
        An uncompiled `models.Sequential` instance.
    """
    stack = [layers.Input(shape=(input_dim,))]
    # Two identical hidden blocks, created in the same order as a literal list.
    for _ in range(2):
        stack.append(
            layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(l2_reg))
        )
        stack.append(layers.Dropout(dropout))
    stack.append(layers.Dense(output_dim, activation='linear'))
    return models.Sequential(stack)

# Network width is taken from the prepared arrays: features in, targets out.
n_features, n_targets = X_train.shape[1], y_train.shape[1]
model = build_ann(n_features, n_targets)
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# ============== TRAIN MODEL ==============
# Stop after 10 epochs without val_loss improvement and roll back to the best weights.
early_stop = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=64,
    callbacks=[early_stop],
    verbose=1,
)

model.save(f"{BASE_DIR}/models/baseline_ann.h5")

# ============== SAVE LOSS CURVE ==============
# Object-oriented matplotlib calls; the figure is closed after saving.
loss_fig, loss_ax = plt.subplots()
for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Val Loss')):
    loss_ax.plot(history.history[history_key], label=curve_label)
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("MSE Loss")
loss_ax.set_title("Train vs Validation Loss")
loss_ax.legend()
loss_fig.savefig(f"{BASE_DIR}/train_val_loss_curves/loss.png")
plt.close(loss_fig)

# ============== EVALUATION FUNCTION ==============
def evaluate_and_save(X, y_scaled, name, filter_positive=False):
    """Score the module-level `model` on one dataset, print and save the table.

    Predictions and targets are mapped back to original units with the
    per-target scalers in `output_scalers` before R2 and RMSE are computed.
    The table is written to `{BASE_DIR}/r2_rmse_tables/{name}.csv`.

    Args:
        X: scaled input array passed to `model.predict`.
        y_scaled: 2-D array of scaled targets, columns ordered as `TARGET_COLUMNS`.
        name: dataset label, used in the printed header and the CSV file name.
        filter_positive: when True, only targets with R2 > 0 are kept in the
            printed, saved and returned table.

    Returns:
        DataFrame with columns Target, R2, RMSE, sorted by descending R2.
    """
    y_pred_scaled = model.predict(X)
    results = []
    for i, col in enumerate(TARGET_COLUMNS):
        scaler = output_scalers[col]
        # Scalers expect 2-D input, hence the reshape / flatten round trip.
        y_true = scaler.inverse_transform(y_scaled[:, i].reshape(-1, 1)).flatten()
        y_pred = scaler.inverse_transform(y_pred_scaled[:, i].reshape(-1, 1)).flatten()
        r2 = r2_score(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        results.append((col, r2, rmse))

    df_results = pd.DataFrame(results, columns=["Target", "R2", "RMSE"])
    df_results = df_results.sort_values("R2", ascending=False)

    # The header used to sit in the `else` of the filter branch, so filtered
    # tables were printed without any label; print it unconditionally.
    print(f"\nEvaluation on {name}:")

    if filter_positive:
        keep = df_results["R2"] > 0
        omitted = df_results.loc[~keep, "Target"].tolist()
        df_results = df_results[keep]
        # Say what was filtered so poorly predicted targets do not vanish silently.
        if omitted:
            print(f"(omitted {len(omitted)} target(s) with non-positive R2: {', '.join(omitted)})")

    print(df_results.to_string(index=False))
    df_results.to_csv(f"{BASE_DIR}/r2_rmse_tables/{name}.csv", index=False)
    return df_results


# ============== RUN EVALUATIONS ==============
# NOTE(review): X_train_all is the full seen-device set (train + validation
# rows), so the "test" table and the "internal test" scatter plot below do
# not measure held-out data. Only "unseen" is independent of training.
evaluate_and_save(X_train, y_train, "train")
evaluate_and_save(X_val, y_val, "val")
evaluate_and_save(X_train_all, y_train_scaled.values, "test")
evaluate_and_save(X_test_all, y_test_scaled.values, "unseen", filter_positive=True)

# ============== PREDICTED vs ACTUAL (INTERNAL TEST) SCATTER PLOTS ==============
y_pred_scaled_test = model.predict(X_train_all)

fig, axes = plt.subplots(5, 3, figsize=(15, 20))
axes = axes.flatten()

for i, col in enumerate(TARGET_COLUMNS):
    # Back to original units so both axes are physically meaningful.
    y_true = output_scalers[col].inverse_transform(y_train_scaled[col].values.reshape(-1, 1)).flatten()
    y_pred = output_scalers[col].inverse_transform(y_pred_scaled_test[:, i].reshape(-1, 1)).flatten()
    axes[i].scatter(y_true, y_pred, s=10, alpha=0.6, color='lightblue')
    # Dashed diagonal = perfect prediction.
    axes[i].plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--')
    axes[i].set_title(f"{col}")
    axes[i].set_xlabel("Actual")
    axes[i].set_ylabel("Predicted")

# The 5x3 grid has more panels than targets; hide the leftover empty axes
# instead of saving blank framed boxes.
for unused_ax in axes[len(TARGET_COLUMNS):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.savefig(f"{BASE_DIR}/predicted_vs_actual/scatter_internal_test.png")
plt.close()



Epoch 1/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 0.2624 - mae: 0.3301 - val_loss: 0.0934 - val_mae: 0.1544
Epoch 2/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.1239 - mae: 0.2124 - val_loss: 0.0853 - val_mae: 0.1490
Epoch 3/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.1146 - mae: 0.2013 - val_loss: 0.0805 - val_mae: 0.1399
Epoch 4/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.1089 - mae: 0.1962 - val_loss: 0.0780 - val_mae: 0.1407
Epoch 5/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.1050 - mae: 0.1932 - val_loss: 0.0757 - val_mae: 0.1340
Epoch 6/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.1036 - mae: 0.1910 - val_loss: 0.0752 - val_mae: 0.1312
Epoch 7/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 



[1m2294/2294[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 734us/step

Evaluation on train:
                  Target       R2         RMSE
voltage_fall_time_pulse2 0.996887 3.358734e-10
voltage_fall_time_pulse1 0.996730 3.446123e-10
current_fall_time_pulse1 0.994112 9.425589e-10
current_fall_time_pulse2 0.994040 9.428344e-10
voltage_rise_time_pulse1 0.976340 5.327053e-10
   ringing_frequency_MHz 0.974398 2.713996e-06
      undershoot_pulse_2 0.971263 1.845545e+00
       overshoot_pulse_1 0.969172 2.369116e+00
      undershoot_pulse_1 0.968585 1.934708e+00
voltage_rise_time_pulse2 0.895608 1.422548e-09
current_rise_time_pulse2 0.874312 9.646222e-09
       overshoot_pulse_2 0.869635 7.144922e+00
current_rise_time_pulse1 0.670563 1.359558e-08
[1m405/405[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 783us/step

Evaluation on val:
                  Target       R2         RMSE
voltage_fall_time_pulse2 0.996730 3.431592e-10
voltage_fall_time_pulse1 0.996633 3.486319e-10
cur

## ITERATION - 2

In [2]:
# ==================== ITERATION 2: NO REGULARIZATION ANN ====================
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
import matplotlib.pyplot as plt

# ============== SETTINGS ==============
SEED = 42
UNSEEN_PART = 'C2M0280120D'  # part number held out of training (the "unseen" device)

# Seed the Python, NumPy and TensorFlow RNGs. Without this, SEED only reaches
# train_test_split, so weight initialisation and batch shuffling change from
# run to run.
tf.keras.utils.set_random_seed(SEED)

BASE_DIR = "second_iteration"
# One output folder per artefact type; exist_ok keeps cell re-runs safe.
for _subdir in ("r2_rmse_tables", "train_val_loss_curves", "predicted_vs_actual", "models"):
    os.makedirs(f"{BASE_DIR}/{_subdir}", exist_ok=True)

# ============== LOAD DATA ==============
# NOTE(review): absolute, machine-specific path; the cell only runs where this file exists.
df = pd.read_csv(r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs_25percent_balanced.csv")

# ============== TARGETS + DROPS ==============
# Columns the network predicts.
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
# Identifier columns that are never used as model inputs.
DROP_COLUMNS = ['DeviceID', 'MOSFET', 'Part_Number']

# ============== SPLIT SEEN/UNSEEN DEVICES ==============
parts = df['Part_Number'].unique().tolist()
# Fail early: a mistyped part number would otherwise produce an empty
# unseen set and an obscure error much later in the pipeline.
if UNSEEN_PART not in parts:
    raise ValueError(
        f"UNSEEN_PART {UNSEEN_PART!r} not found in 'Part_Number'; available parts: {parts}"
    )
seen_parts = [p for p in parts if p != UNSEEN_PART]
train_df = df[df['Part_Number'].isin(seen_parts)].copy()
test_df = df[df['Part_Number'] == UNSEEN_PART].copy()

# ============== DERIVED FEATURE ==============
def compute_ringing_frequency(row):
    """Return the LC resonance frequency 1 / (2*pi*sqrt(L*C)) divided by 1e6.

    L is the sum of the row's 'Ls4' ... 'Ls11' entries and C is the row's
    'Coss' entry (1e-12 is used when the row has no 'Coss' key).
    NOTE(review): the result is in MHz only if the inductances are in henries
    and Coss is in farads; confirm the units of the CSV columns.

    Args:
        row: a pandas Series (one DataFrame row) holding the columns above.

    Returns:
        The scaled frequency, or NaN when L or C is not strictly positive.
    """
    inductance_cols = ['Ls4', 'Ls5', 'Ls6', 'Ls7', 'Ls8', 'Ls9', 'Ls10', 'Ls11']
    total_inductance = row[inductance_cols].sum()
    capacitance = row.get("Coss", 1e-12)

    # Guard clause: the square root below needs a strictly positive product.
    if not (total_inductance > 0 and capacitance > 0):
        return np.nan

    denominator = 2 * np.pi * np.sqrt(total_inductance * capacitance)
    return 1 / denominator / 1e6

# Add the derived ringing-frequency column to both splits (row-wise apply).
for df_ in [train_df, test_df]:
    df_['ringing_frequency_MHz'] = df_.apply(compute_ringing_frequency, axis=1)

# ============== ENCODE DEVICE =================
# The encoder is fitted on seen + unseen part numbers so transform() accepts
# the held-out device.
# NOTE(review): the unseen part's integer code never occurs during training;
# confirm that feeding this ordinal code to the network is intended.
encoder = LabelEncoder()
all_parts = pd.concat([train_df['Part_Number'], test_df['Part_Number']])
encoder.fit(all_parts)

train_df['Part_encoded'] = encoder.transform(train_df['Part_Number'])
test_df['Part_encoded'] = encoder.transform(test_df['Part_Number'])

# ============== INPUT FEATURES ==============
# Build the exclusion set once instead of concatenating two lists per column.
_excluded_columns = set(TARGET_COLUMNS + DROP_COLUMNS)
INPUT_COLUMNS = [col for col in df.columns if col not in _excluded_columns]
INPUT_COLUMNS.append("Part_encoded")  # Add encoded device as feature

# ============== SCALE INPUTS ==============
# Fit on the seen devices only. Fitting on train + unseen rows (as before)
# leaks the held-out device's feature statistics into preprocessing and
# makes the "unseen" evaluation optimistic.
input_scaler = StandardScaler()
input_scaler.fit(train_df[INPUT_COLUMNS])
X_train_all = input_scaler.transform(train_df[INPUT_COLUMNS])
X_test_all = input_scaler.transform(test_df[INPUT_COLUMNS])

# ============== SCALE OUTPUTS ==============
# One scaler per target, fitted on the seen devices only and kept in
# output_scalers so predictions can be mapped back to original units.
output_scalers = {}
y_train_scaled = pd.DataFrame()
y_test_scaled = pd.DataFrame()

for col in TARGET_COLUMNS:
    scaler = MinMaxScaler() if col == 'ringing_frequency_MHz' else StandardScaler()
    y_train_scaled[col] = scaler.fit_transform(train_df[[col]]).flatten()
    y_test_scaled[col] = scaler.transform(test_df[[col]]).flatten()
    output_scalers[col] = scaler

# ============== SPLIT TRAIN/VAL (85/15) ==============
# NOTE(review): the original header advertised 70/15/15, but only a single
# 85/15 train/validation split is made; no internal test set is held out.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_all, y_train_scaled.values, test_size=0.15, random_state=SEED
)

# ============== DEFINE NO REGULARIZATION ANN ==============
def build_ann(input_dim, output_dim):
    """Assemble the uncompiled, unregularized feed-forward regressor.

    Layout: input -> 2 x Dense(128, ReLU) -> linear Dense output.

    Args:
        input_dim: number of input features.
        output_dim: number of regression targets.

    Returns:
        An uncompiled `models.Sequential` instance.
    """
    stack = [layers.Input(shape=(input_dim,))]
    # Two identical hidden layers, created in the same order as a literal list.
    for _ in range(2):
        stack.append(layers.Dense(128, activation='relu'))
    stack.append(layers.Dense(output_dim, activation='linear'))
    return models.Sequential(stack)

# Network width is taken from the prepared arrays: features in, targets out.
n_features, n_targets = X_train.shape[1], y_train.shape[1]
model = build_ann(n_features, n_targets)
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# ============== TRAIN MODEL ==============
# Stop after 10 epochs without val_loss improvement and roll back to the best weights.
early_stop = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=64,
    callbacks=[early_stop],
    verbose=1,
)

model.save(f"{BASE_DIR}/models/no_regularization_ann.h5")

# ============== SAVE LOSS CURVE ==============
# Object-oriented matplotlib calls; the figure is closed after saving.
loss_fig, loss_ax = plt.subplots()
for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Val Loss')):
    loss_ax.plot(history.history[history_key], label=curve_label)
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("MSE Loss")
loss_ax.set_title("Train vs Validation Loss")
loss_ax.legend()
loss_fig.savefig(f"{BASE_DIR}/train_val_loss_curves/loss.png")
plt.close(loss_fig)

# ============== EVALUATION FUNCTION ==============
def evaluate_and_save(X, y_scaled, name, filter_positive=False):
    """Score the module-level `model` on one dataset, print and save the table.

    Predictions and targets are mapped back to original units with the
    per-target scalers in `output_scalers` before R2 and RMSE are computed.
    The table is written to `{BASE_DIR}/r2_rmse_tables/{name}.csv`.

    Args:
        X: scaled input array passed to `model.predict`.
        y_scaled: 2-D array of scaled targets, columns ordered as `TARGET_COLUMNS`.
        name: dataset label, used in the printed header and the CSV file name.
        filter_positive: when True, only targets with R2 > 0 are kept in the
            printed, saved and returned table.

    Returns:
        DataFrame with columns Target, R2, RMSE, sorted by descending R2.
    """
    y_pred_scaled = model.predict(X)
    results = []
    for i, col in enumerate(TARGET_COLUMNS):
        scaler = output_scalers[col]
        # Scalers expect 2-D input, hence the reshape / flatten round trip.
        y_true = scaler.inverse_transform(y_scaled[:, i].reshape(-1, 1)).flatten()
        y_pred = scaler.inverse_transform(y_pred_scaled[:, i].reshape(-1, 1)).flatten()
        r2 = r2_score(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        results.append((col, r2, rmse))

    df_results = pd.DataFrame(results, columns=["Target", "R2", "RMSE"])
    df_results = df_results.sort_values("R2", ascending=False)

    # The header used to sit in the `else` of the filter branch, so filtered
    # tables were printed without any label; print it unconditionally.
    print(f"\nEvaluation on {name}:")

    if filter_positive:
        keep = df_results["R2"] > 0
        omitted = df_results.loc[~keep, "Target"].tolist()
        df_results = df_results[keep]
        # Say what was filtered so poorly predicted targets do not vanish silently.
        if omitted:
            print(f"(omitted {len(omitted)} target(s) with non-positive R2: {', '.join(omitted)})")

    print(df_results.to_string(index=False))
    df_results.to_csv(f"{BASE_DIR}/r2_rmse_tables/{name}.csv", index=False)
    return df_results

# ============== RUN EVALUATIONS ==============
# NOTE(review): X_train_all is the full seen-device set (train + validation
# rows), so the "test" table and the "internal test" scatter plot below do
# not measure held-out data. Only "unseen" is independent of training.
evaluate_and_save(X_train, y_train, "train")
evaluate_and_save(X_val, y_val, "val")
evaluate_and_save(X_train_all, y_train_scaled.values, "test")
evaluate_and_save(X_test_all, y_test_scaled.values, "unseen", filter_positive=True)

# ============== PREDICTED vs ACTUAL (INTERNAL TEST) SCATTER PLOTS ==============
y_pred_scaled_test = model.predict(X_train_all)

fig, axes = plt.subplots(5, 3, figsize=(15, 20))
axes = axes.flatten()

for i, col in enumerate(TARGET_COLUMNS):
    # Back to original units so both axes are physically meaningful.
    y_true = output_scalers[col].inverse_transform(y_train_scaled[col].values.reshape(-1, 1)).flatten()
    y_pred = output_scalers[col].inverse_transform(y_pred_scaled_test[:, i].reshape(-1, 1)).flatten()
    axes[i].scatter(y_true, y_pred, s=10, alpha=0.6, color='orange')
    # Dashed diagonal = perfect prediction.
    axes[i].plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--')
    axes[i].set_title(f"{col}")
    axes[i].set_xlabel("Actual")
    axes[i].set_ylabel("Predicted")

# The 5x3 grid has more panels than targets; hide the leftover empty axes
# instead of saving blank framed boxes.
for unused_ax in axes[len(TARGET_COLUMNS):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.savefig(f"{BASE_DIR}/predicted_vs_actual/scatter_internal_test.png")
plt.close()


Epoch 1/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.1634 - mae: 0.2348 - val_loss: 0.0746 - val_mae: 0.1431
Epoch 2/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.0696 - mae: 0.1345 - val_loss: 0.0657 - val_mae: 0.1268
Epoch 3/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0639 - mae: 0.1233 - val_loss: 0.0617 - val_mae: 0.1185
Epoch 4/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.0622 - mae: 0.1191 - val_loss: 0.0601 - val_mae: 0.1151
Epoch 5/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0607 - mae: 0.1159 - val_loss: 0.0625 - val_mae: 0.1172
Epoch 6/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0590 - mae: 0.1131 - val_loss: 0.0601 - val_mae: 0.1131
Epoch 7/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 



[1m2294/2294[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 637us/step

Evaluation on train:
                  Target       R2         RMSE
voltage_fall_time_pulse2 0.999015 1.889026e-10
voltage_fall_time_pulse1 0.999006 1.900073e-10
current_fall_time_pulse2 0.998493 4.741788e-10
current_fall_time_pulse1 0.998469 4.806277e-10
   ringing_frequency_MHz 0.995367 1.154531e-06
voltage_rise_time_pulse1 0.989127 3.611277e-10
       overshoot_pulse_1 0.981652 1.827739e+00
      undershoot_pulse_2 0.979909 1.543133e+00
      undershoot_pulse_1 0.977799 1.626431e+00
voltage_rise_time_pulse2 0.912729 1.300669e-09
current_rise_time_pulse2 0.899569 8.622724e-09
       overshoot_pulse_2 0.883233 6.762028e+00
current_rise_time_pulse1 0.689346 1.320231e-08
[1m405/405[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 667us/step

Evaluation on val:
                  Target       R2         RMSE
voltage_fall_time_pulse2 0.998994 1.903870e-10
voltage_fall_time_pulse1 0.998973 1.925826e-10
cur

## ITERATION - 3

In [3]:
# ==================== ITERATION 3: BATCHNORM + L2, NO DROPOUT ====================
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers
import matplotlib.pyplot as plt

# ============== SETTINGS ==============
SEED = 42
UNSEEN_PART = 'C2M0280120D'  # part number held out of training (the "unseen" device)

# Seed the Python, NumPy and TensorFlow RNGs. Without this, SEED only reaches
# train_test_split, so weight initialisation and batch shuffling change from
# run to run.
tf.keras.utils.set_random_seed(SEED)

BASE_DIR = "third_iteration"
# One output folder per artefact type; exist_ok keeps cell re-runs safe.
for _subdir in ("r2_rmse_tables", "train_val_loss_curves", "predicted_vs_actual", "models"):
    os.makedirs(f"{BASE_DIR}/{_subdir}", exist_ok=True)

# ============== LOAD DATA ==============
# NOTE(review): absolute, machine-specific path; the cell only runs where this file exists.
df = pd.read_csv(r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs_25percent_balanced.csv")

# ============== TARGETS + DROPS ==============
# Columns the network predicts.
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
# Identifier columns that are never used as model inputs.
DROP_COLUMNS = ['DeviceID', 'MOSFET', 'Part_Number']

# ============== SPLIT SEEN/UNSEEN DEVICES ==============
parts = df['Part_Number'].unique().tolist()
# Fail early: a mistyped part number would otherwise produce an empty
# unseen set and an obscure error much later in the pipeline.
if UNSEEN_PART not in parts:
    raise ValueError(
        f"UNSEEN_PART {UNSEEN_PART!r} not found in 'Part_Number'; available parts: {parts}"
    )
seen_parts = [p for p in parts if p != UNSEEN_PART]
train_df = df[df['Part_Number'].isin(seen_parts)].copy()
test_df = df[df['Part_Number'] == UNSEEN_PART].copy()

# ============== DERIVED FEATURE ==============
def compute_ringing_frequency(row):
    """Return the LC resonance frequency 1 / (2*pi*sqrt(L*C)) divided by 1e6.

    L is the sum of the row's 'Ls4' ... 'Ls11' entries and C is the row's
    'Coss' entry (1e-12 is used when the row has no 'Coss' key).
    NOTE(review): the result is in MHz only if the inductances are in henries
    and Coss is in farads; confirm the units of the CSV columns.

    Args:
        row: a pandas Series (one DataFrame row) holding the columns above.

    Returns:
        The scaled frequency, or NaN when L or C is not strictly positive.
    """
    inductance_cols = ['Ls4', 'Ls5', 'Ls6', 'Ls7', 'Ls8', 'Ls9', 'Ls10', 'Ls11']
    total_inductance = row[inductance_cols].sum()
    capacitance = row.get("Coss", 1e-12)

    # Guard clause: the square root below needs a strictly positive product.
    if not (total_inductance > 0 and capacitance > 0):
        return np.nan

    denominator = 2 * np.pi * np.sqrt(total_inductance * capacitance)
    return 1 / denominator / 1e6

# Add the derived ringing-frequency column to both splits (row-wise apply).
for df_ in [train_df, test_df]:
    df_['ringing_frequency_MHz'] = df_.apply(compute_ringing_frequency, axis=1)

# ============== ENCODE DEVICE =================
# The encoder is fitted on seen + unseen part numbers so transform() accepts
# the held-out device.
# NOTE(review): the unseen part's integer code never occurs during training;
# confirm that feeding this ordinal code to the network is intended.
encoder = LabelEncoder()
all_parts = pd.concat([train_df['Part_Number'], test_df['Part_Number']])
encoder.fit(all_parts)

train_df['Part_encoded'] = encoder.transform(train_df['Part_Number'])
test_df['Part_encoded'] = encoder.transform(test_df['Part_Number'])

# ============== INPUT FEATURES ==============
# Build the exclusion set once instead of concatenating two lists per column.
_excluded_columns = set(TARGET_COLUMNS + DROP_COLUMNS)
INPUT_COLUMNS = [col for col in df.columns if col not in _excluded_columns]
INPUT_COLUMNS.append("Part_encoded")

# ============== SCALE INPUTS ==============
# Fit on the seen devices only. Fitting on train + unseen rows (as before)
# leaks the held-out device's feature statistics into preprocessing and
# makes the "unseen" evaluation optimistic.
input_scaler = StandardScaler()
input_scaler.fit(train_df[INPUT_COLUMNS])
X_train_all = input_scaler.transform(train_df[INPUT_COLUMNS])
X_test_all = input_scaler.transform(test_df[INPUT_COLUMNS])

# ============== SCALE OUTPUTS ==============
# One scaler per target, fitted on the seen devices only and kept in
# output_scalers so predictions can be mapped back to original units.
output_scalers = {}
y_train_scaled = pd.DataFrame()
y_test_scaled = pd.DataFrame()

for col in TARGET_COLUMNS:
    scaler = MinMaxScaler() if col == 'ringing_frequency_MHz' else StandardScaler()
    y_train_scaled[col] = scaler.fit_transform(train_df[[col]]).flatten()
    y_test_scaled[col] = scaler.transform(test_df[[col]]).flatten()
    output_scalers[col] = scaler

# ============== SPLIT TRAIN/VAL (85/15) ==============
# NOTE(review): the original header advertised 70/15/15, but only a single
# 85/15 train/validation split is made; no internal test set is held out.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_all, y_train_scaled.values, test_size=0.15, random_state=SEED
)

# ============== DEFINE ANN with BATCHNORM + L2 =================
def build_ann(input_dim, output_dim, l2_reg=1e-4):
    """Assemble the uncompiled BatchNorm + L2 feed-forward regressor.

    Layout: input -> 2 x [Dense(128, no activation, L2 kernel penalty)
    -> BatchNormalization -> ReLU] -> linear Dense output.

    Args:
        input_dim: number of input features.
        output_dim: number of regression targets.
        l2_reg: factor passed to the L2 kernel regularizer of each hidden layer.

    Returns:
        An uncompiled `models.Sequential` instance.
    """
    stack = [layers.Input(shape=(input_dim,))]
    # Two identical hidden blocks; normalization sits before the non-linearity.
    for _ in range(2):
        stack.append(
            layers.Dense(128, activation=None, kernel_regularizer=regularizers.l2(l2_reg))
        )
        stack.append(layers.BatchNormalization())
        stack.append(layers.Activation('relu'))
    stack.append(layers.Dense(output_dim, activation='linear'))
    return models.Sequential(stack)

# Network width is taken from the prepared arrays: features in, targets out.
n_features, n_targets = X_train.shape[1], y_train.shape[1]
model = build_ann(n_features, n_targets)
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# ============== TRAIN MODEL ==============
# Stop after 10 epochs without val_loss improvement and roll back to the best weights.
early_stop = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=64,
    callbacks=[early_stop],
    verbose=1,
)

model.save(f"{BASE_DIR}/models/bn_l2_ann.h5")

# ============== SAVE LOSS CURVE ==============
# Object-oriented matplotlib calls; the figure is closed after saving.
loss_fig, loss_ax = plt.subplots()
for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Val Loss')):
    loss_ax.plot(history.history[history_key], label=curve_label)
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("MSE Loss")
loss_ax.set_title("Train vs Validation Loss")
loss_ax.legend()
loss_fig.savefig(f"{BASE_DIR}/train_val_loss_curves/loss.png")
plt.close(loss_fig)

# ============== EVALUATION FUNCTION ==============
def evaluate_and_save(X, y_scaled, name, filter_positive=False):
    """Score the module-level `model` on one dataset, print and save the table.

    Predictions and targets are mapped back to original units with the
    per-target scalers in `output_scalers` before R2 and RMSE are computed.
    The table is written to `{BASE_DIR}/r2_rmse_tables/{name}.csv`.

    Args:
        X: scaled input array passed to `model.predict`.
        y_scaled: 2-D array of scaled targets, columns ordered as `TARGET_COLUMNS`.
        name: dataset label, used in the printed header and the CSV file name.
        filter_positive: when True, only targets with R2 > 0 are kept in the
            printed, saved and returned table.

    Returns:
        DataFrame with columns Target, R2, RMSE, sorted by descending R2.
    """
    y_pred_scaled = model.predict(X)
    results = []
    for i, col in enumerate(TARGET_COLUMNS):
        scaler = output_scalers[col]
        # Scalers expect 2-D input, hence the reshape / flatten round trip.
        y_true = scaler.inverse_transform(y_scaled[:, i].reshape(-1, 1)).flatten()
        y_pred = scaler.inverse_transform(y_pred_scaled[:, i].reshape(-1, 1)).flatten()
        r2 = r2_score(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        results.append((col, r2, rmse))

    df_results = pd.DataFrame(results, columns=["Target", "R2", "RMSE"])
    df_results = df_results.sort_values("R2", ascending=False)

    # The header used to sit in the `else` of the filter branch, so filtered
    # tables were printed without any label; print it unconditionally.
    print(f"\nEvaluation on {name}:")

    if filter_positive:
        keep = df_results["R2"] > 0
        omitted = df_results.loc[~keep, "Target"].tolist()
        df_results = df_results[keep]
        # Say what was filtered so poorly predicted targets do not vanish silently.
        if omitted:
            print(f"(omitted {len(omitted)} target(s) with non-positive R2: {', '.join(omitted)})")

    print(df_results.to_string(index=False))
    df_results.to_csv(f"{BASE_DIR}/r2_rmse_tables/{name}.csv", index=False)
    return df_results

# ============== RUN EVALUATIONS ==============
# NOTE(review): X_train_all is the full seen-device set (train + validation
# rows), so the "test" table and the "internal test" scatter plot below do
# not measure held-out data. Only "unseen" is independent of training.
evaluate_and_save(X_train, y_train, "train")
evaluate_and_save(X_val, y_val, "val")
evaluate_and_save(X_train_all, y_train_scaled.values, "test")
evaluate_and_save(X_test_all, y_test_scaled.values, "unseen", filter_positive=True)

# ============== PREDICTED vs ACTUAL SCATTER PLOTS (TEST) ==============
y_pred_scaled_test = model.predict(X_train_all)

fig, axes = plt.subplots(5, 3, figsize=(15, 20))
axes = axes.flatten()

for i, col in enumerate(TARGET_COLUMNS):
    # Back to original units so both axes are physically meaningful.
    y_true = output_scalers[col].inverse_transform(y_train_scaled[col].values.reshape(-1, 1)).flatten()
    y_pred = output_scalers[col].inverse_transform(y_pred_scaled_test[:, i].reshape(-1, 1)).flatten()
    axes[i].scatter(y_true, y_pred, s=10, alpha=0.6, color='purple')
    # Dashed diagonal = perfect prediction.
    axes[i].plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--')
    axes[i].set_title(f"{col}")
    axes[i].set_xlabel("Actual")
    axes[i].set_ylabel("Predicted")

# The 5x3 grid has more panels than targets; hide the leftover empty axes
# instead of saving blank framed boxes.
for unused_ax in axes[len(TARGET_COLUMNS):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.savefig(f"{BASE_DIR}/predicted_vs_actual/scatter_internal_test.png")
plt.close()


Epoch 1/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 0.2391 - mae: 0.3007 - val_loss: 0.0960 - val_mae: 0.1617
Epoch 2/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.1126 - mae: 0.1927 - val_loss: 0.0983 - val_mae: 0.1678
Epoch 3/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.1017 - mae: 0.1806 - val_loss: 0.0813 - val_mae: 0.1419
Epoch 4/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.0959 - mae: 0.1711 - val_loss: 0.0808 - val_mae: 0.1461
Epoch 5/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.0920 - mae: 0.1664 - val_loss: 0.0776 - val_mae: 0.1502
Epoch 6/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.0880 - mae: 0.1609 - val_loss: 0.0783 - val_mae: 0.1410
Epoch 7/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 



[1m2294/2294[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 750us/step

Evaluation on train:
                  Target       R2         RMSE
voltage_fall_time_pulse1 0.998150 2.591866e-10
voltage_fall_time_pulse2 0.998090 2.630559e-10
current_fall_time_pulse2 0.997176 6.490208e-10
current_fall_time_pulse1 0.997121 6.591136e-10
   ringing_frequency_MHz 0.994104 1.302421e-06
voltage_rise_time_pulse1 0.980979 4.776322e-10
       overshoot_pulse_1 0.977588 2.020023e+00
      undershoot_pulse_2 0.971257 1.845735e+00
      undershoot_pulse_1 0.969922 1.893093e+00
voltage_rise_time_pulse2 0.890344 1.457968e-09
current_rise_time_pulse2 0.885950 9.188802e-09
       overshoot_pulse_2 0.877887 6.915084e+00
current_rise_time_pulse1 0.678903 1.342240e-08
[1m405/405[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 665us/step

Evaluation on val:
                  Target       R2         RMSE
voltage_fall_time_pulse1 0.998172 2.568711e-10
voltage_fall_time_pulse2 0.998109 2.609907e-10
cur

## ITERATION - 4

In [4]:
# ==================== ITERATION 4: DROPOUT + L2 + LR SCHEDULER ====================
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers
import matplotlib.pyplot as plt

# ============== SETTINGS ==============
SEED = 42
# NOTE(review): iterations 1-3 hold out 'C2M0280120D'; this cell holds out a
# different part, so its "unseen" metrics are not directly comparable.
# Confirm the change is intentional.
UNSEEN_PART = 'C2M0025120D'  # part number held out of training (the "unseen" device)

# Seed the Python, NumPy and TensorFlow RNGs. Without this, SEED only reaches
# train_test_split, so weight initialisation, dropout masks and batch
# shuffling change from run to run.
tf.keras.utils.set_random_seed(SEED)

BASE_DIR = "fourth_iteration"
# One output folder per artefact type; exist_ok keeps cell re-runs safe.
for _subdir in ("r2_rmse_tables", "train_val_loss_curves", "predicted_vs_actual", "models"):
    os.makedirs(f"{BASE_DIR}/{_subdir}", exist_ok=True)

# ============== LOAD DATA ==============
# NOTE(review): absolute, machine-specific path; the cell only runs where this file exists.
df = pd.read_csv(r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs_25percent_balanced.csv")

# ============== TARGETS + DROPS ==============
# Columns the network predicts.
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
# Identifier columns that are never used as model inputs.
DROP_COLUMNS = ['DeviceID', 'MOSFET', 'Part_Number']

# ============== SPLIT SEEN/UNSEEN DEVICES ==============
parts = df['Part_Number'].unique().tolist()
# Fail early: a mistyped part number would otherwise produce an empty
# unseen set and an obscure error much later in the pipeline.
if UNSEEN_PART not in parts:
    raise ValueError(
        f"UNSEEN_PART {UNSEEN_PART!r} not found in 'Part_Number'; available parts: {parts}"
    )
seen_parts = [p for p in parts if p != UNSEEN_PART]
train_df = df[df['Part_Number'].isin(seen_parts)].copy()
test_df = df[df['Part_Number'] == UNSEEN_PART].copy()

# ============== DERIVED FEATURE ==============
def compute_ringing_frequency(row):
    """Return the LC resonance frequency 1 / (2*pi*sqrt(L*C)) divided by 1e6.

    L is the sum of the row's 'Ls4' ... 'Ls11' entries and C is the row's
    'Coss' entry (1e-12 is used when the row has no 'Coss' key).
    NOTE(review): the result is in MHz only if the inductances are in henries
    and Coss is in farads; confirm the units of the CSV columns.

    Args:
        row: a pandas Series (one DataFrame row) holding the columns above.

    Returns:
        The scaled frequency, or NaN when L or C is not strictly positive.
    """
    inductance_cols = ['Ls4', 'Ls5', 'Ls6', 'Ls7', 'Ls8', 'Ls9', 'Ls10', 'Ls11']
    total_inductance = row[inductance_cols].sum()
    capacitance = row.get("Coss", 1e-12)

    # Guard clause: the square root below needs a strictly positive product.
    if not (total_inductance > 0 and capacitance > 0):
        return np.nan

    denominator = 2 * np.pi * np.sqrt(total_inductance * capacitance)
    return 1 / denominator / 1e6

# Add the derived ringing-frequency column to both splits (row-wise apply).
for df_ in [train_df, test_df]:
    df_['ringing_frequency_MHz'] = df_.apply(compute_ringing_frequency, axis=1)

# ============== ENCODE DEVICE =================
# The encoder is fitted on seen + unseen part numbers so transform() accepts
# the held-out device.
# NOTE(review): the unseen part's integer code never occurs during training;
# confirm that feeding this ordinal code to the network is intended.
encoder = LabelEncoder()
all_parts = pd.concat([train_df['Part_Number'], test_df['Part_Number']])
encoder.fit(all_parts)

train_df['Part_encoded'] = encoder.transform(train_df['Part_Number'])
test_df['Part_encoded'] = encoder.transform(test_df['Part_Number'])

# ============== INPUT FEATURES ==============
# Build the exclusion set once instead of concatenating two lists per column.
_excluded_columns = set(TARGET_COLUMNS + DROP_COLUMNS)
INPUT_COLUMNS = [col for col in df.columns if col not in _excluded_columns]
INPUT_COLUMNS.append("Part_encoded")

# ============== SCALE INPUTS ==============
# Fit on the seen devices only. Fitting on train + unseen rows (as before)
# leaks the held-out device's feature statistics into preprocessing and
# makes the "unseen" evaluation optimistic.
input_scaler = StandardScaler()
input_scaler.fit(train_df[INPUT_COLUMNS])
X_train_all = input_scaler.transform(train_df[INPUT_COLUMNS])
X_test_all = input_scaler.transform(test_df[INPUT_COLUMNS])

# ============== SCALE OUTPUTS ==============
# One scaler per target, fitted on the seen devices only and kept in
# output_scalers so predictions can be mapped back to original units.
output_scalers = {}
y_train_scaled = pd.DataFrame()
y_test_scaled = pd.DataFrame()

for col in TARGET_COLUMNS:
    scaler = MinMaxScaler() if col == 'ringing_frequency_MHz' else StandardScaler()
    y_train_scaled[col] = scaler.fit_transform(train_df[[col]]).flatten()
    y_test_scaled[col] = scaler.transform(test_df[[col]]).flatten()
    output_scalers[col] = scaler

# ============== SPLIT TRAIN/VAL (85/15) ==============
# NOTE(review): the original header advertised 70/15/15, but only a single
# 85/15 train/validation split is made; no internal test set is held out.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_all, y_train_scaled.values, test_size=0.15, random_state=SEED
)

# ============== DEFINE ANN with DROPOUT + BN + L2 =================
def build_ann(input_dim, output_dim, dropout=0.2, l2_reg=1e-4):
    """Assemble the uncompiled feed-forward regression network.

    Layout: an input of width ``input_dim``, two identical hidden stages
    (Dense(128) with an L2 kernel penalty -> BatchNormalization -> ReLU ->
    Dropout) and a linear Dense head of width ``output_dim``.

    Args:
        input_dim: number of input features.
        output_dim: number of regression targets.
        dropout: dropout rate used after each hidden stage.
        l2_reg: L2 factor applied to the hidden Dense kernels.

    Returns:
        A ``Sequential`` model; compiling is left to the caller.
    """
    stack = [layers.Input(shape=(input_dim,))]
    for _ in range(2):
        # Activation is applied as its own layer so that batch norm sits
        # between the affine transform and the ReLU.
        stack.extend([
            layers.Dense(128, activation=None, kernel_regularizer=regularizers.l2(l2_reg)),
            layers.BatchNormalization(),
            layers.Activation('relu'),
            layers.Dropout(dropout),
        ])
    stack.append(layers.Dense(output_dim, activation='linear'))
    return models.Sequential(stack)

# Size the network from the split arrays and compile it for MSE regression,
# tracking MAE as an additional metric.
model = build_ann(input_dim=X_train.shape[1], output_dim=y_train.shape[1])
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# ============== TRAIN MODEL ==============
# Both callbacks watch the validation loss: the LR is halved after 5 stagnant
# epochs, and training stops after 10, restoring the best weights.
early_stop = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
lr_schedule = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
    verbose=1,
)

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=64,
    callbacks=[early_stop, lr_schedule],
    verbose=1,
)

model.save(f"{BASE_DIR}/models/dropout_l2_scheduler_ann.h5")

# ============== SAVE LOSS CURVE ==============
loss_fig, loss_ax = plt.subplots()
for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Val Loss')):
    loss_ax.plot(history.history[history_key], label=curve_label)
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("MSE Loss")
loss_ax.set_title("Train vs Validation Loss")
loss_ax.legend()
loss_fig.savefig(f"{BASE_DIR}/train_val_loss_curves/loss.png")
plt.close(loss_fig)

# ============== EVALUATION FUNCTION ==============
def evaluate_and_save(X, y_scaled, name, filter_positive=False):
    """Score the global ``model`` per target and write the table to CSV.

    Predictions and references are mapped back to original units with the
    per-target scalers in ``output_scalers`` before R2 and RMSE are computed.
    The table is sorted by R2 (best first), printed, and saved to
    ``{BASE_DIR}/r2_rmse_tables/{name}.csv``.

    Args:
        X: scaled input matrix passed to ``model.predict``.
        y_scaled: 2-D array of scaled references whose columns follow the
            order of ``TARGET_COLUMNS``.
        name: label used in the printed header and as the CSV file stem.
        filter_positive: when True, only targets with R2 > 0 are kept in the
            printed, saved and returned table.

    Returns:
        DataFrame with columns ``Target``, ``R2`` and ``RMSE``.
    """
    y_pred_scaled = model.predict(X)
    results = []
    for i, col in enumerate(TARGET_COLUMNS):
        target_scaler = output_scalers[col]
        y_true = target_scaler.inverse_transform(y_scaled[:, i].reshape(-1, 1)).flatten()
        y_pred = target_scaler.inverse_transform(y_pred_scaled[:, i].reshape(-1, 1)).flatten()
        r2 = r2_score(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        results.append((col, r2, rmse))

    df_results = pd.DataFrame(results, columns=["Target", "R2", "RMSE"])
    df_results = df_results.sort_values("R2", ascending=False)

    # The header used to be printed only in the unfiltered case, which left
    # filtered tables unlabelled in the output; print it for every call.
    print(f"\nEvaluation on {name}:")

    if filter_positive:
        n_total = len(df_results)
        df_results = df_results[df_results["R2"] > 0]
        n_omitted = n_total - len(df_results)
        if n_omitted:
            # Make the filtering visible so poorly predicted targets are not
            # silently missing from the report.
            print(f"({n_omitted} of {n_total} targets without a positive R2 omitted)")

    print(df_results.to_string(index=False))
    df_results.to_csv(f"{BASE_DIR}/r2_rmse_tables/{name}.csv", index=False)
    return df_results

# ============== RUN EVALUATIONS ==============
evaluate_and_save(X_train, y_train, "train")
evaluate_and_save(X_val, y_val, "val")
# NOTE(review): "test" is scored on X_train_all, i.e. every seen-device row,
# including the rows the model was trained and validated on. It is not a
# held-out set; only "unseen" below measures generalisation.
evaluate_and_save(X_train_all, y_train_scaled.values, "test")
evaluate_and_save(X_test_all, y_test_scaled.values, "unseen", filter_positive=True)

# ============== PREDICTED vs ACTUAL SCATTER PLOTS (TEST) ==============
# Same caveat as above: these points are the full seen-device set.
y_pred_scaled_test = model.predict(X_train_all)

# Size the grid from the number of targets instead of a fixed 5x3 layout, so
# a longer target list cannot index past the axes array.
n_cols = 3
n_rows = max(1, (len(TARGET_COLUMNS) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 4 * n_rows), squeeze=False)
axes = axes.flatten()

for i, col in enumerate(TARGET_COLUMNS):
    y_true = output_scalers[col].inverse_transform(y_train_scaled[col].values.reshape(-1, 1)).flatten()
    y_pred = output_scalers[col].inverse_transform(y_pred_scaled_test[:, i].reshape(-1, 1)).flatten()
    axes[i].scatter(y_true, y_pred, s=10, alpha=0.6, color='teal')
    # Dashed identity line: points on it are perfect predictions.
    axes[i].plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--')
    axes[i].set_title(f"{col}")
    axes[i].set_xlabel("Actual")
    axes[i].set_ylabel("Predicted")

# Hide panels that have no target assigned so the figure has no empty frames.
for spare_ax in axes[len(TARGET_COLUMNS):]:
    spare_ax.set_visible(False)

plt.tight_layout()
plt.savefig(f"{BASE_DIR}/predicted_vs_actual/scatter_internal_test.png")
plt.close()


Epoch 1/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.3545 - mae: 0.4059 - val_loss: 0.0737 - val_mae: 0.1372 - learning_rate: 0.0010
Epoch 2/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.1260 - mae: 0.2258 - val_loss: 0.0609 - val_mae: 0.1303 - learning_rate: 0.0010
Epoch 3/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.1051 - mae: 0.2063 - val_loss: 0.0572 - val_mae: 0.1311 - learning_rate: 0.0010
Epoch 4/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.1002 - mae: 0.2029 - val_loss: 0.0519 - val_mae: 0.1189 - learning_rate: 0.0010
Epoch 5/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0943 - mae: 0.1965 - val_loss: 0.0494 - val_mae: 0.1130 - learning_rate: 0.0010
Epoch 6/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0897 -



[1m2294/2294[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 763us/step

Evaluation on train:
                  Target       R2         RMSE
voltage_fall_time_pulse2 0.997708 2.578707e-10
voltage_fall_time_pulse1 0.997708 2.582018e-10
current_fall_time_pulse1 0.997197 6.543143e-10
current_fall_time_pulse2 0.997169 6.563879e-10
      undershoot_pulse_2 0.984734 1.694730e+00
      undershoot_pulse_1 0.983744 1.745580e+00
voltage_rise_time_pulse1 0.982606 4.250890e-10
   ringing_frequency_MHz 0.979833 4.252868e-06
       overshoot_pulse_1 0.972761 2.107410e+00
current_rise_time_pulse1 0.942562 1.152708e-08
       overshoot_pulse_2 0.942214 6.598523e+00
voltage_rise_time_pulse2 0.878941 1.342769e-09
current_rise_time_pulse2 0.862826 8.734629e-09
[1m405/405[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 680us/step

Evaluation on val:
                  Target       R2         RMSE
voltage_fall_time_pulse2 0.997720 2.560418e-10
voltage_fall_time_pulse1 0.997711 2.568564e-10
cur

## ITERATION - 5

In [5]:
# ==================== ITERATION 5: PHYSICS FEATURES + ALL REGULARIZATION ====================
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers
import matplotlib.pyplot as plt

# ============== SETTINGS ==============
SEED = 42
UNSEEN_PART = 'C2M0025120D'
BASE_DIR = "fifth_iteration"

# Create every output folder used later in this cell (no error if present).
for subdir in ("r2_rmse_tables", "train_val_loss_curves", "predicted_vs_actual", "models"):
    os.makedirs(f"{BASE_DIR}/{subdir}", exist_ok=True)

# ============== LOAD DATA ==============
df = pd.read_csv(r"C:\Users\pc\Desktop\Neural_Network_Models\merged_train_5_MOSFETs_25percent_balanced.csv")

# ============== TARGETS + DROPS ==============
# Columns the network predicts; their order fixes the output column order.
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1',
    'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1',
    'voltage_fall_time_pulse2',
    'current_rise_time_pulse1',
    'current_rise_time_pulse2',
    'current_fall_time_pulse1',
    'current_fall_time_pulse2',
    'overshoot_pulse_1',
    'overshoot_pulse_2',
    'undershoot_pulse_1',
    'undershoot_pulse_2',
    'ringing_frequency_MHz',
]
# Identifier columns that are excluded from the input features.
DROP_COLUMNS = ['DeviceID', 'MOSFET', 'Part_Number']

# ============== SPLIT SEEN/UNSEEN DEVICES ==============
# Rows of UNSEEN_PART are held out; all other parts are used for training.
seen_parts = [p for p in df['Part_Number'].unique().tolist() if p != UNSEEN_PART]
is_seen = df['Part_Number'].isin(seen_parts)
is_unseen = df['Part_Number'] == UNSEEN_PART
train_df = df.loc[is_seen].copy()
test_df = df.loc[is_unseen].copy()

# ============== DERIVED PHYSICS FEATURES ==============
def compute_physics_features(row):
    """Derive five closed-form features from one data row.

    Returned in this order, as an unlabelled ``pandas.Series``:

    1. resonance estimate ``1 / (2*pi*sqrt(L*C)) / 1e6`` with L the sum of
       ``Ls4`` .. ``Ls11`` and C taken from ``Coss`` (1e-12 if absent);
       0 unless both L and C are strictly positive
    2. ``VDS_max - Vbus``
    3. ``0 - VGS_th_min``
    4. ``VDS_max / Tp1`` (0 when ``Tp1`` is 0)
    5. ``ID_max_25C / Tp1`` (0 when ``Tp1`` is 0)

    Args:
        row: one row (e.g. from ``DataFrame.apply(axis=1)``) providing the
            columns named above.
    """
    inductance_columns = ['Ls4', 'Ls5', 'Ls6', 'Ls7', 'Ls8', 'Ls9', 'Ls10', 'Ls11']
    L_eq = row[inductance_columns].sum()
    C_eq = row.get("Coss", 1e-12)

    if L_eq > 0 and C_eq > 0:
        f_resonance = 1 / (2 * np.pi * np.sqrt(L_eq * C_eq)) / 1e6
    else:
        f_resonance = 0

    overshoot_est = row["VDS_max"] - row["Vbus"]
    undershoot_est = 0 - row["VGS_th_min"]

    # Both slope estimates divide by Tp1, so a zero Tp1 maps to 0 instead.
    pulse_width = row["Tp1"]
    if pulse_width != 0:
        dVdt_est = row["VDS_max"] / pulse_width
        dIdt_est = row["ID_max_25C"] / pulse_width
    else:
        dVdt_est = 0
        dIdt_est = 0

    features = [f_resonance, overshoot_est, undershoot_est, dVdt_est, dIdt_est]
    return pd.Series(features)

# Add the five derived features, row by row, to both the seen and the held-out frame.
for df_ in [train_df, test_df]:
    df_[['f_resonance', 'overshoot_est', 'undershoot_est', 'dVdt_est', 'dIdt_est']] = df_.apply(compute_physics_features, axis=1)

# ============== ENCODE DEVICE ==============
# The encoder is fitted on seen + held-out part numbers because
# LabelEncoder.transform() raises on labels it was not fitted on.
encoder = LabelEncoder()
encoder.fit(pd.concat([train_df['Part_Number'], test_df['Part_Number']]))
train_df['Part_encoded'] = encoder.transform(train_df['Part_Number'])
test_df['Part_encoded'] = encoder.transform(test_df['Part_Number'])

# ============== INPUT FEATURES ==============
# Every CSV column that is neither a target nor a dropped identifier, followed
# by the derived features and the integer-encoded part number.
excluded_columns = set(TARGET_COLUMNS + DROP_COLUMNS)
INPUT_COLUMNS = [
    col for col in df.columns if col not in excluded_columns
] + ['f_resonance', 'overshoot_est', 'undershoot_est', 'dVdt_est', 'dIdt_est', 'Part_encoded']

# ============== SCALE INPUTS ==============
# Fit on the seen devices only. Including test_df in the fit would leak the
# held-out device's feature statistics into preprocessing and make the
# "unseen" evaluation optimistic.
input_scaler = StandardScaler()
input_scaler.fit(train_df[INPUT_COLUMNS])
X_train_all = input_scaler.transform(train_df[INPUT_COLUMNS])
X_test_all = input_scaler.transform(test_df[INPUT_COLUMNS])

# ============== SCALE OUTPUTS ==============
# One scaler per target, fitted on the seen devices and reused for the held-out
# device; they are kept in output_scalers so predictions can be inverse-transformed.
output_scalers = {}
y_train_scaled = pd.DataFrame()
y_test_scaled = pd.DataFrame()

for col in TARGET_COLUMNS:
    scaler = MinMaxScaler() if col == 'ringing_frequency_MHz' else StandardScaler()
    y_train_scaled[col] = scaler.fit_transform(train_df[[col]]).flatten()
    y_test_scaled[col] = scaler.transform(test_df[[col]]).flatten()
    output_scalers[col] = scaler

# ============== SPLIT TRAIN/VAL (85/15) ==============
# A single split of the seen-device rows: 85% train, 15% validation.
# No separate internal test partition is created here.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_all, y_train_scaled.values, test_size=0.15, random_state=SEED
)

# ============== DEFINE ANN =================
def build_ann(input_dim, output_dim, dropout=0.2, l2_reg=1e-4):
    """Assemble the uncompiled feed-forward regression network.

    Layout: an input of width ``input_dim``, two identical hidden stages
    (Dense(128) with an L2 kernel penalty -> BatchNormalization -> ReLU ->
    Dropout) and a linear Dense head of width ``output_dim``.

    Args:
        input_dim: number of input features.
        output_dim: number of regression targets.
        dropout: dropout rate used after each hidden stage.
        l2_reg: L2 factor applied to the hidden Dense kernels.

    Returns:
        A ``Sequential`` model; compiling is left to the caller.
    """
    def hidden_stage():
        # The Dense layer has no activation of its own; ReLU follows batch norm.
        return [
            layers.Dense(128, kernel_regularizer=regularizers.l2(l2_reg)),
            layers.BatchNormalization(),
            layers.Activation('relu'),
            layers.Dropout(dropout),
        ]

    stack = [layers.Input(shape=(input_dim,))]
    stack += hidden_stage()
    stack += hidden_stage()
    stack.append(layers.Dense(output_dim, activation='linear'))
    return models.Sequential(stack)

# SEED was previously applied only to train_test_split. Seed the Python, NumPy
# and TensorFlow generators before the model is created so that weight
# initialisation, dropout masks and batch shuffling repeat from run to run.
tf.keras.utils.set_random_seed(SEED)

# Size the network from the split arrays and compile it for MSE regression,
# tracking MAE as an additional metric.
model = build_ann(X_train.shape[1], y_train.shape[1])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# ============== TRAINING ==============
# Both callbacks watch the validation loss: the LR is halved after 5 stagnant
# epochs, and training stops after 10, restoring the best weights.
early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
lr_schedule = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)

history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                    epochs=200, batch_size=64, callbacks=[early_stop, lr_schedule], verbose=1)

model.save(f"{BASE_DIR}/models/iteration5_final_ann.h5")

# ============== SAVE LOSS CURVE ==============
plt.figure()
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.title("Train vs Validation Loss")
plt.legend()
plt.savefig(f"{BASE_DIR}/train_val_loss_curves/loss.png")
plt.close()

# ============== EVALUATION FUNCTION ==============
def evaluate_and_save(X, y_scaled, name, filter_positive=False):
    """Score the global ``model`` per target and write the table to CSV.

    Predictions and references are mapped back to original units with the
    per-target scalers in ``output_scalers`` before R2 and RMSE are computed.
    The table is sorted by R2 (best first), printed, and saved to
    ``{BASE_DIR}/r2_rmse_tables/{name}.csv``.

    Args:
        X: scaled input matrix passed to ``model.predict``.
        y_scaled: 2-D array of scaled references whose columns follow the
            order of ``TARGET_COLUMNS``.
        name: label used in the printed header and as the CSV file stem.
        filter_positive: when True, only targets with R2 > 0 are kept in the
            printed, saved and returned table.

    Returns:
        DataFrame with columns ``Target``, ``R2`` and ``RMSE``.
    """
    y_pred_scaled = model.predict(X)
    results = []
    for i, col in enumerate(TARGET_COLUMNS):
        target_scaler = output_scalers[col]
        y_true = target_scaler.inverse_transform(y_scaled[:, i].reshape(-1, 1)).flatten()
        y_pred = target_scaler.inverse_transform(y_pred_scaled[:, i].reshape(-1, 1)).flatten()
        r2 = r2_score(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        results.append((col, r2, rmse))

    df_results = pd.DataFrame(results, columns=["Target", "R2", "RMSE"])
    df_results = df_results.sort_values("R2", ascending=False)

    # The header used to be printed only in the unfiltered case, which left
    # filtered tables unlabelled in the output; print it for every call.
    print(f"\nEvaluation on {name}:")

    if filter_positive:
        n_total = len(df_results)
        df_results = df_results[df_results["R2"] > 0]
        n_omitted = n_total - len(df_results)
        if n_omitted:
            # Make the filtering visible so poorly predicted targets are not
            # silently missing from the report.
            print(f"({n_omitted} of {n_total} targets without a positive R2 omitted)")

    print(df_results.to_string(index=False))
    df_results.to_csv(f"{BASE_DIR}/r2_rmse_tables/{name}.csv", index=False)
    return df_results

# ============== RUN EVALUATIONS ==============
evaluate_and_save(X_train, y_train, "train")
evaluate_and_save(X_val, y_val, "val")
# NOTE(review): "test" is scored on X_train_all, i.e. every seen-device row,
# including the rows the model was trained and validated on. It is not a
# held-out set; only "unseen" below measures generalisation.
evaluate_and_save(X_train_all, y_train_scaled.values, "test")
evaluate_and_save(X_test_all, y_test_scaled.values, "unseen", filter_positive=True)

# ============== PREDICTED vs ACTUAL SCATTER PLOTS ==============
# Same caveat as above: these points are the full seen-device set.
y_pred_scaled_test = model.predict(X_train_all)

# Size the grid from the number of targets instead of a fixed 5x3 layout, so
# a longer target list cannot index past the axes array.
n_cols = 3
n_rows = max(1, (len(TARGET_COLUMNS) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 4 * n_rows), squeeze=False)
axes = axes.flatten()

for i, col in enumerate(TARGET_COLUMNS):
    y_true = output_scalers[col].inverse_transform(y_train_scaled[col].values.reshape(-1, 1)).flatten()
    y_pred = output_scalers[col].inverse_transform(y_pred_scaled_test[:, i].reshape(-1, 1)).flatten()
    axes[i].scatter(y_true, y_pred, s=10, alpha=0.6, color='darkblue')
    # Dashed identity line: points on it are perfect predictions.
    axes[i].plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--')
    axes[i].set_title(f"{col}")
    axes[i].set_xlabel("Actual")
    axes[i].set_ylabel("Predicted")

# Hide panels that have no target assigned so the figure has no empty frames.
for spare_ax in axes[len(TARGET_COLUMNS):]:
    spare_ax.set_visible(False)

plt.tight_layout()
plt.savefig(f"{BASE_DIR}/predicted_vs_actual/scatter_internal_test.png")
plt.close()


Epoch 1/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 0.3478 - mae: 0.4006 - val_loss: 0.0780 - val_mae: 0.1469 - learning_rate: 0.0010
Epoch 2/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.1237 - mae: 0.2241 - val_loss: 0.0676 - val_mae: 0.1427 - learning_rate: 0.0010
Epoch 3/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.1080 - mae: 0.2093 - val_loss: 0.0570 - val_mae: 0.1298 - learning_rate: 0.0010
Epoch 4/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0981 - mae: 0.2005 - val_loss: 0.0519 - val_mae: 0.1231 - learning_rate: 0.0010
Epoch 5/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0945 - mae: 0.1966 - val_loss: 0.0486 - val_mae: 0.1150 - learning_rate: 0.0010
Epoch 6/200
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0921 -



[1m2294/2294[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step  

Evaluation on train:
                  Target       R2         RMSE
voltage_fall_time_pulse2 0.998039 2.385409e-10
voltage_fall_time_pulse1 0.998034 2.391438e-10
current_fall_time_pulse2 0.997526 6.136638e-10
current_fall_time_pulse1 0.997525 6.148464e-10
   ringing_frequency_MHz 0.989397 3.174561e+00
      undershoot_pulse_2 0.984345 1.716170e+00
      undershoot_pulse_1 0.983250 1.771914e+00
voltage_rise_time_pulse1 0.981148 4.425448e-10
       overshoot_pulse_1 0.971991 2.136983e+00
       overshoot_pulse_2 0.943000 6.553546e+00
current_rise_time_pulse1 0.942939 1.148919e-08
voltage_rise_time_pulse2 0.879842 1.337767e-09
current_rise_time_pulse2 0.863467 8.714201e-09
[1m405/405[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step

Evaluation on val:
                  Target       R2         RMSE
voltage_fall_time_pulse2 0.998094 2.340690e-10
voltage_fall_time_pulse1 0.998065 2.361723e-10
curre