LightGBM - 2nd Classical Model 

- Train/Validation/Test: 70%-15%-15% internal split from train_for_model.csv

- Holdout Evaluation: Evaluate on merged_test_with_features.csv

- Generalization Test: Train on merged_train_5_MOSFETS.csv and test on merged_test_1_MOSFET.csv

In [3]:
# Usage: $env:PYTHONPATH="."; python Classical_Models\LGBM\lightgbm_train.py

import os
import time
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# === CONFIGURATION ===
# FAST_MODE=True trains on a 25% row sample with 100 trees per target;
# FAST_MODE=False uses every row and 300 trees.
FAST_MODE = True
TRAIN_FILE = r"C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\data\processed\train_for_model.csv"
HOLDOUT_FILE = r"C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\data\processed\merged_test_with_features.csv"
MODEL_DIR = r"C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\Classical_Models\models\lightgbm\testing"
# Scalers, model and result CSVs are all written into MODEL_DIR.
os.makedirs(MODEL_DIR, exist_ok=True)

# The 13 regression targets; one LightGBM regressor is fitted per entry.
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1',
    'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1',
    'voltage_fall_time_pulse2',
    'current_rise_time_pulse1',
    'current_rise_time_pulse2',
    'current_fall_time_pulse1',
    'current_fall_time_pulse2',
    'overshoot_pulse_1',
    'overshoot_pulse_2',
    'undershoot_pulse_1',
    'undershoot_pulse_2',
    'ringing_frequency_MHz',
]
# Columns removed from the feature matrix in addition to the targets.
DROP_COLUMNS = ['DeviceID']

# === 1. Load & Sample Training Data ===
# Rows with any missing target cannot be used for supervised training.
df = pd.read_csv(TRAIN_FILE).dropna(subset=TARGET_COLUMNS)
# 'Device' is optional in the CSV; it is dropped only when present.
df = df.drop(columns=['Device'], errors='ignore')

if FAST_MODE:
    df = df.sample(frac=0.25, random_state=42)

y = df[TARGET_COLUMNS]
X = df.drop(columns=DROP_COLUMNS + TARGET_COLUMNS)

# 70% train; the remaining 30% is halved into validation and internal test.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, random_state=42
)

# === 2. Scaling ===
# Both scalers take their statistics from the training split only and are
# then applied unchanged to the validation and test splits.
scaler_X = StandardScaler().fit(X_train)
scaler_y = StandardScaler().fit(y_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler_X.transform(part) for part in (X_train, X_val, X_test)
)
y_train_scaled, y_val_scaled, y_test_scaled = (
    scaler_y.transform(part) for part in (y_train, y_val, y_test)
)

# Persist the scalers so predictions can be mapped back to original units.
for fitted_scaler, pkl_name in (
    (scaler_X, "input_scaler.pkl"),
    (scaler_y, "output_scaler.pkl"),
):
    joblib.dump(fitted_scaler, os.path.join(MODEL_DIR, pkl_name))

# === 3. Train LightGBM ===
lgb_params = dict(
    n_estimators=100 if FAST_MODE else 300,
    learning_rate=0.05,
    num_leaves=31,
    random_state=42,
    n_jobs=-1,
)
# MultiOutputRegressor wraps the single-output LGBMRegressor so that it can
# be fitted against the multi-column target matrix.
base_regressor = lgb.LGBMRegressor(**lgb_params)
model = MultiOutputRegressor(base_regressor)

start_time = time.time()
model.fit(X_train_scaled, y_train_scaled)
elapsed_seconds = time.time() - start_time
print(f"Training completed in {elapsed_seconds:.2f} seconds")

# === 4. Enhanced Evaluation Function ===
def evaluate(model, X_scaled, y_scaled, label, save_name):
    """Score ``model`` on one split, print a metrics table and save CSVs.

    Predictions and targets are passed through the module-level
    ``scaler_y.inverse_transform`` before the metrics are computed, so the
    reported RMSE is on the unscaled targets.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_scaled: Feature matrix, already transformed by the input scaler.
        y_scaled: Targets, already transformed by ``scaler_y``; columns are
            taken to be in ``TARGET_COLUMNS`` order.
        label: Split name shown in the heading of the printed table.
        save_name: File name (inside ``MODEL_DIR``) of the predictions CSV.
            The metrics CSV is written beside it with ``_metrics`` appended
            to the stem.

    Returns:
        None. Results are printed and written to ``MODEL_DIR``.
    """
    y_pred = scaler_y.inverse_transform(model.predict(X_scaled))
    y_true = scaler_y.inverse_transform(y_scaled)

    # === Compute RMSE and R² (one value per target column)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred, multioutput='raw_values'))
    r2 = r2_score(y_true, y_pred, multioutput='raw_values')

    # === Pretty print results
    print(f"\n{label} Evaluation:")
    print(f"{'Target Output':<35} {'RMSE':>15} {'R² Score':>12}")
    print("-" * 64)
    for col, rmse_val, r2_val in zip(TARGET_COLUMNS, rmse, r2):
        print(f"{col:<35} {rmse_val:>15.2E} {r2_val:>12.4f}")

    # === Save predictions (all *_actual columns first, then *_predicted)
    result_columns = {
        f"{col}_actual": y_true[:, i] for i, col in enumerate(TARGET_COLUMNS)
    }
    result_columns.update({
        f"{col}_predicted": y_pred[:, i] for i, col in enumerate(TARGET_COLUMNS)
    })
    df_result = pd.DataFrame(result_columns)

    predictions_path = os.path.join(MODEL_DIR, save_name)
    df_result.to_csv(predictions_path, index=False)
    print(f"Predictions saved to: {predictions_path}")

    # === Save RMSE + R² metrics
    df_metrics = pd.DataFrame({
        "Target": TARGET_COLUMNS,
        "RMSE": [f"{val:.2E}" for val in rmse],
        "R2_Score": [round(val, 4) for val in r2]
    })

    # Derive the metrics name from the stem rather than str.replace: with
    # replace(), a save_name lacking ".csv" maps to itself and the metrics
    # file would silently overwrite the predictions file.
    stem, ext = os.path.splitext(save_name)
    metrics_filename = f"{stem}_metrics{ext or '.csv'}"
    metrics_path = os.path.join(MODEL_DIR, metrics_filename)
    df_metrics.to_csv(metrics_path, index=False)
    print(f"Metrics saved to: {metrics_path}")

# === 5. Evaluate All Splits ===
# Score the two internal splits carved out of the training file.
for split_label, X_split, y_split, out_csv in (
    ("Validation", X_val_scaled, y_val_scaled, "validation_predictions.csv"),
    ("Internal Test", X_test_scaled, y_test_scaled, "internal_test_predictions.csv"),
):
    evaluate(model, X_split, y_split, split_label, out_csv)

# === 6. Evaluate on Holdout Test ===
# The holdout file gets the same cleaning as the training file: rows without
# a complete target set are dropped and the optional 'Device' column removed.
df_holdout = pd.read_csv(HOLDOUT_FILE).dropna(subset=TARGET_COLUMNS)
df_holdout = df_holdout.drop(columns=['Device'], errors='ignore')

y_holdout = df_holdout[TARGET_COLUMNS]
X_holdout = df_holdout.drop(columns=DROP_COLUMNS + TARGET_COLUMNS)
# Reuse the scalers fitted on the training split; they are not refitted here.
y_holdout_scaled = scaler_y.transform(y_holdout)
X_holdout_scaled = scaler_X.transform(X_holdout)

evaluate(
    model,
    X_holdout_scaled,
    y_holdout_scaled,
    "External Holdout",
    "holdout_predictions.csv",
)

# === 7. Save Final Model ===
model_path = os.path.join(MODEL_DIR, "model.pkl")
joblib.dump(model, model_path)
print(f"\nModel and scalers saved to: {MODEL_DIR}")


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005366 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1102
[LightGBM] [Info] Number of data points in the train set: 84438, number of used features: 47
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003627 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1102
[LightGBM] [Info] Number of data points in the train set: 84438, number of used features: 47
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.013139 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total 




Validation Evaluation:
Target Output                                  RMSE     R² Score
----------------------------------------------------------------
voltage_rise_time_pulse1                   3.74E-08       0.0090
voltage_rise_time_pulse2                   1.86E-07       0.9998
voltage_fall_time_pulse1                   3.70E-08       1.0000
voltage_fall_time_pulse2                   7.09E-08       0.9999
current_rise_time_pulse1                   8.37E-07       0.9944
current_rise_time_pulse2                   1.75E-07       0.9273
current_fall_time_pulse1                   5.64E-06       0.7564
current_fall_time_pulse2                   3.47E-08       0.2788
overshoot_pulse_1                          1.23E+00       0.9979
overshoot_pulse_2                          2.59E+00       0.9999
undershoot_pulse_1                         4.75E+00       0.9663
undershoot_pulse_2                         1.01E+01       0.9380
ringing_frequency_MHz                      1.67E+00       0.9986
P




Internal Test Evaluation:
Target Output                                  RMSE     R² Score
----------------------------------------------------------------
voltage_rise_time_pulse1                   2.53E-09       0.6722
voltage_rise_time_pulse2                   1.85E-07       0.9998
voltage_fall_time_pulse1                   3.64E-08       1.0000
voltage_fall_time_pulse2                   6.64E-08       0.9999
current_rise_time_pulse1                   6.18E-07       0.9969
current_rise_time_pulse2                   1.72E-07       0.9297
current_fall_time_pulse1                   5.48E-06       0.7719
current_fall_time_pulse2                   1.60E-08       0.5732
overshoot_pulse_1                          1.23E+00       0.9979
overshoot_pulse_2                          2.57E+00       0.9999
undershoot_pulse_1                         4.59E+00       0.9684
undershoot_pulse_2                         1.02E+01       0.9315
ringing_frequency_MHz                      1.64E+00       0.998




External Holdout Evaluation:
Target Output                                  RMSE     R² Score
----------------------------------------------------------------
voltage_rise_time_pulse1                   2.11E-10       0.8859
voltage_rise_time_pulse2                   2.32E-07       1.0000
voltage_fall_time_pulse1                   9.82E-08       0.9973
voltage_fall_time_pulse2                   1.09E-07       0.9993
current_rise_time_pulse1                   1.86E-07       0.9999
current_rise_time_pulse2                   1.07E-08       0.0460
current_fall_time_pulse1                   7.69E-07       0.9195
current_fall_time_pulse2                   1.15E-09       0.8431
overshoot_pulse_1                          1.14E+00       0.9482
overshoot_pulse_2                          5.01E+00       0.9998
undershoot_pulse_1                         2.94E+00       0.9346
undershoot_pulse_2                         3.82E+00       0.8963
ringing_frequency_MHz                      4.66E+00       0.

In [5]:
# Usage: $env:PYTHONPATH="."; python Classical_Models\LGBM\lightgbm_train.py

import os
import time
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# === CONFIGURATION ===
# FAST_MODE=True trains on a 25% row sample with 100 trees per target;
# FAST_MODE=False (this cell's setting) uses every row and 300 trees.
# The flag is honoured below so its value always matches what is trained.
FAST_MODE = False
TRAIN_FILE = r"C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\data\processed\train_for_model.csv"
HOLDOUT_FILE = r"C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\data\processed\merged_test_with_features.csv"
# NOTE(review): "ttrain_full" looks like a typo for "train_full"; the path is
# left unchanged so previously saved artifacts stay where they are.
MODEL_DIR = r"C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\Classical_Models\models\lightgbm\ttrain_full"
os.makedirs(MODEL_DIR, exist_ok=True)

# The 13 regression targets; one LightGBM regressor is fitted per entry.
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
# Columns removed from the feature matrix in addition to the targets.
DROP_COLUMNS = ['DeviceID']

# === 1. Load & Sample Training Data ===
# Rows with any missing target cannot be used for supervised training.
df = pd.read_csv(TRAIN_FILE).dropna(subset=TARGET_COLUMNS)
# 'Device' is optional in the CSV; it is dropped only when present.
df = df.drop(columns=['Device'], errors='ignore')

if FAST_MODE:
    df = df.sample(frac=0.25, random_state=42)

X = df.drop(columns=DROP_COLUMNS + TARGET_COLUMNS)
y = df[TARGET_COLUMNS]
# 70% train; the remaining 30% is halved into validation and internal test.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42)

# === 2. Scaling ===
# Both scalers are fitted on the training split only and then applied
# unchanged to the validation and test splits.
scaler_X = StandardScaler()
scaler_y = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_val_scaled = scaler_X.transform(X_val)
X_test_scaled = scaler_X.transform(X_test)
y_train_scaled = scaler_y.fit_transform(y_train)
y_val_scaled = scaler_y.transform(y_val)
y_test_scaled = scaler_y.transform(y_test)

# Persist the scalers so predictions can be mapped back to original units.
joblib.dump(scaler_X, os.path.join(MODEL_DIR, "input_scaler.pkl"))
joblib.dump(scaler_y, os.path.join(MODEL_DIR, "output_scaler.pkl"))

# === 3. Train LightGBM ===
lgb_params = {
    "n_estimators": 100 if FAST_MODE else 300,
    "learning_rate": 0.05,
    "num_leaves": 31,
    "random_state": 42,
    "n_jobs": -1
}
# MultiOutputRegressor wraps the single-output LGBMRegressor so that it can
# be fitted against the multi-column target matrix.
model = MultiOutputRegressor(lgb.LGBMRegressor(**lgb_params))

start_time = time.time()
model.fit(X_train_scaled, y_train_scaled)
print(f"Training completed in {time.time() - start_time:.2f} seconds")

# === 4. Enhanced Evaluation Function ===
def evaluate(model, X_scaled, y_scaled, label, save_name):
    """Score ``model`` on one split, print a metrics table and save CSVs.

    Predictions and targets are passed through the module-level
    ``scaler_y.inverse_transform`` before the metrics are computed, so the
    reported RMSE is on the unscaled targets.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_scaled: Feature matrix, already transformed by the input scaler.
        y_scaled: Targets, already transformed by ``scaler_y``; columns are
            taken to be in ``TARGET_COLUMNS`` order.
        label: Split name shown in the heading of the printed table.
        save_name: File name (inside ``MODEL_DIR``) of the predictions CSV.
            The metrics CSV is written beside it with ``_metrics`` appended
            to the stem.

    Returns:
        None. Results are printed and written to ``MODEL_DIR``.
    """
    y_pred = scaler_y.inverse_transform(model.predict(X_scaled))
    y_true = scaler_y.inverse_transform(y_scaled)

    # === Compute RMSE and R² (one value per target column)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred, multioutput='raw_values'))
    r2 = r2_score(y_true, y_pred, multioutput='raw_values')

    # === Pretty print results
    print(f"\n{label} Evaluation:")
    print(f"{'Target Output':<35} {'RMSE':>15} {'R² Score':>12}")
    print("-" * 64)
    for col, rmse_val, r2_val in zip(TARGET_COLUMNS, rmse, r2):
        print(f"{col:<35} {rmse_val:>15.2E} {r2_val:>12.4f}")

    # === Save predictions (all *_actual columns first, then *_predicted)
    result_columns = {
        f"{col}_actual": y_true[:, i] for i, col in enumerate(TARGET_COLUMNS)
    }
    result_columns.update({
        f"{col}_predicted": y_pred[:, i] for i, col in enumerate(TARGET_COLUMNS)
    })
    df_result = pd.DataFrame(result_columns)

    predictions_path = os.path.join(MODEL_DIR, save_name)
    df_result.to_csv(predictions_path, index=False)
    print(f"Predictions saved to: {predictions_path}")

    # === Save RMSE + R² metrics
    df_metrics = pd.DataFrame({
        "Target": TARGET_COLUMNS,
        "RMSE": [f"{val:.2E}" for val in rmse],
        "R2_Score": [round(val, 4) for val in r2]
    })

    # Derive the metrics name from the stem rather than str.replace: with
    # replace(), a save_name lacking ".csv" maps to itself and the metrics
    # file would silently overwrite the predictions file.
    stem, ext = os.path.splitext(save_name)
    metrics_filename = f"{stem}_metrics{ext or '.csv'}"
    metrics_path = os.path.join(MODEL_DIR, metrics_filename)
    df_metrics.to_csv(metrics_path, index=False)
    print(f"Metrics saved to: {metrics_path}")

# === 5. Evaluate All Splits ===
# Score the two internal splits carved out of the training file.
internal_splits = (
    ("Validation", X_val_scaled, y_val_scaled, "validation_predictions.csv"),
    ("Internal Test", X_test_scaled, y_test_scaled, "internal_test_predictions.csv"),
)
for split_label, X_split, y_split, out_csv in internal_splits:
    evaluate(model, X_split, y_split, split_label, out_csv)

# === 6. Evaluate on Holdout Test ===
# Same cleaning as the training file: rows without a complete target set are
# dropped and the optional 'Device' column is removed when present.
df_holdout = pd.read_csv(HOLDOUT_FILE).dropna(subset=TARGET_COLUMNS)
df_holdout = df_holdout.drop(columns=['Device'], errors='ignore')

y_holdout = df_holdout[TARGET_COLUMNS]
X_holdout = df_holdout.drop(columns=DROP_COLUMNS + TARGET_COLUMNS)
# Reuse the scalers fitted on the training split; they are not refitted here.
y_holdout_scaled = scaler_y.transform(y_holdout)
X_holdout_scaled = scaler_X.transform(X_holdout)

evaluate(
    model,
    X_holdout_scaled,
    y_holdout_scaled,
    "External Holdout",
    "holdout_predictions.csv",
)

# === 7. Save Final Model ===
model_path = os.path.join(MODEL_DIR, "model.pkl")
joblib.dump(model, model_path)
print(f"\nModel and scalers saved to: {MODEL_DIR}")


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.018781 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1110
[LightGBM] [Info] Number of data points in the train set: 337756, number of used features: 47
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.066300 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1110
[LightGBM] [Info] Number of data points in the train set: 337756, number of used features: 47
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.084085 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1110
[LightGBM] [Info] Number of data points in the train




Validation Evaluation:
Target Output                                  RMSE     R² Score
----------------------------------------------------------------
voltage_rise_time_pulse1                   1.89E-08       0.0299
voltage_rise_time_pulse2                   1.50E-07       0.9999
voltage_fall_time_pulse1                   3.42E-09       1.0000
voltage_fall_time_pulse2                   5.62E-08       0.9999
current_rise_time_pulse1                   6.69E-07       0.9965
current_rise_time_pulse2                   1.60E-07       0.9387
current_fall_time_pulse1                   4.73E-06       0.8159
current_fall_time_pulse2                   2.25E-08       0.4068
overshoot_pulse_1                          7.50E-01       0.9992
overshoot_pulse_2                          1.19E+00       1.0000
undershoot_pulse_1                         3.19E+00       0.9844
undershoot_pulse_2                         8.27E+00       0.9550
ringing_frequency_MHz                      1.16E+00       0.9993
P




Internal Test Evaluation:
Target Output                                  RMSE     R² Score
----------------------------------------------------------------
voltage_rise_time_pulse1                   1.68E-08       0.2314
voltage_rise_time_pulse2                   1.46E-07       0.9999
voltage_fall_time_pulse1                   3.26E-09       1.0000
voltage_fall_time_pulse2                   5.07E-08       0.9999
current_rise_time_pulse1                   6.62E-07       0.9966
current_rise_time_pulse2                   1.57E-07       0.9411
current_fall_time_pulse1                   4.85E-06       0.8171
current_fall_time_pulse2                   2.76E-08       0.3252
overshoot_pulse_1                          7.37E-01       0.9992
overshoot_pulse_2                          1.20E+00       1.0000
undershoot_pulse_1                         2.10E+04       0.0000
undershoot_pulse_2                         7.92E+00       0.9588
ringing_frequency_MHz                      1.15E+00       0.999




External Holdout Evaluation:
Target Output                                  RMSE     R² Score
----------------------------------------------------------------
voltage_rise_time_pulse1                   1.43E-10       0.9474
voltage_rise_time_pulse2                   2.55E-09       1.0000
voltage_fall_time_pulse1                   8.59E-09       1.0000
voltage_fall_time_pulse2                   8.75E-09       1.0000
current_rise_time_pulse1                   5.50E-08       1.0000
current_rise_time_pulse2                   5.45E-09       0.7538
current_fall_time_pulse1                   3.20E-07       0.9861
current_fall_time_pulse2                   5.88E-10       0.9593
overshoot_pulse_1                          6.10E-01       0.9852
overshoot_pulse_2                          1.00E+00       1.0000
undershoot_pulse_1                         1.68E+00       0.9787
undershoot_pulse_2                         1.98E+00       0.9721
ringing_frequency_MHz                      3.87E+00       0.

In [10]:

# Usage: $env:PYTHONPATH="."; python Classical_Models\LGBM\lightgbm_generalization.py

import os
import time
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# === CONFIGURATION ===
# FAST_MODE=True trains on a 25% sample of the training rows with 100 trees
# per target (same meaning as in the train/validation/test cell above);
# FAST_MODE=False (this cell's setting) uses every row and 300 trees.
# The flag is honoured below so its value always matches what is trained.
FAST_MODE = False
TRAIN_FILE = r"C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\data\raw\merged_train_5_MOSFETS.csv"
TEST_FILE = r"C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\data\raw\merged_test_1_MOSFET.csv"
MODEL_DIR = r"C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\Classical_Models\models\lightgbm\generalization"
os.makedirs(MODEL_DIR, exist_ok=True)

# The 13 regression targets; one LightGBM regressor is fitted per entry.
TARGET_COLUMNS = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2',
    'overshoot_pulse_1', 'overshoot_pulse_2',
    'undershoot_pulse_1', 'undershoot_pulse_2',
    'ringing_frequency_MHz'
]
# Columns removed from the feature matrix in addition to the targets.
DROP_COLUMNS = ['DeviceID']


def _load_clean_frame(csv_path):
    """Read ``csv_path``, drop rows lacking any target and the optional 'Device' column."""
    frame = pd.read_csv(csv_path).dropna(subset=TARGET_COLUMNS)
    return frame.drop(columns=['Device'], errors='ignore')


# === 1. Load & Sample Training Data ===
df_train = _load_clean_frame(TRAIN_FILE)
if FAST_MODE:
    df_train = df_train.sample(frac=0.25, random_state=42)

X_train = df_train.drop(columns=DROP_COLUMNS + TARGET_COLUMNS)
y_train = df_train[TARGET_COLUMNS]

# === 2. Load Unseen Test Data ===
# The test file is never subsampled; it is evaluated in full.
df_test = _load_clean_frame(TEST_FILE)

X_test = df_test.drop(columns=DROP_COLUMNS + TARGET_COLUMNS)
y_test = df_test[TARGET_COLUMNS]

# === 3. Scaling ===
# Both scalers are fitted on the training file only and then applied
# unchanged to the unseen test file.
scaler_X = StandardScaler()
scaler_y = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)
y_train_scaled = scaler_y.fit_transform(y_train)
y_test_scaled = scaler_y.transform(y_test)

# Persist the scalers so predictions can be mapped back to original units.
joblib.dump(scaler_X, os.path.join(MODEL_DIR, "input_scaler.pkl"))
joblib.dump(scaler_y, os.path.join(MODEL_DIR, "output_scaler.pkl"))

# === 4. Train LightGBM ===
lgb_params = {
    "n_estimators": 100 if FAST_MODE else 300,
    "learning_rate": 0.05,
    "num_leaves": 31,
    "random_state": 42,
    "n_jobs": -1
}
# MultiOutputRegressor wraps the single-output LGBMRegressor so that it can
# be fitted against the multi-column target matrix.
model = MultiOutputRegressor(lgb.LGBMRegressor(**lgb_params))

start_time = time.time()
model.fit(X_train_scaled, y_train_scaled)
print(f"Training completed in {time.time() - start_time:.2f} seconds")

# === 5. Evaluation ===
def evaluate(model, X_scaled, y_scaled, label, save_name):
    """Score ``model`` on one dataset, print a metrics table and save CSVs.

    Predictions and targets are passed through the module-level
    ``scaler_y.inverse_transform`` before the metrics are computed, so the
    reported RMSE is on the unscaled targets.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_scaled: Feature matrix, already transformed by the input scaler.
        y_scaled: Targets, already transformed by ``scaler_y``; columns are
            taken to be in ``TARGET_COLUMNS`` order.
        label: Dataset name shown in the heading of the printed table.
        save_name: File name (inside ``MODEL_DIR``) of the predictions CSV.
            The metrics CSV is written beside it with ``_metrics`` appended
            to the stem.

    Returns:
        None. Results are printed and written to ``MODEL_DIR``.
    """
    y_pred = scaler_y.inverse_transform(model.predict(X_scaled))
    y_true = scaler_y.inverse_transform(y_scaled)

    # === Metrics (one value per target column)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred, multioutput='raw_values'))
    r2 = r2_score(y_true, y_pred, multioutput='raw_values')

    print(f"\n{label} Evaluation:")
    print(f"{'Target Output':<35} {'RMSE':>15} {'R² Score':>12}")
    print("-" * 64)
    for col, rmse_val, r2_val in zip(TARGET_COLUMNS, rmse, r2):
        # Right-align RMSE to the 15-character width used by the header row;
        # without the width the columns do not line up under their headings.
        print(f"{col:<35} {rmse_val:>15.2E} {r2_val:>12.4f}")

    # === Save predictions (all *_actual columns first, then *_predicted)
    result_columns = {
        f"{col}_actual": y_true[:, i] for i, col in enumerate(TARGET_COLUMNS)
    }
    result_columns.update({
        f"{col}_predicted": y_pred[:, i] for i, col in enumerate(TARGET_COLUMNS)
    })
    df_result = pd.DataFrame(result_columns)
    df_result.to_csv(os.path.join(MODEL_DIR, save_name), index=False)

    # === Save metrics
    df_metrics = pd.DataFrame({
        "Target": TARGET_COLUMNS,
        "RMSE": [f"{val:.2E}" for val in rmse],
        "R2_Score": [round(val, 4) for val in r2]
    })
    # Derive the metrics name from the stem rather than str.replace: with
    # replace(), a save_name lacking ".csv" maps to itself and the metrics
    # file would silently overwrite the predictions file.
    stem, ext = os.path.splitext(save_name)
    metrics_filename = f"{stem}_metrics{ext or '.csv'}"
    df_metrics.to_csv(os.path.join(MODEL_DIR, metrics_filename), index=False)
    print(f"Results saved to: {save_name} and {metrics_filename}")

# === 6. Evaluate on Unseen Test ===
# Score the model on the separate test file loaded above.
evaluate(
    model,
    X_test_scaled,
    y_test_scaled,
    label="Generalization (Unseen MOSFET)",
    save_name="unseen_predictions.csv",
)

# === 7. Save Model ===
model_path = os.path.join(MODEL_DIR, "model.pkl")
joblib.dump(model, model_path)
print(f"\nModel and scalers saved to: {MODEL_DIR}")


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023094 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 481645, number of used features: 47
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.026199 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 481645, number of used features: 47
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.025106 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not e




Generalization (Unseen MOSFET) Evaluation:
Target Output                                  RMSE     R² Score
----------------------------------------------------------------
voltage_rise_time_pulse1            2.13E-08       0.0233
voltage_rise_time_pulse2            3.78E-07       0.0687
voltage_fall_time_pulse1            8.40E-09      -5.7392
voltage_fall_time_pulse2            5.68E-08       0.6886
current_rise_time_pulse1            8.61E-07       0.6286
current_rise_time_pulse2            8.53E-07      -0.0997
current_fall_time_pulse1            7.55E-06       0.3638
current_fall_time_pulse2            2.98E-08      -0.3102
overshoot_pulse_1                   1.79E+01       0.4983
overshoot_pulse_2                   1.32E+01       0.8971
undershoot_pulse_1                  1.15E+03   -1626.0109
undershoot_pulse_2                  2.53E+01       0.6542
ringing_frequency_MHz               3.64E+01     -28.7590
Results saved to: unseen_predictions.csv and unseen_predictions_metrics.

Generalization (Unseen MOSFET) Evaluation:
Target Output                                  RMSE     R² Score
----------------------------------------------------------------
voltage_rise_time_pulse1            2.13E-08       0.0233
voltage_rise_time_pulse2            3.78E-07       0.0687
voltage_fall_time_pulse1            8.40E-09      -5.7392
voltage_fall_time_pulse2            5.68E-08       0.6886
current_rise_time_pulse1            8.61E-07       0.6286
current_rise_time_pulse2            8.53E-07      -0.0997
current_fall_time_pulse1            7.55E-06       0.3638
current_fall_time_pulse2            2.98E-08      -0.3102
overshoot_pulse_1                   1.79E+01       0.4983
overshoot_pulse_2                   1.32E+01       0.8971
undershoot_pulse_1                  1.15E+03   -1626.0109
undershoot_pulse_2                  2.53E+01       0.6542
ringing_frequency_MHz               3.64E+01     -28.7590
Results saved to: unseen_predictions.csv and unseen_predictions_metrics.csv