In [7]:
# svr_baseline_25pct_701515_eval.py
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# --------------------- Run settings ---------------------
# Location of the training CSV -- edit this for your machine.
TRAIN_FILE = r"C:\Users\pc\Desktop\Classical_Models\MERGED_ORIGINAL\Train_5_MOSFETs.csv"
# Seed reused for row sampling, both data splits and the regressor.
RANDOM_STATE = 42
# Fraction of rows kept by the quick sub-sample (0.25 == 25%).
FAST_FRAC = 0.25
# Column count for a plot grid; reserved for plots that may be added later.
GRID_COLS = 3
# ---------------------------------------------------------

# Names of the CSV columns the model predicts, in output-column order.
TARGETS = [
    # switching times, voltage
    'voltage_rise_time_pulse1',
    'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1',
    'voltage_fall_time_pulse2',
    # switching times, current
    'current_rise_time_pulse1',
    'current_rise_time_pulse2',
    'current_fall_time_pulse1',
    'current_fall_time_pulse2',
    # overshoot / undershoot
    'overshoot_pulse_1',
    'overshoot_pulse_2',
    'undershoot_pulse_1',
    'undershoot_pulse_2',
    # ringing
    'ringing_frequency_MHz',
]

def fast_sample(df, frac=0.25, seed=42):
    """Return a random subset of ``df`` with a fresh 0..n-1 index.

    When ``frac`` lies strictly between 0 and 1, that fraction of the rows
    is drawn with ``random_state=seed``. Any other value of ``frac`` skips
    sampling and returns every row in its original order. In both cases
    the old index is discarded.
    """
    wants_subset = frac > 0 and frac < 1.0
    picked = df.sample(frac=frac, random_state=seed) if wants_subset else df
    return picked.reset_index(drop=True)

def compute_metrics_table(y_true, y_pred):
    """Build a per-target metrics table plus the averages across targets.

    Column ``i`` of ``y_true`` / ``y_pred`` is scored against ``TARGETS[i]``.

    Args:
        y_true: 2-D array of reference values, one column per target.
        y_pred: 2-D array of predictions with the same layout as ``y_true``.

    Returns:
        A ``(table, overall)`` tuple. ``table`` is a DataFrame with the
        columns ``Target``, ``R2``, ``RMSE`` and ``MAE``; the metric cells
        are pre-formatted strings. ``overall`` is a Series holding the
        formatted means under ``R2_avg``, ``RMSE_avg`` and ``MAE_avg``.
    """
    rows = []
    r2_vals, rmse_vals, mae_vals = [], [], []
    for i, t in enumerate(TARGETS):
        yt, yp = y_true[:, i], y_pred[:, i]
        r2 = r2_score(yt, yp)
        rmse = np.sqrt(mean_squared_error(yt, yp))
        mae = mean_absolute_error(yt, yp)
        r2_vals.append(r2)
        rmse_vals.append(rmse)
        mae_vals.append(mae)
        rows.append([t, f"{r2:.4f}", f"{rmse:.4e}", f"{mae:.4e}"])
    df = pd.DataFrame(rows, columns=["Target", "R2", "RMSE", "MAE"])

    # Average the raw metric values, not the rounded display strings, so the
    # reported means are not distorted by per-row rounding.
    # NOTE(review): RMSE/MAE are averaged across targets whose magnitudes can
    # differ by many orders, so these two means are dominated by the
    # largest-valued targets -- confirm that is the intended summary.
    overall = pd.Series({
        "R2_avg": f"{np.mean(r2_vals):.4f}",
        "RMSE_avg": f"{np.mean(rmse_vals):.4e}",
        "MAE_avg": f"{np.mean(mae_vals):.4e}",
    })
    return df, overall

def predict_with_timing(model, X):
    """Run ``model.predict(X)`` and measure the average latency per sample.

    Args:
        model: Any object exposing a ``predict(X)`` method.
        X: Sized collection of samples passed straight to ``predict``.

    Returns:
        A ``(y_pred, ms_per_sample)`` tuple: the raw prediction result and
        the elapsed time divided by the number of samples, in milliseconds.
    """
    # perf_counter is monotonic and high-resolution; time.time() is a wall
    # clock that can be adjusted mid-run and is too coarse on some platforms
    # to resolve short prediction calls.
    start = time.perf_counter()
    y_pred = model.predict(X)
    elapsed_s = time.perf_counter() - start
    # max(1, ...) guards against division by zero for an empty input.
    ms_per_sample = elapsed_s / max(1, len(X)) * 1000.0
    return y_pred, ms_per_sample

def _report_split(title, model, y_scaler, X_scaled, y_true, train_time=None):
    """Predict on one split, undo the target scaling, and print its metrics."""
    y_pred_scaled, ms_per_sample = predict_with_timing(model, X_scaled)
    # Metrics are reported in the targets' original units, not scaled units.
    y_pred = y_scaler.inverse_transform(y_pred_scaled)
    table, overall = compute_metrics_table(y_true, y_pred)
    print(f"\n=== {title} ===")
    print(table.to_string(index=False))
    print(overall)
    if train_time is not None:
        print(f"Train Time (s): {train_time:.4f}")
    print(f"Predict Time (ms/sample): {ms_per_sample:.4f}")


def main():
    """Train a multi-output LinearSVR baseline and print train/val/test metrics.

    Steps: load ``TRAIN_FILE``, drop rows with missing values and all
    object-dtype columns, take a ``FAST_FRAC`` sub-sample, split 70/15/15,
    standardize features and targets using statistics from the training
    split only, fit one ``LinearSVR`` per target, then report R2/RMSE/MAE
    on each split in the original target units.

    Raises:
        KeyError: If any column named in ``TARGETS`` is absent from the data.
    """
    # -------- Load & prep --------
    df = pd.read_csv(TRAIN_FILE).dropna().reset_index(drop=True)

    # Drop object/string columns (e.g., Part_Number)
    obj_cols = [c for c in df.columns if df[c].dtype == 'object']
    if obj_cols:
        df = df.drop(columns=obj_cols)

    # Rows with any NaN were already removed above; this call is kept because
    # it also fails fast (KeyError) when a target column is missing.
    df = df.dropna(subset=TARGETS)

    # Fast 25% sample
    df = fast_sample(df, frac=FAST_FRAC, seed=RANDOM_STATE)
    print(f"[INFO] FAST sample: {len(df)} rows")

    # Features / targets: every remaining non-target column is a feature.
    feature_cols = [c for c in df.columns if c not in TARGETS]
    X_all = df[feature_cols].values
    y_all = df[TARGETS].values

    # -------- 70 / 15 / 15 split --------
    X_train, X_temp, y_train, y_temp = train_test_split(
        X_all, y_all, test_size=0.30, random_state=RANDOM_STATE, shuffle=True
    )
    # Halve the 30% hold-out into validation and test.
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.50, random_state=RANDOM_STATE, shuffle=True
    )

    # -------- Scaling (X & y) --------
    # Both scalers are fitted on the training split only, so no statistics
    # from the validation/test data leak into the model.
    x_scaler = StandardScaler()
    y_scaler = StandardScaler()

    X_train_s = x_scaler.fit_transform(X_train)
    X_val_s = x_scaler.transform(X_val)
    X_test_s = x_scaler.transform(X_test)

    y_train_s = y_scaler.fit_transform(y_train)

    # -------- Model (fast) --------
    model = MultiOutputRegressor(LinearSVR(C=1.0, max_iter=2000, random_state=RANDOM_STATE))

    # perf_counter is monotonic, unlike the adjustable wall clock time.time().
    fit_start = time.perf_counter()
    model.fit(X_train_s, y_train_s)
    train_time = time.perf_counter() - fit_start

    # -------- Evaluate: TRAIN / VAL / TEST --------
    _report_split("Train Set", model, y_scaler, X_train_s, y_train)
    # The training time is printed as part of the validation section.
    _report_split("Validation Set", model, y_scaler, X_val_s, y_val, train_time=train_time)
    _report_split("Test Set", model, y_scaler, X_test_s, y_test)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


[INFO] FAST sample: 107919 rows





=== Train Set ===
                  Target     R2       RMSE        MAE
voltage_rise_time_pulse1 0.9108 1.2028e-09 8.7621e-10
voltage_rise_time_pulse2 0.5693 2.9040e-09 1.0028e-09
voltage_fall_time_pulse1 0.9813 9.3199e-10 6.6378e-10
voltage_fall_time_pulse2 0.9813 9.3004e-10 6.6199e-10
current_rise_time_pulse1 0.8823 1.6188e-08 1.0417e-08
current_rise_time_pulse2 0.7100 1.4312e-08 8.9919e-09
current_fall_time_pulse1 0.9625 2.4879e-09 1.8598e-09
current_fall_time_pulse2 0.9622 2.4918e-09 1.8697e-09
       overshoot_pulse_1 0.8282 5.5812e+00 3.8498e+00
       overshoot_pulse_2 0.8185 1.1365e+01 8.2514e+00
      undershoot_pulse_1 0.8643 5.0022e+00 3.1050e+00
      undershoot_pulse_2 0.8672 4.9435e+00 3.1073e+00
   ringing_frequency_MHz 0.9943 2.4317e+00 1.5375e+00
R2_avg          0.8717
RMSE_avg    2.2557e+00
MAE_avg     1.5270e+00
dtype: object
Predict Time (ms/sample): 0.0010

=== Validation Set ===
                  Target     R2       RMSE        MAE
voltage_rise_time_pulse1 0.9112