In [9]:
# Classical_Models/RF/random_forest_generalization.py
# Usage: $env:PYTHONPATH="."; python Classical_Models\RF\random_forest_generalization.py

import os
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import joblib
import time

# === CONFIGURATION ===
# Input CSVs (training and held-out test data) and the directory that
# receives the scalers, the model, and the evaluation outputs.
TRAIN_FILE = r"C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\data\raw\merged_train_5_MOSFETS.csv"
TEST_FILE = r"C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\data\raw\merged_test_1_MOSFET.csv"
MODEL_DIR = r"C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\Classical_Models\models\random_forest\generalization"

# Create the output directory up front; a pre-existing directory is fine.
os.makedirs(MODEL_DIR, exist_ok=True)

# Columns used as regression targets. The order here fixes the order of the
# rows in the printed/saved metrics and of the columns in the predictions CSV.
TARGET_COLUMNS = [
    # voltage edge times
    "voltage_rise_time_pulse1",
    "voltage_rise_time_pulse2",
    "voltage_fall_time_pulse1",
    "voltage_fall_time_pulse2",
    # current edge times
    "current_rise_time_pulse1",
    "current_rise_time_pulse2",
    "current_fall_time_pulse1",
    "current_fall_time_pulse2",
    # overshoot / undershoot
    "overshoot_pulse_1",
    "overshoot_pulse_2",
    "undershoot_pulse_1",
    "undershoot_pulse_2",
    # ringing
    "ringing_frequency_MHz",
]

# Columns removed from the feature matrix in addition to the targets.
DROP_COLUMNS = ["DeviceID"]

# === 1. Load Data ===
# Training data: a reproducible 25% random subsample (fixed seed), then rows
# with a missing value in any target column are discarded.
df_train = (
    pd.read_csv(TRAIN_FILE)
    .sample(frac=0.25, random_state=42)
    .dropna(subset=TARGET_COLUMNS)
)
# Test data: used in full, minus rows with a missing target value.
df_test = pd.read_csv(TEST_FILE).dropna(subset=TARGET_COLUMNS)

# Features are every column that is neither in DROP_COLUMNS nor a target.
X_train = df_train.drop(columns=DROP_COLUMNS + TARGET_COLUMNS)
X_test = df_test.drop(columns=DROP_COLUMNS + TARGET_COLUMNS)
y_train = df_train[TARGET_COLUMNS]
y_test = df_test[TARGET_COLUMNS]

# === 2. Scaling ===
# Both scalers learn their statistics from the training split only; the test
# split is transformed with those same statistics.
scaler_X = StandardScaler().fit(X_train)
scaler_y = StandardScaler().fit(y_train)

X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)
y_train_scaled = scaler_y.transform(y_train)
y_test_scaled = scaler_y.transform(y_test)

# Save scalers
joblib.dump(scaler_X, os.path.join(MODEL_DIR, "input_scaler.pkl"))
joblib.dump(scaler_y, os.path.join(MODEL_DIR, "output_scaler.pkl"))

# === 3. Train Model ===
# A random forest wrapped in MultiOutputRegressor to handle all target columns.
base_forest = RandomForestRegressor(n_estimators=30, random_state=42, n_jobs=-1)
model = MultiOutputRegressor(base_forest)

# Only the fit call is timed.
start_time = time.time()
model.fit(X_train_scaled, y_train_scaled)
elapsed = time.time() - start_time
print(f"Generalization training completed in {elapsed:.2f} seconds")

# === 4. Evaluate on Unseen Test Data ===
def evaluate_and_save(model, X_scaled, y_scaled, label, save_prefix):
    """Score *model* on one data split and write predictions and metrics to CSV.

    Predictions and ground truth are mapped back through the module-level
    ``scaler_y`` before per-target RMSE and R² are computed. A metrics table
    is printed, and two files are written into ``MODEL_DIR``:
    ``<save_prefix>_predictions.csv`` and ``<save_prefix>_metrics.csv``.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_scaled: Scaled feature matrix handed to ``model.predict``.
        y_scaled: Scaled targets, columns ordered as ``TARGET_COLUMNS``.
        label: Heading printed above the metrics table.
        save_prefix: File-name prefix for the two CSV outputs.
    """
    # Undo the target scaling so errors are reported in the original units.
    preds = scaler_y.inverse_transform(model.predict(X_scaled))
    truth = scaler_y.inverse_transform(y_scaled)

    # One value per target column rather than a single aggregate.
    per_target_mse = mean_squared_error(truth, preds, multioutput='raw_values')
    rmse = np.sqrt(per_target_mse)
    r2 = r2_score(truth, preds, multioutput='raw_values')

    # Console report
    print(f"\n{label} Evaluation:")
    print(f"{'Target Output':<35} {'RMSE':>15} {'R² Score':>12}")
    print("-" * 64)
    for name, err, score in zip(TARGET_COLUMNS, rmse, r2):
        print(f"{name:<35} {err:>15.2E} {score:>12.4f}")

    # Predictions file: every "<target>_actual" column first, then every
    # "<target>_predicted" column.
    columns = {
        f"{name}_actual": truth[:, idx] for idx, name in enumerate(TARGET_COLUMNS)
    }
    columns.update(
        {f"{name}_predicted": preds[:, idx] for idx, name in enumerate(TARGET_COLUMNS)}
    )
    pred_path = os.path.join(MODEL_DIR, f"{save_prefix}_predictions.csv")
    pd.DataFrame(columns).to_csv(pred_path, index=False)

    # Metrics file: RMSE is stored as a formatted string, R² rounded to 4 places.
    metrics_path = os.path.join(MODEL_DIR, f"{save_prefix}_metrics.csv")
    pd.DataFrame({
        "Target": TARGET_COLUMNS,
        "RMSE": [f"{err:.2E}" for err in rmse],
        "R2_Score": [round(score, 4) for score in r2],
    }).to_csv(metrics_path, index=False)

    print(f"Saved predictions to: {pred_path}")
    print(f"Saved metrics to: {metrics_path}")

# === 5. Run Evaluation ===
# Score the trained model on the held-out test file and write the CSV reports.
evaluate_and_save(
    model,
    X_test_scaled,
    y_test_scaled,
    label="Unseen MOSFET Generalization",
    save_prefix="generalization",
)

# === 6. Save Model ===
# Persist the fitted model alongside the scalers in MODEL_DIR.
model_path = os.path.join(MODEL_DIR, "rf_model_generalization.pkl")
joblib.dump(model, model_path)
print("\nFinal generalization model saved.")


Generalization training completed in 172.58 seconds

Unseen MOSFET Generalization Evaluation:
Target Output                                  RMSE     R² Score
----------------------------------------------------------------
voltage_rise_time_pulse1                   2.23E-08      -0.0655
voltage_rise_time_pulse2                   2.87E-07       0.4606
voltage_fall_time_pulse1                   9.31E-09      -7.2793
voltage_fall_time_pulse2                   8.69E-08       0.2725
current_rise_time_pulse1                   9.55E-07       0.5431
current_rise_time_pulse2                   3.57E-07       0.8074
current_fall_time_pulse1                   7.57E-06       0.3598
current_fall_time_pulse2                   2.66E-08      -0.0478
overshoot_pulse_1                          2.44E+01       0.0757
overshoot_pulse_2                          1.82E+01       0.8066
undershoot_pulse_1                         1.92E+01       0.5474
undershoot_pulse_2                         2.46E+01       0.6713
ringing_frequency_MHz                      3.70E+01     -29.7959

Generalization training completed in 172.58 seconds

Unseen MOSFET Generalization Evaluation:
Target Output                                  RMSE     R² Score
----------------------------------------------------------------
- voltage_rise_time_pulse1            |       2.23E-08   |   -0.0655
- voltage_rise_time_pulse2            |       2.87E-07   |    0.4606
- voltage_fall_time_pulse1            |       9.31E-09   |   -7.2793
- voltage_fall_time_pulse2            |       8.69E-08   |    0.2725
- current_rise_time_pulse1            |       9.55E-07   |    0.5431
- current_rise_time_pulse2            |       3.57E-07   |    0.8074
- current_fall_time_pulse1            |       7.57E-06   |    0.3598
- current_fall_time_pulse2            |       2.66E-08   |   -0.0478
- overshoot_pulse_1                   |       2.44E+01   |    0.0757
- overshoot_pulse_2                   |       1.82E+01   |    0.8066
- undershoot_pulse_1                  |       1.92E+01   |    0.5474
- undershoot_pulse_2                  |       2.46E+01   |    0.6713
- ringing_frequency_MHz               |       3.70E+01   |  -29.7959


Saved predictions to: C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\Classical_Models\models\random_forest\generalization\generalization_predictions.csv
Saved metrics to: C:\Users\pc\Desktop\PROJECT_THESIS_Thrisha_Rajkumar\Classical_Models\models\random_forest\generalization\generalization_metrics.csv

Final generalization model saved.