### Physics-Informed Switching Severity Analysis (Per Device)
Outputs per device:
- device_labeled.csv              : original data + z-scores + Severity_Score + Scenario
- device_top5_best.csv            : 5 lowest scores (Best switching)
- device_top5_worst.csv           : 5 highest scores (Worst switching)
- device_score_hist.png           : histogram with Best/Worst thresholds
- device_PCA_best_worst.png       : PCA scatter (Best/Worst, Top-5 highlighted)
- device_contribution_bar.png     : Best vs Worst mean z-scores per parameter
- device_contrib_rows.csv         : per-row contribution breakdown (written only when SAVE_PER_ROW_CONTRIBUTIONS is True)

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Global matplotlib defaults: larger, bold fonts so the saved plots stay clear and readable.
plt.rcParams.update({
    "font.size": 18,
    "font.weight": "bold",
    "axes.labelweight": "bold",
    "axes.titlesize": 16,
    "axes.titleweight": "bold",
    "legend.fontsize": 16,
    "xtick.labelsize": 16,
    "ytick.labelsize": 16
})

# Input CSV (read by main()) and the root folder that receives one sub-folder per device.
INPUT_FILE  = r"C:\Users\pc\Desktop\Neural_Network_Models\merged_6_MOSFETs.csv"
OUTPUT_DIR  = r"C:\Users\pc\Desktop\Neural_Network_Models\Switching_Scenarios_Physics"
# Runs at import time; exist_ok=True means an already-existing folder is not an error.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Quantiles of Severity_Score that define the Best and Worst extremes.
BEST_Q  = 0.10      # scores at or below this quantile are labelled "Best"
WORST_Q = 0.90      # scores at or above this quantile are labelled "Worst"
SAVE_PER_ROW_CONTRIBUTIONS = True  # when True, main() also writes <device>_contrib_rows.csv
# NOTE(review): the original note here read "this is for the same combinations of the
# iterations"; its intent is not clear from this file -- confirm or remove.

# Switching-behaviour outputs grouped by physical meaning: transition times
# (voltage/current, rise/fall), voltage overshoot/undershoot, and ringing frequency.
V_RISE = ["voltage_rise_time_pulse1", "voltage_rise_time_pulse2"]
V_FALL = ["voltage_fall_time_pulse1", "voltage_fall_time_pulse2"]
I_RISE = ["current_rise_time_pulse1", "current_rise_time_pulse2"]
I_FALL = ["current_fall_time_pulse1", "current_fall_time_pulse2"]
OVERS = ["overshoot_pulse_1", "overshoot_pulse_2"]
UNDERS = ["undershoot_pulse_1", "undershoot_pulse_2"]
RINGING = ["ringing_frequency_MHz"]

# The 13 target columns, in group order: the flat list used for cleaning,
# standardising and naming the "__z" columns.
TARGET_COLS = V_RISE + V_FALL + I_RISE + I_FALL + OVERS + UNDERS + RINGING

# Make every target column numeric and keep only rows where all targets are present.
def ensure_numeric_targets(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with numeric target columns and no missing targets.

    Each column in TARGET_COLS is converted with ``pd.to_numeric``; values that
    cannot be parsed become NaN (``errors="coerce"``). Rows with NaN in any
    target column are then dropped. The input frame is not modified.
    """
    coerced = {
        col: pd.to_numeric(df[col], errors="coerce")
        for col in TARGET_COLS
    }
    # assign() works on a copy, so the caller's frame stays untouched.
    return df.assign(**coerced).dropna(subset=TARGET_COLS)
# Standardise the target outputs of a single device and return them as z-score columns.
def standardize_outputs(df_dev: pd.DataFrame) -> pd.DataFrame:
    """Return z-scores of the TARGET_COLS columns of *df_dev*.

    A fresh ``StandardScaler`` is fitted on the rows passed in, so the scaling
    is relative to this frame only. The result shares the index of *df_dev*
    and names each column ``"<target>__z"``.
    """
    scaled = StandardScaler().fit_transform(df_dev[TARGET_COLS].values)
    return pd.DataFrame(
        scaled,
        index=df_dev.index,
        columns=[name + "__z" for name in TARGET_COLS],
    )

# Physics-informed scoring: the earlier overall severity score was not precise
# because it ignored the physical meaning of each output (e.g. the lower the
# overshoot and undershoot, the lower the EMI), so each parameter group gets an
# explicit sign here.
def compute_physics_score_and_contrib(zdf: pd.DataFrame) -> tuple[pd.Series, pd.DataFrame]:
    """Compute the physics-informed Severity_Score and per-parameter contributions.

    Rules (a higher contribution means worse switching):
      - Faster V/I rise/fall => worse   --> use -z
      - Larger over/under-shoot => worse --> use +z
      - Lower ringing frequency => worse --> use -z

    Args:
        zdf: standardized outputs with one ``"<target>__z"`` column for every
            name in V_RISE, V_FALL, I_RISE, I_FALL, OVERS, UNDERS and RINGING.

    Returns:
        score: pd.Series with the row-wise sum of all contributions.
        contrib_df: pd.DataFrame of per-row contributions, one column per
            target (base names, without the ``"__z"`` suffix), indexed like *zdf*.

    Raises:
        KeyError: if an expected ``"__z"`` column is missing from *zdf*.
    """
    # (columns, sign) pairs; the order fixes the column order of the result.
    signed_groups = (
        (V_RISE + V_FALL, -1),   # voltage transitions: fast is worse
        (I_RISE + I_FALL, -1),   # current transitions: fast is worse
        (OVERS + UNDERS, +1),    # overshoot / undershoot: larger is worse
        (RINGING, -1),           # ringing frequency: lower is worse
    )

    contrib = pd.DataFrame(index=zdf.index)
    for names, sign in signed_groups:
        for name in names:
            z_values = zdf[f"{name}__z"]
            contrib[name] = -z_values if sign < 0 else z_values

    # Total score: sum of the signed z-scores across all parameters.
    score = contrib.sum(axis=1)
    return score, contrib


# Histogram of the severity scores with the Best/Worst cut-offs marked
def plot_score_hist(df_dev: pd.DataFrame, out_dir: str, device: str):
    """Save a histogram of ``Severity_Score`` for one device.

    Dashed vertical lines mark the BEST_Q (green) and WORST_Q (red) quantiles
    of the score. The figure is written to ``<out_dir>/<device>_score_hist.png``
    and closed afterwards.
    """
    scores = df_dev["Severity_Score"]
    thresholds = (
        (BEST_Q, "green", "Best threshold"),
        (WORST_Q, "red", "Worst threshold"),
    )
    target_path = os.path.join(out_dir, f"{device}_score_hist.png")

    plt.figure(figsize=(9, 5))
    plt.hist(scores, bins=50, alpha=0.9, color="steelblue")
    for quantile, colour, text in thresholds:
        plt.axvline(scores.quantile(quantile), linestyle="--", color=colour, label=text)

    plt.title(f"Physics-Informed Severity Score Distribution ({device})")
    plt.xlabel("Severity_Score (physics-informed)")
    plt.ylabel("Count")
    plt.legend()
    plt.tight_layout()
    plt.savefig(target_path, dpi=240)
    plt.close()


def plot_pca_best_worst(df_dev: pd.DataFrame, zdf: pd.DataFrame, out_dir: str, device: str):
    """
    PCA scatter on standardized outputs (zdf), only Best & Worst points shown:
      - light green = all Best
      - light red   = all Worst
      - dark green  = Top-5 Best
      - dark red    = Top-5 Worst

    Args:
        df_dev: rows of one device; must contain "Scenario" and "Severity_Score".
        zdf: standardized outputs for the same rows, in the same row order as
            *df_dev* (the PCA coordinates are attached positionally).
        out_dir: folder that receives ``<device>_PCA_best_worst.png``.
        device: device name used in the title and the file name.

    Returns:
        None. The plot is skipped (nothing is written) when *zdf* has fewer
        than two rows or fewer than two columns, because a 2-component PCA
        cannot be fitted on such data.
    """
    # PCA(n_components=2) needs n_components <= min(n_samples, n_features);
    # without this guard a device with a single valid row aborts the whole run.
    if min(zdf.shape) < 2:
        print(f"  -> Skipping PCA plot for {device}: need at least 2 rows and 2 features.")
        return

    pca = PCA(n_components=2, random_state=42)
    X_pca = pca.fit_transform(zdf.values)
    dfp = df_dev.copy()
    dfp["PCA1"], dfp["PCA2"] = X_pca[:, 0], X_pca[:, 1]

    best = dfp[dfp["Scenario"] == "Best"]
    worst = dfp[dfp["Scenario"] == "Worst"]

    # Drawn in this order so the large Top-5 markers sit on top of the light
    # clouds and the legend entries keep their order.
    layers = (
        (best, {"s": 15, "alpha": 0.4, "c": "lightgreen", "label": "All Best"}),
        (worst, {"s": 15, "alpha": 0.4, "c": "lightcoral", "label": "All Worst"}),
        # Lowest scores are the best switching events ...
        (best.sort_values("Severity_Score").head(5),
         {"s": 120, "c": "darkgreen", "label": "Top 5 Best"}),
        # ... and the highest scores are the worst.
        (worst.sort_values("Severity_Score", ascending=False).head(5),
         {"s": 120, "c": "darkred", "label": "Top 5 Worst"}),
    )

    plt.figure(figsize=(10, 6))
    for points, style in layers:
        if not points.empty:
            plt.scatter(points["PCA1"], points["PCA2"], **style)

    expl = pca.explained_variance_ratio_
    plt.xlabel(f"PCA1 (z-outputs) — {expl[0]*100:.1f}% var")
    plt.ylabel(f"PCA2 (z-outputs) — {expl[1]*100:.1f}% var")
    plt.title(f"PCA of Switching Behaviour (Physics-Informed) — {device}")
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(out_dir, f"{device}_PCA_best_worst.png"), dpi=240)
    plt.close()

def plot_contribution_bar(df_out: pd.DataFrame, out_dir: str, device: str):
    """
    Grouped bar chart comparing the mean z-score of every parameter in the
    Best group with the Worst group, to show where the two groups differ most.

    Nothing is drawn or saved unless both a "Best" and a "Worst" row exist.
    Otherwise the chart is written to ``<out_dir>/<device>_contribution_bar.png``.
    """
    # Standardized columns are recognised by their "__z" suffix.
    z_columns = [col for col in df_out.columns if col.endswith("__z")]
    extremes = df_out[df_out["Scenario"].isin(["Best", "Worst"])]
    if not {"Best", "Worst"} <= set(extremes["Scenario"]):
        return

    # Rows = parameters, columns = scenario ("Best" / "Worst").
    group_means = extremes.groupby("Scenario")[z_columns].mean().T
    positions = np.arange(len(group_means))
    bar_width = 0.42

    plt.figure(figsize=(12, 6))
    for label, shift in (("Best", -bar_width / 2), ("Worst", bar_width / 2)):
        plt.bar(positions + shift, group_means[label].values, width=bar_width, label=label)

    tick_text = [
        name.replace("__z", "").replace("_", " ")
        for name in group_means.index
    ]
    plt.xticks(positions, tick_text, rotation=45, ha="right")
    plt.ylabel("Mean standardized value (z-score)")
    plt.title(f"Parameter Contributions: Best vs Worst ({device})")
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(out_dir, f"{device}_contribution_bar.png"), dpi=240)
    plt.close()

# ---------- MAIN --------------
def main():
    """Run the physics-informed severity analysis for every device in INPUT_FILE.

    For each ``Part_Number`` group the targets are cleaned and standardized,
    a Severity_Score is computed, rows are labelled Best / Neutral / Worst by
    the BEST_Q / WORST_Q quantiles of that score, and the CSVs and plots are
    written to ``OUTPUT_DIR/<device>/``.

    Raises:
        ValueError: if the input has no 'Part_Number' column or lacks any of
            the TARGET_COLS columns.
    """
    df_all = pd.read_csv(INPUT_FILE)
    if "Part_Number" not in df_all.columns:
        raise ValueError("Input must include a 'Part_Number' column.")

    # Fail early with a readable message instead of a bare KeyError raised
    # from inside the per-device loop.
    missing_targets = [c for c in TARGET_COLS if c not in df_all.columns]
    if missing_targets:
        raise ValueError(
            f"Input is missing target column(s): {', '.join(missing_targets)}"
        )

    for part_number, df_dev in df_all.groupby("Part_Number"):
        # Group keys may be non-string (e.g. numeric part numbers);
        # os.path.join only accepts str path components.
        device = str(part_number)
        print(f"\n=== Processing: {device} ===")

        # Clean and ensure numeric targets
        df_dev = ensure_numeric_targets(df_dev)
        if df_dev.empty:
            print("  -> No valid rows after cleaning; skipping.")
            continue

        # Create the folder only for devices that actually produce output,
        # so skipped devices do not leave empty directories behind.
        out_dir = os.path.join(OUTPUT_DIR, device)
        os.makedirs(out_dir, exist_ok=True)

        # Standardize outputs per device
        zdf = standardize_outputs(df_dev)

        # Compute physics-informed Severity_Score (+ per-row contributions)
        score, contrib = compute_physics_score_and_contrib(zdf)
        df_dev = df_dev.copy()
        df_dev["Severity_Score"] = score

        # Label Best / Neutral / Worst; "Best" is checked first, so it wins
        # when both thresholds coincide.
        low = df_dev["Severity_Score"].quantile(BEST_Q)
        high = df_dev["Severity_Score"].quantile(WORST_Q)
        df_dev["Scenario"] = np.where(
            df_dev["Severity_Score"] <= low, "Best",
            np.where(df_dev["Severity_Score"] >= high, "Worst", "Neutral"),
        )

        # Save annotated CSV (original data + score + label + z-scores)
        df_out = pd.concat([df_dev, zdf], axis=1)
        df_out.to_csv(os.path.join(out_dir, f"{device}_labeled.csv"), index=False)

        # Save Top-5 Best (lowest scores) / Worst (highest scores)
        is_best = df_out["Scenario"] == "Best"
        is_worst = df_out["Scenario"] == "Worst"
        top5_best = df_out[is_best].sort_values("Severity_Score").head(5)
        top5_worst = df_out[is_worst].sort_values("Severity_Score", ascending=False).head(5)
        top5_best.to_csv(os.path.join(out_dir, f"{device}_top5_best.csv"), index=False)
        top5_worst.to_csv(os.path.join(out_dir, f"{device}_top5_worst.csv"), index=False)

        # (Optional) per-row contribution CSV; rows are in the same order as
        # in <device>_labeled.csv.
        if SAVE_PER_ROW_CONTRIBUTIONS:
            contrib_csv = contrib.copy()
            contrib_csv["Severity_Score"] = score
            contrib_csv["Scenario"] = df_dev["Scenario"].values
            contrib_csv.to_csv(os.path.join(out_dir, f"{device}_contrib_rows.csv"), index=False)

        # Plots
        plot_score_hist(df_out, out_dir, device)
        plot_pca_best_worst(df_out, zdf, out_dir, device)
        plot_contribution_bar(df_out, out_dir, device)

        print(f"  -> Saved artifacts in: {out_dir}")


if __name__ == "__main__":
    main()



=== Processing: C2M0025120D ===
  -> Saved artifacts in: C:\Users\pc\Desktop\Neural_Network_Models\Switching_Scenarios_Physics\C2M0025120D

=== Processing: C2M0040120D ===
  -> Saved artifacts in: C:\Users\pc\Desktop\Neural_Network_Models\Switching_Scenarios_Physics\C2M0040120D

=== Processing: C2M0080120D ===
  -> Saved artifacts in: C:\Users\pc\Desktop\Neural_Network_Models\Switching_Scenarios_Physics\C2M0080120D

=== Processing: C2M0160120D ===
  -> Saved artifacts in: C:\Users\pc\Desktop\Neural_Network_Models\Switching_Scenarios_Physics\C2M0160120D

=== Processing: C2M0280120D ===
  -> Saved artifacts in: C:\Users\pc\Desktop\Neural_Network_Models\Switching_Scenarios_Physics\C2M0280120D

=== Processing: C2M1000170D ===
  -> Saved artifacts in: C:\Users\pc\Desktop\Neural_Network_Models\Switching_Scenarios_Physics\C2M1000170D
