# Monte Carlo Visualizer (`mc-vis.ipynb`)
**How to use**  
1. Edit the `ARGS` list below just as you would supply command‑line flags.  
   *Examples:*  
   * `ARGS = []` &nbsp;→ uses the default CSV (`mc_data/mc_output6.csv`) and shows plots.  
   * `ARGS = ["mc_data/mc_output5.csv", "--save"]` &nbsp;→ saves PNGs into a folder named after the CSV, created next to it (here `mc_data/mc_output5/`).  
2. Run the remaining cells in order.  Figures appear inline unless `--save` is given.

> The few notebook‑specific tweaks are wrapped in ▸ comments so you can still drop this back into a `.py` file if desired.


In [1]:
# Command-line style arguments handed to main(): an optional CSV path,
# optionally followed by the flags --save and/or --pairplot.
# Leave the list empty to analyse the default CSV and show the plots.
ARGS = []  # e.g. ARGS = ["mc_data/mc_output5.csv", "--save", "--pairplot"]


In [None]:
import argparse
import pathlib
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

# --------------------------------------------------
# Helper / utility functions (unchanged from script)
# --------------------------------------------------

def describe(df: pd.DataFrame, num_cols) -> None:
    """Print summary statistics for the selected columns of ``df``.

    The table comes from ``DataFrame.describe`` with the 5th, 25th, 50th,
    75th and 95th percentiles and is rounded to three decimals before being
    printed under a heading line. Nothing is returned.
    """
    quantiles = [0.05, 0.25, 0.5, 0.75, 0.95]

    print("\n=== Descriptive statistics (numeric columns) ===")
    summary = df[num_cols].describe(percentiles=quantiles)
    print(summary.round(3))


def correlation_matrix(df: pd.DataFrame, num_cols) -> pd.DataFrame:
    """Print and plot the Pearson correlations between the given columns.

    Three things happen, in order:

    1. The Pearson correlation matrix of ``df[num_cols]`` is printed,
       rounded to three decimals.
    2. A combined p-value table is printed: entries below the diagonal are
       Pearson p-values, entries above it are Spearman p-values, and the
       diagonal is 0. The table carries the same row/column labels as the
       correlation matrix so each value can be matched to a column pair.
    3. A new matplotlib figure is created holding an annotated heat map of
       the Pearson correlations. The figure is neither shown nor saved here.

    Returns the (unrounded) Pearson correlation matrix.
    """
    numeric = df[num_cols]

    corr = numeric.corr(method="pearson")
    print("\n========= Pearson correlation matrix (rho) =========")
    print(corr.round(3))

    # p-values (Pearson below diag, Spearman above).
    # DataFrame.corr accepts a callable, which is used here to evaluate the
    # p-value (element [1] of the scipy result) for every column pair.
    p_pearson = numeric.corr(method=lambda x, y: stats.pearsonr(x, y)[1])
    p_spearman = numeric.corr(method=lambda x, y: stats.spearmanr(x, y)[1])

    # The strict triangles (k=-1 / k=1) leave the diagonal out, so whatever
    # pandas placed there never reaches the combined table.
    p_vals = pd.DataFrame(
        np.tril(p_pearson, k=-1) + np.triu(p_spearman, k=1),
        index=p_pearson.index,
        columns=p_pearson.columns,
    )
    print("\n(p-values, lower ▷ Pearson  upper ▷ Spearman)")
    print(p_vals.round(3))

    # correlation matrix heat map
    plt.figure(figsize=(7, 6))
    sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f",
                linewidths=0.5, cbar_kws={"label": "rho"})
    plt.title("Parameter correlations")
    plt.tight_layout()
    return corr


def scatter(df, x, y, hue=None, **kw):
    """Draw a scatter plot of ``y`` against ``x`` with a dashed trend line.

    Points are plotted with ``sns.scatterplot`` at alpha 0.7, optionally
    coloured by ``hue``; extra keyword arguments are forwarded to that call.
    ``sns.regplot`` then adds a black dashed fit line with its own points
    and confidence band switched off.
    """
    trend_style = {"ls": "--"}

    sns.scatterplot(x=x, y=y, hue=hue, data=df, alpha=0.7, **kw)
    sns.regplot(
        x=x,
        y=y,
        data=df,
        scatter=False,
        ci=None,
        color="k",
        line_kws=trend_style,
    )


def ranking_row(df) -> pd.Series:
    """Return the best-ranked row of ``df`` as a Series.

    Rows are ordered by lowest ``hit_ratio`` first, then highest
    ``mean_deflection_deg``, then lowest ``wake_wafer_distance``; the row
    that ends up first is returned. ``df`` itself is left unsorted.
    """
    sort_keys = ["hit_ratio", "mean_deflection_deg", "wake_wafer_distance"]
    ascending_flags = [True, False, True]

    ranked = df.sort_values(by=sort_keys, ascending=ascending_flags)
    return ranked.iloc[0]


In [None]:
# --------------------------------------------------
# Main analysis logic (minimally modified for notebooks)
# --------------------------------------------------

def main(argv=None):
    """Entry point matching the behaviour of `mc-vis.py`.

    Loads a Monte Carlo result CSV, prints descriptive statistics and
    correlations, draws the diagnostic plots, prints a short list of design
    hints, and finally either saves the figures or shows them.

    Args:
        argv: Command-line style argument list: an optional CSV path plus
            the optional flags ``--save`` and ``--pairplot``. ``None`` is
            treated as an empty list; ``sys.argv`` is not consulted.

    Raises:
        SystemExit: If the CSV does not exist, is empty, or lacks a column
            that the plots and summary need. argparse also exits on
            arguments it does not recognise.
    """

    # ▸ Allow passing an explicit argv list so we can inject ARGS from a notebook.
    if argv is None:
        argv = []

    # --- parse arguments exactly like the original script ---
    p = argparse.ArgumentParser(add_help=False)
    p.add_argument("csv", nargs="?", help="CSV file to analyse (default: mc_data/mc_output6.csv)")
    p.add_argument("--save", action="store_true", help="Save figures as PNGs")
    p.add_argument("--pairplot", action="store_true", help="Generate seaborn pair‑plot")
    args = p.parse_args(argv)

    # --- paths (handle notebook context where __file__ may not exist) ---
    try:
        SCRIPT_DIR = pathlib.Path(__file__).resolve().parent
    except NameError:
        SCRIPT_DIR = pathlib.Path(".").resolve()

    DEFAULT_CSV = SCRIPT_DIR / "mc_data" / "mc_output6.csv"
    CSV_PATH = pathlib.Path(args.csv).expanduser() if args.csv else DEFAULT_CSV

    # --- load the CSV ---
    if not CSV_PATH.exists():
        sys.exit(f"Could not find '{CSV_PATH}'")
    df = pd.read_csv(CSV_PATH)
    if df.empty:
        sys.exit("The CSV is empty")
    print(f"\nLoaded {len(df):,} rows from {CSV_PATH}")

    # --- analysis steps ---
    numeric_cols = df.select_dtypes(include=np.number).columns
    print("\n========= Describing the numeric data =========\n")
    describe(df, numeric_cols)
    corr = correlation_matrix(df, numeric_cols)

    for col in ["hit_ratio", "wake_wafer_distance"]:
        if col in df.columns and df[col].nunique() == 1:
            print(f"WARNING: {col} is constant ({df[col].iloc[0]}) - correlations are not meaningful.")

    # Everything below addresses these columns by name. Check them up front
    # so a CSV with a different schema gets a clear message instead of a
    # KeyError from the middle of the plotting code. The generic statistics
    # above have already been printed by this point.
    required_cols = ["radius", "curvature", "wake_wafer_distance",
                     "mean_deflection_deg", "hit_ratio", "coating_type"]
    if args.pairplot:
        required_cols.append("mass")  # only the pair-plot uses it
    missing_cols = [c for c in required_cols if c not in df.columns]
    if missing_cols:
        sys.exit(f"'{CSV_PATH}' is missing required column(s): {', '.join(missing_cols)}")

    sns.set_theme(style="whitegrid")

    # curvature → deflection
    plt.figure(figsize=(7, 4))
    scatter(df, "curvature", "mean_deflection_deg", hue="coating_type")
    plt.xlabel("Shield curvature (1/meter)")
    plt.ylabel("AVG. deflection angle")
    plt.title("curvature vs. deflection")
    plt.tight_layout()

    # wake distance → hit‑ratio
    plt.figure(figsize=(7, 4))
    scatter(df, "wake_wafer_distance", "hit_ratio", hue="coating_type")
    plt.xlabel("Wake-to-wafer distance (m)")
    plt.ylabel("Hit ratio")
    plt.title("Wake distance vs. wafer impact probability")
    plt.tight_layout()

    # optional heavy pair‑plot
    if args.pairplot:
        pair_vars = ["radius", "curvature", "mass",
                     "wake_wafer_distance", "mean_deflection_deg", "hit_ratio"]
        sns.pairplot(df, vars=pair_vars, hue="coating_type", corner=True,
                     plot_kws={"alpha": 0.5, "s": 25, "edgecolor": "w"})
        plt.suptitle("Pair‑wise relationships", y=1.02)

    # design hints
    best_row = ranking_row(df)
    best_coating = df.groupby("coating_type")["mean_deflection_deg"].mean().idxmax()

    print("\n=== Guiding conclusions ===")
    print(f"• Coating with highest *mean* deflection: {best_coating}")
    print(f"• ρ(curvature, deflection) : {corr.loc['curvature', 'mean_deflection_deg']:+.2f}")
    print(f"• ρ(radius, deflection)    : {corr.loc['radius', 'mean_deflection_deg']:+.2f}")
    print(f"• ρ(wake_dist, hit_ratio)  : {corr.loc['wake_wafer_distance', 'hit_ratio']:+.2f}")

    print("\n🏆 Top performer (min hit_ratio → max deflection → min wake_dist)")
    print(f"    radius              = {best_row['radius']:.3f} m")
    print(f"    curvature           = {best_row['curvature']:.2f} 1/m")
    print(f"    wake-wafer distance = {best_row['wake_wafer_distance']:.3f} m")
    print(f"    coating             = {best_row['coating_type']}")
    print(f"    deflection          = {best_row['mean_deflection_deg']:.2f} °")
    print(f"    hit_ratio           = {best_row['hit_ratio']:.4f}")

    # --- output: save or show ---
    if args.save:
        out_dir = CSV_PATH.with_suffix("")  # “mc_output6” → directory
        out_dir.mkdir(exist_ok=True)
        # get_fignums() returns a fresh list, so closing figures while
        # looping over it is safe.
        for i, fignum in enumerate(plt.get_fignums(), 1):
            fig = plt.figure(fignum)
            fname = out_dir / f"fig{i:02}.png"
            fig.savefig(fname, dpi=300)
            print(f"  ↳ saved {fname}")
            # Release the figure once it is on disk: otherwise it stays
            # registered with pyplot, is picked up again by the next run,
            # and is still rendered inline in a notebook despite --save.
            plt.close(fig)
    else:
        plt.show()


In [None]:
# --------------------------------------------------
# Run the analysis with the specified ARGS
# --------------------------------------------------
# Mirror ARGS into sys.argv so the process looks as if it had been launched
# from the CLI as "mc-vis.py <ARGS>". main() does not read sys.argv itself;
# it receives the same list explicitly through its argv parameter below.
import sys as _sys
_sys.argv = ["mc-vis.py"] + ARGS

main(argv=ARGS)