In [None]:
# ===============================================================
# HIGGS BOSON — 3D Feature EDA (Print-only, Mirrored Views)
# ===============================================================

import os, zipfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('seaborn-v0_8-whitegrid')

# ---------------- Kaggle File Handling ----------------
# Archives to unpack, keyed by a short label used only in the status messages.
zip_files = {
    "train": "/kaggle/input/higgs-boson/training.zip",
    "test": "/kaggle/input/higgs-boson/test.zip",
    "submission": "/kaggle/input/higgs-boson/random_submission.zip"
}
extract_dir = "/kaggle/working/higgs_data/"
os.makedirs(extract_dir, exist_ok=True)

# Unpack every archive that exists; a missing archive is reported, not fatal.
for key, path in zip_files.items():
    if not os.path.exists(path):
        print(f"{key} zip not found at {path}")
        continue
    with zipfile.ZipFile(path, "r") as z:
        z.extractall(extract_dir)
    print(f"{key} unzipped.")

# ---------------- Load and clean data ----------------
TRAIN_CSV = os.path.join(extract_dir, "training.csv")
# Every -999.0 entry becomes NaN (presumably the dataset's missing-value
# sentinel -- confirm against the data description).
train_df = pd.read_csv(TRAIN_CSV).replace(-999.0, np.nan)

# ---------------- Split classes ----------------
sig = train_df.loc[train_df["Label"].eq("s")]
bkg = train_df.loc[train_df["Label"].eq("b")]

# ---------------- Fixed axes ----------------
x_axis = "PRI_jet_num"
y_axis = "DER_mass_MMC"
exclude_cols = ["EventId", "Weight", "Label", x_axis, y_axis]

# Every remaining numeric column is plotted on the z axis, in column order.
z_features = []
for c in train_df.select_dtypes(include=np.number).columns:
    if c in exclude_cols:
        continue
    z_features.append(c)
print(f"Total numeric features: {len(z_features)}")

# ---------------- 3D Plot function ----------------
def plot_3d(z_feature, elev=25, azim=45):
    """Show background and signal 3D scatter plots of one feature side by side.

    Both panels read the module-level ``bkg`` / ``sig`` frames and plot
    ``x_axis`` and ``y_axis`` against ``z_feature``, colouring each point by
    its ``z_feature`` value. The signal panel is viewed from ``-azim`` so it
    appears mirrored relative to the background panel.

    Parameters
    ----------
    z_feature : str
        Column plotted on the z axis and used for the point colours.
    elev : float, default 25
        Elevation angle passed to ``view_init`` for both panels.
    azim : float, default 45
        Azimuth for the background panel; the signal panel uses ``-azim``.
    """
    fig = plt.figure(figsize=(10, 4), dpi=160)

    # (subplot position, events, colormap, title, azimuth) for each panel.
    panels = (
        (121, bkg, 'cividis', "Background Events", azim),
        (122, sig, 'plasma', "Signal Events", -azim),  # mirrored
    )

    try:
        for position, events, cmap, title, panel_azim in panels:
            ax = fig.add_subplot(position, projection='3d')
            ax.scatter(
                events[x_axis], events[y_axis], events[z_feature],
                c=events[z_feature], cmap=cmap, s=6, alpha=0.5
            )
            ax.set_title(title, fontsize=10)
            ax.set_xlabel(x_axis)
            ax.set_ylabel(y_axis)
            ax.set_zlabel(z_feature)
            ax.view_init(elev=elev, azim=panel_azim)

        fig.suptitle(f"{z_feature} vs {y_axis} and {x_axis}", fontsize=12, fontweight='bold')
        fig.tight_layout()
        plt.show()
    finally:
        # This function is called once per feature; close the figure even if
        # drawing failed so open figures do not accumulate across the loop.
        plt.close(fig)

# ---------------- Generate all plots ----------------
# Features whose plot raised; used to report an accurate summary at the end.
skipped_features = []
for zf in z_features:
    try:
        plot_3d(zf)
    except Exception as e:
        # Best-effort: one failing feature must not stop the remaining plots.
        skipped_features.append(zf)
        print(f"Skipping {zf}: {e}")

# Only claim success when nothing was skipped.
if skipped_features:
    print(f"⚠️ {len(skipped_features)} of {len(z_features)} 3D plots were skipped: {skipped_features}")
else:
    print("✅ All 3D plots printed successfully.")


In [None]:
# ===============================================================
# HIGGS BOSON — 3D Feature EDA (Improved Background, No Mirroring)
# ===============================================================

import os, zipfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('seaborn-v0_8-whitegrid')

# ---------------- Kaggle File Handling ----------------
# Label -> archive path; the label only appears in the progress messages.
zip_files = {
    "train": "/kaggle/input/higgs-boson/training.zip",
    "test": "/kaggle/input/higgs-boson/test.zip",
    "submission": "/kaggle/input/higgs-boson/random_submission.zip"
}
extract_dir = "/kaggle/working/higgs_data/"
os.makedirs(extract_dir, exist_ok=True)

for key, path in zip_files.items():
    # Report a missing archive and move on to the next one.
    if not os.path.exists(path):
        print(f"{key} zip not found at {path}")
        continue
    with zipfile.ZipFile(path, "r") as z:
        z.extractall(extract_dir)
    print(f"{key} unzipped.")

# ---------------- Load and clean data ----------------
TRAIN_CSV = os.path.join(extract_dir, "training.csv")
# -999.0 is turned into NaN everywhere (presumably a missing-value marker --
# confirm against the dataset documentation).
train_df = pd.read_csv(TRAIN_CSV).replace(-999.0, np.nan)

# ---------------- Split classes ----------------
sig = train_df.loc[train_df["Label"].eq("s")]
bkg = train_df.loc[train_df["Label"].eq("b")]

# ---------------- Axes ----------------
x_axis = "PRI_jet_num"
y_axis = "DER_mass_MMC"
exclude_cols = ["EventId", "Weight", "Label", x_axis, y_axis]

# Numeric columns that are neither bookkeeping columns nor the fixed x/y axes.
z_features = []
for c in train_df.select_dtypes(include=np.number).columns:
    if c in exclude_cols:
        continue
    z_features.append(c)
print(f"Total numeric features: {len(z_features)}")

# ---------------- 3D Plot Function (Mirroring Removed) ----------------
def plot_3d(z_feature, elev=25, azim=45):
    """Show background and signal 3D scatters of one feature from the same view.

    Reads the module-level ``bkg`` / ``sig`` frames and the ``x_axis`` /
    ``y_axis`` column names.

    Background panel: when ``bkg`` has more than 80000 rows, a 25% sample is
    drawn with probability proportional to ``Weight`` (fixed seed 42). NaNs in
    ``z_feature`` are filled with the median, the values are shifted so the
    minimum is 0 and passed through ``log1p``. Marker size scales with
    ``Weight``.

    Signal panel: NaNs are filled with the median and the raw values are
    plotted; marker size and colour scale with the min-max normalised value.

    NOTE(review): the two panels use different z scales (shifted log vs raw),
    and median-filled rows show up as real points -- confirm this is intended.

    Parameters
    ----------
    z_feature : str
        Column plotted on the z axis.
    elev, azim : float
        View angles passed to ``view_init`` for both panels.
    """
    # -------- Prepare Background --------
    # The frames are only read below, so no defensive copy is made.
    bkg_df = bkg
    if len(bkg_df) > 80000:
        # Weighted sampling for clarity
        bkg_df = bkg_df.sample(
            frac=0.25, weights=bkg_df["Weight"], random_state=42
        )

    # Log scaling for Z: shift to a zero minimum first so log1p is defined
    # for negative feature values.
    z_vals = bkg_df[z_feature].fillna(bkg_df[z_feature].median())
    log_z = np.log1p(z_vals - z_vals.min())
    log_z_norm = (log_z - log_z.min()) / (log_z.max() - log_z.min() + 1e-8)

    # Weighted point sizes
    size_bkg = 5 + 20 * (bkg_df["Weight"] / bkg_df["Weight"].max())

    # -------- Prepare Signal --------
    sig_z = sig[z_feature].fillna(sig[z_feature].median())
    sig_z_norm = (sig_z - sig_z.min()) / (sig_z.max() - sig_z.min() + 1e-8)
    size_sig = 8 + 18 * sig_z_norm

    # -------- Plot --------
    fig = plt.figure(figsize=(11, 4), dpi=160)
    try:
        # Background
        ax1 = fig.add_subplot(121, projection='3d')
        ax1.scatter(
            bkg_df[x_axis],
            bkg_df[y_axis],
            log_z,
            c=log_z_norm,
            cmap="cividis_r",
            s=size_bkg,
            alpha=0.55
        )
        ax1.set_title("Background Events (weighted, log-scaled)", fontsize=10)
        ax1.set_xlabel(x_axis)
        ax1.set_ylabel(y_axis)
        # The plotted quantity is the shifted log, so label it as such.
        ax1.set_zlabel(f"log1p({z_feature} - min)")
        ax1.view_init(elev=elev, azim=azim)

        # Signal (same view, NOT mirrored)
        ax2 = fig.add_subplot(122, projection='3d')
        ax2.scatter(
            sig[x_axis],
            sig[y_axis],
            sig_z,
            c=sig_z_norm,
            cmap="plasma",
            s=size_sig,
            alpha=0.55
        )
        ax2.set_title("Signal Events", fontsize=10)
        ax2.set_xlabel(x_axis)
        ax2.set_ylabel(y_axis)
        ax2.set_zlabel(z_feature)
        ax2.view_init(elev=elev, azim=azim)

        fig.suptitle(f"{z_feature} vs {y_axis} and {x_axis}", fontsize=12, fontweight="bold")
        fig.tight_layout()
        plt.show()
    finally:
        # Called once per feature: always release the figure, even when
        # drawing raised and the caller moves on to the next feature.
        plt.close(fig)

# ---------------- Generate all plots ----------------
# Features whose plot raised; used to report an accurate summary at the end.
skipped_features = []
for zf in z_features:
    try:
        plot_3d(zf)
    except Exception as e:
        # Best-effort: one failing feature must not stop the remaining plots.
        skipped_features.append(zf)
        print(f"Skipping {zf}: {e}")

# Only claim success when nothing was skipped.
if skipped_features:
    print(f"⚠️ {len(skipped_features)} of {len(z_features)} 3D plots were skipped: {skipped_features}")
else:
    print("✅ All 3D plots printed successfully.")


In [1]:
# ===============================================================
# HIGGS BOSON — 3D Feature EDA (4-Plot JPEG Export)
# ===============================================================

import os, zipfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('seaborn-v0_8-whitegrid')

# ---------------- Kaggle File Handling ----------------
# Label -> archive path; the label only appears in the progress messages.
zip_files = {
    "train": "/kaggle/input/higgs-boson/training.zip",
    "test": "/kaggle/input/higgs-boson/test.zip",
    "submission": "/kaggle/input/higgs-boson/random_submission.zip"
}
extract_dir = "/kaggle/working/higgs_data/"
os.makedirs(extract_dir, exist_ok=True)

for key, path in zip_files.items():
    # Guard clause: report a missing archive and continue with the next one.
    if not os.path.exists(path):
        print(f"{key} zip not found at {path}")
        continue
    with zipfile.ZipFile(path, "r") as z:
        z.extractall(extract_dir)
    print(f"{key} unzipped.")

# ---------------- Load and clean data ----------------
TRAIN_CSV = os.path.join(extract_dir, "training.csv")
# Replace every -999.0 with NaN (presumably the missing-value marker used by
# this dataset -- confirm against its documentation).
train_df = pd.read_csv(TRAIN_CSV).replace(-999.0, np.nan)

# ---------------- Split classes ----------------
sig = train_df.loc[train_df["Label"].eq("s")]
bkg = train_df.loc[train_df["Label"].eq("b")]

# ---------------- Axes ----------------
x_axis = "PRI_jet_num"
y_axis = "DER_mass_MMC"
exclude_cols = ["EventId", "Weight", "Label", x_axis, y_axis]

# Candidate z-axis features: numeric columns minus the excluded ones,
# kept in their original column order.
z_features = []
for c in train_df.select_dtypes(include=np.number).columns:
    if c in exclude_cols:
        continue
    z_features.append(c)

print(f"Total numeric features: {len(z_features)}")

# Directory for saving plots
save_dir = "/kaggle/working/3d_plots_jpeg/"
os.makedirs(save_dir, exist_ok=True)


# ---------------- Helper Function ----------------
def prepare_background(z_feature, max_rows=80000, frac=0.25, random_state=42):
    """Build the plotting inputs for the background panel of one feature.

    Reads the module-level ``bkg`` frame. When it has more than ``max_rows``
    rows, a fraction ``frac`` of it is sampled with probability proportional
    to ``Weight``; otherwise a copy of the whole frame is used.

    Parameters
    ----------
    z_feature : str
        Column to transform for the z axis.
    max_rows : int, default 80000
        Row count above which the frame is subsampled.
    frac : float, default 0.25
        Fraction of rows kept when subsampling.
    random_state : int, default 42
        Seed passed to ``DataFrame.sample`` so the subsample is reproducible.

    Returns
    -------
    tuple
        ``(bkg_df, log_z, log_norm, point_size)`` where ``log_z`` is
        ``log1p(z - min(z))`` of the median-filled feature, ``log_norm`` is
        ``log_z`` min-max scaled (1e-8 added to the denominator so a constant
        column does not divide by zero), and ``point_size`` is
        ``5 + 20 * Weight / max(Weight)``.
    """
    if len(bkg) > max_rows:
        # sample() already returns a new frame, so no separate copy is needed.
        bkg_df = bkg.sample(
            frac=frac, weights=bkg["Weight"], random_state=random_state
        )
    else:
        bkg_df = bkg.copy()

    # NaNs are replaced by the column median before the transform.
    z_vals = bkg_df[z_feature].fillna(bkg_df[z_feature].median())

    # Shift to a zero minimum so log1p is defined for negative values.
    log_z = np.log1p(z_vals - z_vals.min())
    log_norm = (log_z - log_z.min()) / (log_z.max() - log_z.min() + 1e-8)

    point_size = 5 + 20 * (bkg_df["Weight"] / bkg_df["Weight"].max())

    return bkg_df, log_z, log_norm, point_size


def prepare_signal(z_feature):
    """Build the plotting inputs for the signal panel of one feature.

    Returns ``(sig_df, z_vals, z_norm, point_size)``: a copy of the
    module-level ``sig`` frame, the feature column with NaNs replaced by the
    column median, that column min-max scaled (1e-8 added to the denominator),
    and marker sizes computed as ``8 + 18 * z_norm``.
    """
    frame = sig.copy()
    column = frame[z_feature]
    filled = column.fillna(column.median())

    lowest = filled.min()
    span = filled.max() - lowest + 1e-8
    scaled = (filled - lowest) / span

    return frame, filled, scaled, 8 + 18 * scaled


# ---------------- Batch Plot: 4 Subplots per JPEG ----------------
def _draw_panel(fig, index, frame, z_values, colours, sizes, cmap, title, zlabel, elev, azim):
    """Draw one 3D scatter panel at position ``index`` of a 1x4 grid on ``fig``."""
    ax = fig.add_subplot(1, 4, index, projection='3d')
    ax.scatter(
        frame[x_axis], frame[y_axis], z_values,
        c=colours, cmap=cmap, s=sizes, alpha=0.55
    )
    ax.set_title(title)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_zlabel(zlabel)
    ax.view_init(elev=elev, azim=azim)
    return ax


def plot_batch(z1, z2, elev=25, azim=45):
    """Save one JPEG with background/signal 3D scatters for two features.

    Panels, left to right: ``z1`` background, ``z1`` signal, ``z2``
    background, ``z2`` signal. Background panels plot the shifted log values
    returned by ``prepare_background``; signal panels plot the median-filled
    raw values returned by ``prepare_signal``. The image is written to
    ``save_dir`` as ``"{z1}__{z2}.jpeg"`` (with ``/`` replaced by ``_``) and
    the saved path is printed.

    Parameters
    ----------
    z1, z2 : str
        Feature columns to plot on the z axis.
    elev, azim : float
        View angles passed to ``view_init`` for all four panels.
    """
    fig = plt.figure(figsize=(18, 7), dpi=140)
    try:
        index = 0
        for feature in (z1, z2):
            # -------- Background --------
            bkg_df, log_z, bkg_colours, bkg_sizes = prepare_background(feature)
            index += 1
            _draw_panel(
                fig, index, bkg_df, log_z, bkg_colours, bkg_sizes,
                "cividis_r", f"{feature} Background",
                # The plotted quantity is the shifted log, so label it as such.
                f"log1p({feature} - min)", elev, azim,
            )

            # -------- Signal --------
            sig_df, z_raw, sig_colours, sig_sizes = prepare_signal(feature)
            index += 1
            _draw_panel(
                fig, index, sig_df, z_raw, sig_colours, sig_sizes,
                "plasma", f"{feature} Signal", feature, elev, azim,
            )

        batch_name = f"{z1}__{z2}.jpeg".replace("/", "_")
        full_path = os.path.join(save_dir, batch_name)

        fig.tight_layout()
        fig.savefig(full_path, format="jpeg", dpi=200)
    finally:
        # The caller catches exceptions and continues with the next pair, so
        # the figure must be released on failure as well as on success.
        plt.close(fig)

    print(f"Saved {full_path}")


# ---------------- Main Loop: 2 Features → 1 JPEG ----------------
# Feature pairs whose image could not be produced; used for the final summary.
failed_pairs = []

# Stop one short of the end so z_features[i + 1] always exists.
for i in range(0, len(z_features) - 1, 2):
    f1 = z_features[i]
    f2 = z_features[i + 1]

    try:
        plot_batch(f1, f2)
    except Exception as e:
        # Best-effort: one failing pair must not stop the remaining images.
        failed_pairs.append((f1, f2))
        print(f"Error with {f1}, {f2}: {e}")

# With an odd number of features the last one has no partner; say so instead
# of dropping it silently.
if len(z_features) % 2:
    print(f"⚠️ {z_features[-1]} has no partner feature and was not plotted.")

# Only claim success when every pair was saved.
if failed_pairs:
    print(f"⚠️ {len(failed_pairs)} feature pair(s) failed: {failed_pairs}")
else:
    print("✅ All JPEG 4-subplot images saved.")


train unzipped.
test unzipped.
submission unzipped.
Total numeric features: 28
Saved /kaggle/working/3d_plots_jpeg/DER_mass_transverse_met_lep__DER_mass_vis.jpeg
Saved /kaggle/working/3d_plots_jpeg/DER_pt_h__DER_deltaeta_jet_jet.jpeg
Saved /kaggle/working/3d_plots_jpeg/DER_mass_jet_jet__DER_prodeta_jet_jet.jpeg
Saved /kaggle/working/3d_plots_jpeg/DER_deltar_tau_lep__DER_pt_tot.jpeg
Saved /kaggle/working/3d_plots_jpeg/DER_sum_pt__DER_pt_ratio_lep_tau.jpeg
Saved /kaggle/working/3d_plots_jpeg/DER_met_phi_centrality__DER_lep_eta_centrality.jpeg
Saved /kaggle/working/3d_plots_jpeg/PRI_tau_pt__PRI_tau_eta.jpeg
Saved /kaggle/working/3d_plots_jpeg/PRI_tau_phi__PRI_lep_pt.jpeg
Saved /kaggle/working/3d_plots_jpeg/PRI_lep_eta__PRI_lep_phi.jpeg
Saved /kaggle/working/3d_plots_jpeg/PRI_met__PRI_met_phi.jpeg
Saved /kaggle/working/3d_plots_jpeg/PRI_met_sumet__PRI_jet_leading_pt.jpeg
Saved /kaggle/working/3d_plots_jpeg/PRI_jet_leading_eta__PRI_jet_leading_phi.jpeg
Saved /kaggle/working/3d_plots_jpeg/PR