In [None]:
# Cell 1: Imports & setup
import sys
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.manifold import TSNE
import numpy as np

# Try to import UMAP
try:
    import umap
    has_umap = True
except ImportError:
    has_umap = False

# Ensure src/ is on the Python path for imports.
# NOTE: src/ is resolved against the current working directory, so the project
# imports below only work when the kernel is started from the directory that
# contains src/.
project_root = Path.cwd()
sys.path.insert(0, str(project_root / "src"))
# Project-local modules (presumably located under src/ — they are imported only
# after the sys.path entry above has been added).
from EXP_improved import load_data
from utils.feature_extraction import transform_data


# Folder that the plotting cells save their PNG files into; it sits one level
# above the working directory. exist_ok=True lets this cell be re-run without
# failing when the folder already exists.
export_dir = Path("../export")
export_dir.mkdir(exist_ok=True)



In [None]:
# Cell 2: Load data
# load_data() is imported from the project-local EXP_improved module and is called
# without arguments; it returns a (samples, labels) pair that the next cell passes
# to transform_data(). The exact structure of both objects is not visible here.
X_data, y_data = load_data()                      # load raw vibration samples & labels


In [None]:
# Turn the raw samples into a feature table X and a label vector y.
# Later cells treat X as a DataFrame that contains 'machine' and 'process' columns
# (presumably added because include_metadata=True — confirm in utils.feature_extraction).
# NOTE(review): labels are requested with label_type="string", yet the plotting cells
# compare the resulting `status` column against the integers 0 and 1 — confirm the
# encoding that transform_data actually returns.
X, y = transform_data(X_data, y_data, label_type="string", include_metadata=True)  # extract features

In [None]:
# X is already a DataFrame, so there is no need to wrap it in pd.DataFrame again.
# assign() returns a new frame with the labels attached as a 'status' column and
# leaves X itself untouched.
df = X.assign(status=y)

# The column index is expected to list 'machine' and 'process' next to the features.
print(df.columns)

df

In [None]:
# Cell 8: Combined UMAP plots: one figure per machine, one figure per process
import math
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from umap.umap_ import UMAP

# Working frame for the plots: the contents of X plus the labels as 'status'.
df_umap = X.assign(status=y)

# Every column that is neither metadata nor the label is used as a model feature.
meta_cols = ["machine", "process", "status"]
feature_cols = [col for col in df_umap.columns if col not in meta_cols]
X_num = df_umap[feature_cols].values

# Standardize the features, then fit one 2-D UMAP embedding; the figures further
# down all read the resulting UMAP1/UMAP2 columns instead of refitting.
X_scaled = StandardScaler().fit_transform(X_num)
reducer = UMAP(
    n_components=2,
    n_neighbors=15,
    min_dist=0.1,
    metric="correlation",
    random_state=42,  # fixed seed for a repeatable layout
)
emb = reducer.fit_transform(X_scaled)
df_umap["UMAP1"] = emb[:, 0]
df_umap["UMAP2"] = emb[:, 1]

# 1) One figure with machines stacked vertically (one panel per machine).
# Each panel shows the shared UMAP embedding restricted to one machine, with
# status 0 drawn green ("good") and status 1 drawn red ("bad").
# NOTE(review): assumes `status` is encoded as the integers 0/1 — confirm against
# what transform_data returns.
machines = sorted(df_umap["machine"].unique())
# squeeze=False always yields a 2-D array of Axes. Without it plt.subplots returns
# a bare Axes object when there is only one machine, and the loop below would fail.
fig, axes = plt.subplots(
    len(machines), 1,
    figsize=(5, 5 * len(machines)),
    sharex=True, sharey=True,
    squeeze=False,
)
axes = axes.ravel()
for idx, (ax, mach) in enumerate(zip(axes, machines)):
    sub = df_umap[df_umap["machine"] == mach]
    # "good" is drawn first so the "bad" points end up on top of it.
    for code, colour, alpha, size, label in (
        (0, "green", 0.6, 25, "good"),
        (1, "red", 0.8, 30, "bad"),
    ):
        pts = sub[sub["status"] == code]
        ax.scatter(pts["UMAP1"], pts["UMAP2"], c=colour, alpha=alpha, s=size, label=label)
    ax.set_title(f"Machine {mach}")
    ax.set_xlabel("UMAP1")
    ax.set_ylabel("UMAP2")
    ax.grid(True, linestyle='--', alpha=0.7)
    if idx == 0:
        # One legend on the top panel is enough; all panels share the colour code.
        ax.legend()
fig.suptitle("UMAP Embedding by Machine", y=1.02)
fig.tight_layout()
# The title is anchored above the canvas (y=1.02); bbox_inches="tight" grows the
# saved bounding box so the title is not cropped from the PNG.
fig.savefig(export_dir / "umap_by_machine.png", bbox_inches="tight")
plt.show()

# 2) One figure with a grid of panels (one per process), 3 columns.
# Each panel shows the shared UMAP embedding restricted to one process, with
# status 0 drawn green ("good") and status 1 drawn red ("bad").
# NOTE(review): assumes `status` is encoded as the integers 0/1 — confirm against
# what transform_data returns.
processes = sorted(df_umap["process"].unique())
n_cols = 3
n_rows = (len(processes) + n_cols - 1) // n_cols  # ceiling division
# squeeze=False guarantees a 2-D Axes array whatever the grid shape is.
fig, axes = plt.subplots(
    n_rows, n_cols,
    figsize=(4 * n_cols, 4 * n_rows),
    sharex=True, sharey=True,
    squeeze=False,
)
axes = axes.flatten()

for idx, (ax, proc) in enumerate(zip(axes, processes)):
    sub = df_umap[df_umap["process"] == proc]
    # "good" is drawn first so the "bad" points end up on top of it.
    for code, colour, alpha, size, label in (
        (0, "green", 0.6, 20, "good"),
        (1, "red", 0.8, 25, "bad"),
    ):
        pts = sub[sub["status"] == code]
        ax.scatter(pts["UMAP1"], pts["UMAP2"], c=colour, alpha=alpha, s=size, label=label)
    ax.set_title(proc)
    ax.grid(True, linestyle='--', alpha=0.7)
    # Hide tick labels and tick marks but keep the tick locations: gridlines are
    # drawn at tick positions, so clearing the ticks entirely would also remove
    # the grid requested on the previous line.
    ax.tick_params(axis="both", which="both", length=0,
                   labelbottom=False, labelleft=False)
    if idx == 0:
        # Same convention as the per-machine figure: one legend on the first panel.
        ax.legend()

# Hide unused subplots
for ax in axes[len(processes):]:
    ax.set_visible(False)

fig.suptitle("UMAP Embedding by Process", y=1.02)
fig.tight_layout()
fig.subplots_adjust(hspace=0.3, wspace=0.3)  # Adjust spacing for better alignment
# The title is anchored above the canvas (y=1.02); bbox_inches="tight" grows the
# saved bounding box so the title is not cropped from the PNG.
fig.savefig(export_dir / "umap_by_process.png", bbox_inches="tight")
plt.show()

In [None]:
# Overview panel: the full UMAP embedding with all machines pooled together.
# Colour code is kept identical to the per-machine and per-process figures:
# status 0 -> green "good", status 1 -> red "bad". Mapping 1 to "good" here would
# contradict those figures and invert the meaning of the exported plot.
# NOTE(review): assumes `status` is encoded as the integers 0/1 — confirm against
# what transform_data returns.
fig, ax = plt.subplots(figsize=(6, 6))
# "good" is drawn first so the "bad" points end up on top of it.
for code, colour, alpha, size, label in (
    (0, "green", 0.6, 25, "good"),
    (1, "red", 0.8, 30, "bad"),
):
    pts = df_umap[df_umap["status"] == code]
    ax.scatter(pts["UMAP1"], pts["UMAP2"], c=colour, alpha=alpha, s=size, label=label)
ax.set_title("UMAP Embedding - All Machines Combined")
ax.set_xlabel("UMAP1")
ax.set_ylabel("UMAP2")
ax.legend(title="Status")
fig.tight_layout()
fig.savefig(export_dir / "umap_all_machines.png")
plt.show()