In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import ast
import re

# Load the step-analysis results (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer="step_analysis_results.csv")

In [None]:
# Expand step_names into one row per step (exclude "unknown", strip whitespace)
# Build one record per (timestamp, step name) pair.
# Only the two columns that are actually used are iterated; this avoids the
# per-row Series construction that DataFrame.iterrows() performs.
rows = []
for timestamp, raw_steps in zip(df["timestamp"], df["step_names"]):
    # Each step_names cell is parsed as a Python literal and then iterated
    # (presumably a list of names -- confirm against the CSV producer).
    for step in ast.literal_eval(raw_steps):
        step = str(step).strip()
        # Skip blank entries and the "unknown" placeholder.
        if step and step != "unknown":
            rows.append({"timestamp": timestamp, "step_name": step})

# Name the columns explicitly: with an empty `rows` list, pd.DataFrame(rows)
# would have no columns at all and any later df_expanded["step_name"] lookup
# would raise KeyError.
df_expanded = pd.DataFrame(rows, columns=["timestamp", "step_name"])

# Extract the first number (if there is no number, it will be a large number)
def step_num(name: str, default: int = 9999) -> int:
    """Return the integer a step name starts with.

    Leading whitespace is ignored; only digits at the very start of the
    (stripped) name count.

    Args:
        name: Step label to inspect.
        default: Value returned when ``name`` does not start with digits.
            When the result is used as a sort key, the default of 9999 puts
            such names after every step numbered below 9999.

    Returns:
        The leading number as an ``int``, or ``default`` if there is none.
    """
    m = re.match(r"^\s*(\d+)", name)
    return int(m.group(1)) if m else default

# Rank the distinct step names by leading number, breaking ties by name
ordered_steps = list(df_expanded["step_name"].unique())
ordered_steps.sort(key=lambda label: (step_num(label), label))

# Give every step name the integer y-position that matches its rank
y_map = dict(zip(ordered_steps, range(len(ordered_steps))))
df_expanded["y"] = df_expanded["step_name"].map(y_map)

# Draw on an explicit Figure/Axes pair rather than the implicit current figure
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_expanded["timestamp"], df_expanded["y"], marker="o")

# Label each integer y-position with its step name
ax.set_yticks(list(y_map.values()))
ax.set_yticklabels(list(y_map.keys()))
ax.set_xlabel("Timestamp (s)")
ax.set_ylabel("Step Names (numeric order)")
ax.set_title("Scatter Plot of Step Names over Time")
ax.grid(True, axis="x", linestyle="--", alpha=0.5)
fig.tight_layout()

#plt.savefig("step_names_scatter_numeric_order.png", dpi=200)
plt.show()


In [None]:
# Expand step_names into one row per step (exclude "unknown", strip whitespace)
# and add vertical lines to separate the steps

# Build one record per (timestamp, step name) pair.
# Only the two columns that are actually used are iterated; this avoids the
# per-row Series construction that DataFrame.iterrows() performs.
rows = []
for timestamp, raw_steps in zip(df["timestamp"], df["step_names"]):
    # Each step_names cell is parsed as a Python literal and then iterated
    # (presumably a list of names -- confirm against the CSV producer).
    for step in ast.literal_eval(raw_steps):
        step = str(step).strip()
        # Skip blank entries and the "unknown" placeholder.
        if step and step != "unknown":
            rows.append({"timestamp": timestamp, "step_name": step})

# Name the columns explicitly: with an empty `rows` list, pd.DataFrame(rows)
# would have no columns at all and any later df_expanded["step_name"] lookup
# would raise KeyError.
df_expanded = pd.DataFrame(rows, columns=["timestamp", "step_name"])

# Extract the first number (if there is no number, it will be a large number)
def step_num(name: str, default: int = 9999) -> int:
    """Return the integer a step name starts with.

    Leading whitespace is ignored; only digits at the very start of the
    (stripped) name count.

    Args:
        name: Step label to inspect.
        default: Value returned when ``name`` does not start with digits.
            When the result is used as a sort key, the default of 9999 puts
            such names after every step numbered below 9999.

    Returns:
        The leading number as an ``int``, or ``default`` if there is none.
    """
    m = re.match(r"^\s*(\d+)", name)
    return int(m.group(1)) if m else default

# Rank the distinct step names by leading number, breaking ties by name
ordered_steps = list(df_expanded["step_name"].unique())
ordered_steps.sort(key=lambda label: (step_num(label), label))

# Give every step name the integer y-position that matches its rank
y_map = dict(zip(ordered_steps, range(len(ordered_steps))))
df_expanded["y"] = df_expanded["step_name"].map(y_map)

# Draw on an explicit Figure/Axes pair rather than the implicit current figure
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_expanded["timestamp"], df_expanded["y"], marker="o")

# Mark each listed timestamp with a dashed red vertical line
vertical_lines = [3.5, 6.5, 18, 63, 90.5, 93, 95, 97]
for x_pos in vertical_lines:
    ax.axvline(x=x_pos, color='red', linestyle='--', alpha=0.7, linewidth=1)

# Label each integer y-position with its step name
ax.set_yticks(list(y_map.values()))
ax.set_yticklabels(list(y_map.keys()))
ax.set_xlabel("Timestamp (s)")
ax.set_ylabel("Step Names (numeric order)")
ax.set_title("Scatter Plot of Step Names over Time")
ax.grid(True, axis="x", linestyle="--", alpha=0.5)
fig.tight_layout()

#plt.savefig("step_names_scatter_numeric_order.png", dpi=200)
plt.show()