In [None]:
# Import python packages
import streamlit as st
import pandas as pd

# Obtain the Snowpark session attached to this notebook (not referenced again in this cell).
from snowflake.snowpark.context import get_active_session
session = get_active_session()

# Read the bench1 capture and render it as an interactive table.
bench1_path = 'JAZHUANG 2025-11-17 15:04:00/bench1.csv'
df = pd.read_csv(bench1_path)
st.dataframe(df)

In [None]:
# Import python packages
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Obtain the Snowpark session attached to this notebook (not referenced again in this cell).
from snowflake.snowpark.context import get_active_session
session = get_active_session()

# Read the bench4 capture and put the rows in chronological order.
bench4_path = 'JAZHUANG 2025-11-17 15:04:00/bench4.csv'
df = pd.read_csv(bench4_path).sort_values('Timestamp').reset_index(drop=True)

# Re-base the time axis so the earliest sample sits at 0.
# NOTE(review): the raw Timestamp values are subtracted as-is and later labelled
# as seconds — confirm the unit against the benchmark writer.
first_timestamp = df['Timestamp'].min()
df['ElapsedSeconds'] = df['Timestamp'].sub(first_timestamp)

# Method 1: Streamlit's built-in line chart, one series per Type
st.subheader("Latency Over Time by Type (Elapsed Time)")

# Reshape to wide form: one row per ElapsedSeconds value, one column per Type,
# averaging LatencyMs where several rows share the same (ElapsedSeconds, Type).
pivot_df = df.pivot_table(
    index='ElapsedSeconds',
    columns='Type',
    values='LatencyMs',
    aggfunc='mean',
).reset_index()
latency_by_elapsed = pivot_df.set_index('ElapsedSeconds')
st.line_chart(latency_by_elapsed)

# Method 2: matplotlib plot of raw latency per Type plus a per-second median line per Type
st.subheader("Latency Over Time (Matplotlib - Elapsed Time with Type-Specific Medians)")

fig, ax = plt.subplots(figsize=(14, 8))

# Fixed colours for the known Type values; any other Type falls back to gray/black.
colors = {'Range': 'blue', 'Value': 'orange'}
median_colors = {'Range': 'darkblue', 'Value': 'darkorange'}

# Whole-second bucket (floor of the elapsed time) used to group rows for the medians.
df['ElapsedSecondsRounded'] = np.floor(df['ElapsedSeconds']).astype(int)

# Select each Type's rows once and reuse them for both the raw and the median lines.
rows_by_type = {
    type_name: df[df['Type'] == type_name]
    for type_name in df['Type'].unique()
}

# Raw series are all drawn first so they stay ahead of the medians in the legend.
for type_name, type_data in rows_by_type.items():
    ax.plot(type_data['ElapsedSeconds'], type_data['LatencyMs'],
            label=f'{type_name}', color=colors.get(type_name, 'gray'),
            marker='o', markersize=0.3, linewidth=0.6, alpha=0.6)

# Median latency per elapsed second, kept per Type for the statistics shown later.
median_per_second_by_type = {}
for type_name, type_data in rows_by_type.items():
    median_data = type_data.groupby('ElapsedSecondsRounded')['LatencyMs'].median().reset_index()
    median_per_second_by_type[type_name] = median_data

    ax.plot(median_data['ElapsedSecondsRounded'], median_data['LatencyMs'],
            label=f'{type_name} Median', color=median_colors.get(type_name, 'black'),
            linewidth=2.5, marker='s', markersize=4, alpha=0.9)

ax.set_xlabel('Elapsed Time (seconds)')
ax.set_ylabel('Latency (Ms)')
ax.set_title('Latency Over Time by Type with Separate Medians per Second')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
st.pyplot(fig)
# pyplot keeps every figure it created until it is closed explicitly; release this
# one after rendering so re-running the cell does not accumulate open figures.
plt.close(fig)

# Timing overview: first/last raw timestamps and the covered span in seconds and minutes
st.subheader("Timing Information")
st.write(f"First timestamp: {first_timestamp}")
last_timestamp = df['Timestamp'].max()
st.write(f"Last timestamp: {last_timestamp}")
total_seconds = df['ElapsedSeconds'].max()
st.write(f"Total duration: {total_seconds:.6f} seconds")
st.write(f"Total duration: {total_seconds/60:.6f} minutes")

# Row counts, overall and broken down by Type
st.subheader("Data Summary")
record_count = len(df)
st.write(f"Total records: {record_count}")
st.write("Records by Type:")
type_counts = df['Type'].value_counts()
st.write(type_counts)

# Per-Type summary: overall median latency plus min/max/mean of the per-second medians
st.subheader("Median Latency Statistics by Type")
for type_name in df['Type'].unique():
    type_latency = df.loc[df['Type'] == type_name, 'LatencyMs']
    per_second_median = median_per_second_by_type[type_name]['LatencyMs']

    st.write(f"**{type_name} Type:**")
    st.write(f"  - Overall median latency: {type_latency.median():.3f} ms")
    st.write(f"  - Median per second - Min: {per_second_median.min():.3f} ms")
    st.write(f"  - Median per second - Max: {per_second_median.max():.3f} ms")
    st.write(f"  - Median per second - Average: {per_second_median.mean():.3f} ms")
    st.write("")

# First 20 rows of the time-ordered data, restricted to the columns used above
st.subheader("Raw Data Preview (with Elapsed Time)")
preview_columns = ['Timestamp', 'ElapsedSeconds', 'LatencyMs', 'Type']
display_df = df[preview_columns].head(20)
st.dataframe(display_df)

# First 10 per-second medians for every Type
st.subheader("Median Latency per Second by Type (Preview)")
for type_name in df['Type'].unique():
    st.write(f"**{type_name} Median per Second:**")
    median_preview = median_per_second_by_type[type_name].head(10)
    st.dataframe(median_preview)

In [None]:
# Import python packages
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Obtain the Snowpark session attached to this notebook (not referenced again in this cell).
from snowflake.snowpark.context import get_active_session
session = get_active_session()

# Read the bencha0 capture
bencha0_path = 'JAZHUANG 2025-11-17 15:04:00/bencha0.csv'
df = pd.read_csv(bencha0_path)

# List the columns that were actually loaded
st.write("**Available columns:**", list(df.columns))

# Outlier trim: keep only rows whose LatencyMs is at or below the 99.9th percentile
original_count = len(df)
p99_9 = df['LatencyMs'].quantile(0.999)
within_threshold = df['LatencyMs'] <= p99_9
df = df[within_threshold].copy()
filtered_count = len(df)
removed_count = original_count - filtered_count

st.write("**Outlier Filtering:**")
st.write(f"- P99.9 threshold: {p99_9:.3f} ms")
st.write(f"- Original records: {original_count:,}")
st.write(f"- After filtering: {filtered_count:,}")
st.write(f"- Removed {removed_count:,} outliers ({(removed_count / original_count * 100):.2f}%)")
st.write("")

# Put the rows in chronological order
df = df.sort_values('Timestamp').reset_index(drop=True)

# Re-base the time axis so the earliest remaining sample sits at 0
first_timestamp = df['Timestamp'].min()
df['ElapsedSeconds'] = df['Timestamp'].sub(first_timestamp)

# PTreeDelta = PTreeAllocated - PTreeReleased, only when both columns were loaded
ptree_columns_present = 'PTreeAllocated' in df.columns and 'PTreeReleased' in df.columns
if not ptree_columns_present:
    st.write("⚠️ PTree columns not found in the data")
else:
    df['PTreeDelta'] = df['PTreeAllocated'].sub(df['PTreeReleased'])
    st.write("✅ PTree columns found - calculating delta (PTreeAllocated - PTreeReleased)")

# Method 1: Streamlit's built-in line chart, one latency series per Type
st.subheader("Latency Over Time by Type (Elapsed Time) - Outliers Removed")

# Reshape to wide form: one row per ElapsedSeconds value, one column per Type,
# averaging LatencyMs where several rows share the same (ElapsedSeconds, Type).
pivot_df = df.pivot_table(
    index='ElapsedSeconds',
    columns='Type',
    values='LatencyMs',
    aggfunc='mean',
).reset_index()
latency_by_elapsed = pivot_df.set_index('ElapsedSeconds')
st.line_chart(latency_by_elapsed)

# Method 2: matplotlib view of latency per Type, overlaid with PTreeDelta on a
# secondary axis when the PTree columns are available.
has_ptree_columns = 'PTreeAllocated' in df.columns and 'PTreeReleased' in df.columns

# Fixed colours for the known Type values; any other Type falls back to gray/black.
colors = {'Range': 'blue', 'Value': 'orange'}
median_colors = {'Range': 'darkblue', 'Value': 'darkorange'}

# Whole-second bucket (floor of the elapsed time) used for every per-second median below.
df['ElapsedSecondsRounded'] = np.floor(df['ElapsedSeconds']).astype(int)

# Median latency per elapsed second for each Type. Built up front for BOTH layouts so
# that later sections always see values computed from this cell's data; previously
# only the latency-only layout defined it.
median_per_second_by_type = {}
for type_name in df['Type'].unique():
    type_data = df[df['Type'] == type_name]
    median_per_second_by_type[type_name] = (
        type_data.groupby('ElapsedSecondsRounded')['LatencyMs'].median().reset_index()
    )


def _plot_latency_series(axis, raw_suffix, median_suffix):
    """Draw the raw and per-second-median latency lines of every Type on ``axis``.

    ``raw_suffix`` and ``median_suffix`` are appended to the Type name to build the
    legend labels. All raw series are drawn before any median series, so the raw
    entries come first in the legend.
    """
    for type_name in median_per_second_by_type:
        type_data = df[df['Type'] == type_name]
        axis.plot(type_data['ElapsedSeconds'], type_data['LatencyMs'],
                  label=f'{type_name}{raw_suffix}', color=colors.get(type_name, 'gray'),
                  marker='o', markersize=0.3, linewidth=0.6, alpha=0.6)

    for type_name, median_data in median_per_second_by_type.items():
        axis.plot(median_data['ElapsedSecondsRounded'], median_data['LatencyMs'],
                  label=f'{type_name}{median_suffix}', color=median_colors.get(type_name, 'black'),
                  linewidth=2.5, marker='s', markersize=4, alpha=0.9)


if has_ptree_columns:
    st.subheader("Latency and PTree Delta Analysis (Combined)")

    fig, ax1 = plt.subplots(figsize=(16, 10))

    # Primary y-axis: latency
    _plot_latency_series(ax1, ' Latency', ' Latency Median')
    ax1.set_xlabel('Elapsed Time (seconds)')
    ax1.set_ylabel('Latency (Ms)', color='black')
    ax1.tick_params(axis='y', labelcolor='black')
    ax1.grid(True, alpha=0.3)

    # Secondary y-axis sharing the same x-axis: PTreeDelta, not split by Type
    ax2 = ax1.twinx()
    ax2.plot(df['ElapsedSeconds'], df['PTreeDelta'],
             label='PTree Delta (Allocated - Released)', color='red',
             linewidth=1.5, alpha=0.8)

    # Median PTreeDelta per elapsed second across all rows
    ptree_median_data = df.groupby('ElapsedSecondsRounded')['PTreeDelta'].median().reset_index()
    ax2.plot(ptree_median_data['ElapsedSecondsRounded'], ptree_median_data['PTreeDelta'],
             label='PTree Delta Median', color='darkred',
             linewidth=3, marker='D', markersize=5, alpha=0.9)

    ax2.set_ylabel('PTree Delta (bytes)', color='red')
    ax2.tick_params(axis='y', labelcolor='red')

    # A twin axis has its own legend entries; merge both sets into one legend box.
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper left', bbox_to_anchor=(0.02, 0.98))

    ax1.set_title('Latency and PTree Delta Over Time (P99.9 Outliers Removed)')

    plt.tight_layout()
    st.pyplot(fig)
    # pyplot keeps figures alive until closed; release it once rendered.
    plt.close(fig)

    # PTreeDelta Statistics
    st.subheader("PTree Delta Statistics")
    st.write(f"**Overall PTree Delta:**")
    st.write(f"  - Mean: {df['PTreeDelta'].mean():.0f} bytes")
    st.write(f"  - Median: {df['PTreeDelta'].median():.0f} bytes")
    st.write(f"  - Max: {df['PTreeDelta'].max():.0f} bytes")
    st.write(f"  - Min: {df['PTreeDelta'].min():.0f} bytes")
    st.write(f"  - Std deviation: {df['PTreeDelta'].std():.0f} bytes")
    st.write("")

else:
    # Latency-only plot when the PTree columns are not present
    st.subheader("Latency Analysis (Matplotlib)")

    fig, ax = plt.subplots(figsize=(14, 8))

    _plot_latency_series(ax, '', ' Median')

    ax.set_xlabel('Elapsed Time (seconds)')
    ax.set_ylabel('Latency (Ms)')
    ax.set_title('Latency Over Time by Type with Separate Medians per Second (P99.9 Outliers Removed)')
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    st.pyplot(fig)
    # pyplot keeps figures alive until closed; release it once rendered.
    plt.close(fig)

# Timing overview: first/last raw timestamps and the covered span in seconds and minutes
st.subheader("Timing Information")
st.write(f"First timestamp: {first_timestamp}")
last_timestamp = df['Timestamp'].max()
st.write(f"Last timestamp: {last_timestamp}")
total_seconds = df['ElapsedSeconds'].max()
st.write(f"Total duration: {total_seconds:.6f} seconds")
st.write(f"Total duration: {total_seconds/60:.6f} minutes")

# Row counts of the filtered data, overall and by Type
st.subheader("Data Summary (After Outlier Removal)")
record_count = len(df)
st.write(f"Total records: {record_count:,}")
st.write("Records by Type:")
type_counts = df['Type'].value_counts()
st.write(type_counts)

# Latency percentiles, computed on the already-filtered rows
st.subheader("Latency Percentile Information")
percentiles = [50, 90, 95, 99, 99.9]
latency = df['LatencyMs']
for p in percentiles:
    percentile_value = latency.quantile(p/100)
    st.write(f"P{p}: {percentile_value:.3f} ms")

# Per-Type latency statistics on the filtered data.
# The per-second medians are derived here from this cell's own `df`. The previous
# `'median_per_second_by_type' in locals()` guard depended on whichever plotting
# branch ran earlier and, in a notebook, could pick up a dict left behind by another
# cell that was computed from a different dataset.
st.subheader("Median Latency Statistics by Type (Outliers Removed)")
for type_name in df['Type'].unique():
    type_data = df[df['Type'] == type_name]
    # Group this Type's rows by whole elapsed second (floor) and take the median latency.
    elapsed_second = np.floor(type_data['ElapsedSeconds']).astype(int)
    median_data = type_data.groupby(elapsed_second)['LatencyMs'].median()

    st.write(f"**{type_name} Type:**")
    st.write(f"  - Overall median latency: {type_data['LatencyMs'].median():.3f} ms")
    st.write(f"  - Mean latency: {type_data['LatencyMs'].mean():.3f} ms")
    st.write(f"  - Std deviation: {type_data['LatencyMs'].std():.3f} ms")
    st.write(f"  - Median per second - Min: {median_data.min():.3f} ms")
    st.write(f"  - Median per second - Max: {median_data.max():.3f} ms")
    st.write(f"  - Median per second - Average: {median_data.mean():.3f} ms")
    st.write("")

# First 20 rows of the filtered, time-ordered data; PTree columns are included
# only when both source columns were loaded.
st.subheader("Raw Data Preview (with Elapsed Time, Outliers Removed)")
preview_columns = ['Timestamp', 'ElapsedSeconds', 'LatencyMs']
if 'PTreeAllocated' in df.columns and 'PTreeReleased' in df.columns:
    preview_columns += ['PTreeAllocated', 'PTreeReleased', 'PTreeDelta']
preview_columns.append('Type')
display_df = df[preview_columns].head(20)
st.dataframe(display_df)

In [None]:
# Import python packages
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json

# Obtain the Snowpark session attached to this notebook (not referenced again in this cell).
from snowflake.snowpark.context import get_active_session
session = get_active_session()

# Load the benchmark records. The same file is tried first as JSON Lines (one JSON
# value per non-empty line) and, if a line is not valid JSON, re-read as CSV.
# NOTE(review): the CSV fallback used to read 'benchb0_2.csv' while the JSON attempt
# opened 'bench60_2.csv', so a failed JSON parse silently analysed a different file.
# Both attempts now use the first path — confirm this is the intended input.
data_path = 'JAZHUANG 2025-11-17 15:04:00/bench60_2.csv'

try:
    with open(data_path, 'r') as f:
        # Blank lines are skipped; every other line must parse as JSON.
        data = [json.loads(line) for line in f if line.strip()]
    df = pd.DataFrame(data)
    st.write("✅ Successfully loaded JSON data")
except json.JSONDecodeError:
    # Not JSON Lines: fall back to parsing the same file as CSV
    try:
        df = pd.read_csv(data_path)
        st.write("✅ Successfully loaded CSV data")
    except Exception as e:
        st.error(f"Failed to load data: {e}")
        st.stop()
except OSError as e:
    # Missing or unreadable file: report it like any other load failure instead of
    # letting the raw exception escape.
    st.error(f"Failed to load data: {e}")
    st.stop()

# List the columns that were actually loaded
available_columns = list(df.columns)
st.write("**Available columns:**", available_columns)

# ReadVersion and TotalUs are mandatory; stop the cell if either is absent
required_columns = ['ReadVersion', 'TotalUs']
optional_columns = ['StartWaitUs']
present_columns = set(available_columns)
missing_required = [name for name in required_columns if name not in present_columns]

if missing_required:
    st.error(f"Required columns {missing_required} not found in the data")
    st.write("Available columns:", list(df.columns))
    st.stop()

# StartWaitUs is optional; remember whether it can be included
has_start_wait = 'StartWaitUs' in present_columns
if not has_start_wait:
    st.write("⚠️ StartWaitUs column not found - proceeding with TotalUs only")
else:
    st.write("✅ StartWaitUs column found - will include in analysis")

# Outlier trim, step 1: keep rows whose TotalUs is at or below its 99.9th percentile
original_count = len(df)
p99_9_total = df['TotalUs'].quantile(0.999)
total_within_threshold = df['TotalUs'] <= p99_9_total
df = df[total_within_threshold].copy()

# Step 2 (only with StartWaitUs): same cut on StartWaitUs, with the threshold taken
# from the rows that survived step 1
if has_start_wait:
    p99_9_start = df['StartWaitUs'].quantile(0.999)
    start_within_threshold = df['StartWaitUs'] <= p99_9_start
    df = df[start_within_threshold].copy()

filtered_count = len(df)
removed_count = original_count - filtered_count

st.write("**Outlier Filtering:**")
st.write(f"- TotalUs P99.9 threshold: {p99_9_total:.3f} µs")
if has_start_wait:
    st.write(f"- StartWaitUs P99.9 threshold: {p99_9_start:.3f} µs")
st.write(f"- Original records: {original_count:,}")
st.write(f"- After filtering: {filtered_count:,}")
st.write(f"- Removed {removed_count:,} outliers ({(removed_count / original_count * 100):.2f}%)")
st.write("")

# Order the rows by ReadVersion
df = df.sort_values('ReadVersion').reset_index(drop=True)

# Shift ReadVersion so the smallest value becomes 0
first_version = df['ReadVersion'].min()
df['NormalizedReadVersion'] = df['ReadVersion'].sub(first_version)

# Value ranges of the filtered data
st.write("**Data Summary (After Outlier Removal):**")
st.write(f"- Total records: {len(df):,}")
last_version = df['ReadVersion'].max()
st.write(f"- ReadVersion range: {first_version:,} to {last_version:,}")
total_us = df['TotalUs']
st.write(f"- TotalUs range: {total_us.min():.2f} to {total_us.max():.2f}")
if has_start_wait:
    start_wait_us = df['StartWaitUs']
    st.write(f"- StartWaitUs range: {start_wait_us.min():.2f} to {start_wait_us.max():.2f}")
st.write("")

# Method 1: Streamlit's built-in line chart over the normalized ReadVersion axis.
# StartWaitUs is drawn alongside TotalUs only when that column exists.
if has_start_wait:
    chart_title = "TotalUs and StartWaitUs Over ReadVersion (Streamlit Charts) - Outliers Removed"
    chart_columns = ['TotalUs', 'StartWaitUs']
else:
    chart_title = "TotalUs Over ReadVersion (Streamlit Chart) - Outliers Removed"
    chart_columns = ['TotalUs']

st.subheader(chart_title)
chart_df = df.set_index('NormalizedReadVersion')[chart_columns]
st.line_chart(chart_df)

# Method 2: matplotlib scatter plots with per-interval medians.
# Rows are bucketed into fixed-width intervals of the normalized ReadVersion; each
# bucket is labelled by its lower bound and its median is plotted at the midpoint.
interval_size = 1000000
df['ReadVersionInterval'] = (df['NormalizedReadVersion'] // interval_size) * interval_size

# Median TotalUs per interval — needed by both layouts, so computed once here.
median_total_data = df.groupby('ReadVersionInterval')['TotalUs'].median().reset_index()
median_total_data['IntervalMidpoint'] = median_total_data['ReadVersionInterval'] + (interval_size / 2)

median_label = f'Median per {interval_size:,} ReadVersion'

if has_start_wait:
    st.subheader("TotalUs and StartWaitUs Analysis with Medians - Outliers Removed")

    # Median StartWaitUs per interval, positioned like the TotalUs medians
    median_start_data = df.groupby('ReadVersionInterval')['StartWaitUs'].median().reset_index()
    median_start_data['IntervalMidpoint'] = median_start_data['ReadVersionInterval'] + (interval_size / 2)

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(16, 12))

    # Top panel: TotalUs samples with the per-interval median line
    ax1.scatter(df['NormalizedReadVersion'], df['TotalUs'],
                color='blue', alpha=0.6, s=1, label='TotalUs')
    ax1.plot(median_total_data['IntervalMidpoint'], median_total_data['TotalUs'],
             color='darkblue', linewidth=3, marker='o', markersize=6,
             label=median_label)

    ax1.set_xlabel('Normalized ReadVersion')
    ax1.set_ylabel('TotalUs (µs)', color='blue')
    ax1.tick_params(axis='y', labelcolor='blue')
    ax1.grid(True, alpha=0.3)
    ax1.legend(loc='upper left')
    # The "1M" in the titles mirrors interval_size = 1,000,000 set above.
    ax1.set_title('TotalUs Over ReadVersion with Median per 1M Interval')
    ax1.ticklabel_format(style='plain', axis='x')

    # Bottom panel: StartWaitUs samples with the per-interval median line
    ax2.scatter(df['NormalizedReadVersion'], df['StartWaitUs'],
                color='orange', alpha=0.6, s=1, label='StartWaitUs')
    ax2.plot(median_start_data['IntervalMidpoint'], median_start_data['StartWaitUs'],
             color='darkorange', linewidth=3, marker='s', markersize=6,
             label=median_label)

    ax2.set_xlabel('Normalized ReadVersion')
    ax2.set_ylabel('StartWaitUs (µs)', color='orange')
    ax2.tick_params(axis='y', labelcolor='orange')
    ax2.grid(True, alpha=0.3)
    ax2.legend(loc='upper left')
    ax2.set_title('StartWaitUs Over ReadVersion with Median per 1M Interval')
    ax2.ticklabel_format(style='plain', axis='x')
    plt.setp(ax2.get_xticklabels(), rotation=45)

    plt.tight_layout()
    st.pyplot(fig)
    # pyplot keeps figures alive until closed; release it once rendered.
    plt.close(fig)

else:
    st.subheader("TotalUs Analysis with Median - Outliers Removed")

    fig, ax1 = plt.subplots(figsize=(16, 10))

    # TotalUs samples with the per-interval median line
    ax1.scatter(df['NormalizedReadVersion'], df['TotalUs'],
                color='blue', alpha=0.6, s=1, label='TotalUs')
    ax1.plot(median_total_data['IntervalMidpoint'], median_total_data['TotalUs'],
             color='darkblue', linewidth=3, marker='o', markersize=6,
             label=median_label)

    ax1.set_xlabel('Normalized ReadVersion')
    ax1.set_ylabel('TotalUs (µs)', color='blue')
    ax1.tick_params(axis='y', labelcolor='blue')
    ax1.grid(True, alpha=0.3)
    ax1.legend(loc='upper left')

    # Plain (non-scientific) tick labels on the x-axis, rotated for readability
    ax1.ticklabel_format(style='plain', axis='x')
    plt.setp(ax1.get_xticklabels(), rotation=45)

    # The "1M" in the title mirrors interval_size = 1,000,000 set above.
    ax1.set_title('TotalUs Over ReadVersion with Median per 1M Interval (P99.9 Outliers Removed)')

    plt.tight_layout()
    st.pyplot(fig)
    # pyplot keeps figures alive until closed; release it once rendered.
    plt.close(fig)

# Percentiles of each reported metric, computed on the already-filtered rows
st.subheader("Percentile Information")
percentiles = [50, 90, 95, 99, 99.9]

# TotalUs is always reported; StartWaitUs only when that column was found.
reported_metrics = ['TotalUs']
if has_start_wait:
    reported_metrics.append('StartWaitUs')

for metric in reported_metrics:
    st.write(f"**{metric} Percentiles:**")
    for p in percentiles:
        st.write(f"P{p}: {df[metric].quantile(p/100):.3f} µs")

# Basic descriptive statistics per metric
st.subheader("Statistics (After Outlier Removal)")

for metric in reported_metrics:
    metric_values = df[metric]
    st.write(f"**{metric} Statistics:**")
    st.write(f"  - Mean: {metric_values.mean():.3f} µs")
    st.write(f"  - Median: {metric_values.median():.3f} µs")
    st.write(f"  - Max: {metric_values.max():.3f} µs")
    st.write(f"  - Min: {metric_values.min():.3f} µs")
    st.write(f"  - Std deviation: {metric_values.std():.3f} µs")
    st.write("")

# Spread of the per-interval medians computed for the plots
st.write(f"**Median per {interval_size:,} ReadVersion intervals:**")
st.write(f"  - Number of intervals: {len(median_total_data)}")

interval_medians = {'TotalUs': median_total_data}
if has_start_wait:
    interval_medians['StartWaitUs'] = median_start_data

for metric, medians in interval_medians.items():
    per_interval = medians[metric]
    st.write(f"  - {metric} median range: {per_interval.min():.3f} to {per_interval.max():.3f} µs")
    st.write(f"  - {metric} average of medians: {per_interval.mean():.3f} µs")

# Table of per-interval medians
st.subheader("Median Values per Interval (Outliers Removed)")

if has_start_wait:
    # Join the two median tables on their shared key columns. The value columns
    # ('TotalUs' and 'StartWaitUs') have different names, so pandas applies no merge
    # suffix to them; the earlier lookup of 'StartWaitUs_Start' raised KeyError.
    combined_median = median_total_data.merge(
        median_start_data, on=['ReadVersionInterval', 'IntervalMidpoint']
    )
    combined_display = combined_median[['ReadVersionInterval', 'TotalUs', 'StartWaitUs']].copy()
    # Thousands separators for readability (turns the interval column into strings)
    combined_display['ReadVersionInterval'] = combined_display['ReadVersionInterval'].apply(lambda x: f"{x:,}")
    combined_display['TotalUs'] = combined_display['TotalUs'].round(3)
    combined_display['StartWaitUs'] = combined_display['StartWaitUs'].round(3)
    st.dataframe(combined_display.rename(columns={
        'TotalUs': 'TotalUs_Median',
        'StartWaitUs': 'StartWaitUs_Median'
    }))
else:
    median_display = median_total_data.copy()
    median_display['ReadVersionInterval'] = median_display['ReadVersionInterval'].apply(lambda x: f"{x:,}")
    median_display['TotalUs'] = median_display['TotalUs'].round(3)
    st.dataframe(median_display[['ReadVersionInterval', 'TotalUs']])

# First 20 rows of the filtered data ordered by ReadVersion; StartWaitUs is shown
# only when that column exists.
st.subheader("Raw Data Preview (Outliers Removed)")
preview_columns = ['ReadVersion', 'NormalizedReadVersion', 'TotalUs']
if has_start_wait:
    preview_columns.append('StartWaitUs')
preview_columns.append('ReadVersionInterval')
display_df = df[preview_columns].head(20)
st.dataframe(display_df)

