In [1]:
import json
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as snspy

Matplotlib is building the font cache; this may take a moment.


In [10]:

def extract_benchmark_data():
    """
    Extract benchmark data for credential scaling analysis with fixed message count
    """
    results = []
    base_path = Path("../target/criterion/ps_equality_proof")

    print(f"Analyzing benchmark data from: {base_path}")

    # Modified glob pattern for new benchmark structure
    bench_dirs = list(base_path.glob("credentials_*"))
    print(f"\nFound {len(bench_dirs)} credential configurations:")
    for dir in bench_dirs:
        print(f"  {dir.name}")

    for bench_dir in bench_dirs:
        # Extract credential count from directory name
        creds = int(bench_dir.name.split('_')[1])

        estimates_file = bench_dir / "new" / "estimates.json"
        print(f"\nAnalyzing credential configuration: {creds}")
        print(f"Reading from: {estimates_file}")

        if estimates_file.exists():
            try:
                with open(estimates_file, 'r') as f:
                    data = json.load(f)

                # Extract timing metrics (converting nanoseconds to milliseconds)
                mean_time = data['mean']['point_estimate'] / 1_000_000
                std_dev = data['mean']['standard_error'] / 1_000_000

                results.append({
                    'Credentials': creds,
                    'Time (ms)': mean_time,
                    'Std Dev': std_dev
                })
                print(f"✓ Successfully processed data for {creds} credentials")

            except json.JSONDecodeError as e:
                print(f"× JSON parsing error: {e}")
            except KeyError as e:
                print(f"× Missing data field: {e}")
            except Exception as e:
                print(f"× Unexpected error: {e}")
        else:
            print(f"× No estimates file found at {estimates_file}")

    if results:
        # Create DataFrame and sort by credential count
        df = pd.DataFrame(results)
        df = df.sort_values('Credentials')

        # Save results
        df.to_csv('ps_credential_scaling.csv', index=False)
        df.to_json('ps_credential_scaling.json', orient='records', indent=2)

        print("\nScaling Analysis Summary:")
        print("-" * 50)
        print(df)
        print("\nData saved to: ps_credential_scaling.csv and ps_credential_scaling.json")

        # Calculate scaling factors between consecutive measurements
        if len(df) > 1:
            print("\nScaling Factors:")
            print("-" * 50)
            for i in range(len(df)-1):
                ratio = df.iloc[i+1]['Time (ms)'] / df.iloc[i]['Time (ms)']
                cred_ratio = df.iloc[i+1]['Credentials'] / \
                    df.iloc[i]['Credentials']
                print(
                    f"Scaling factor from {df.iloc[i]['Credentials']} to {df.iloc[i+1]['Credentials']} credentials: {ratio:.2f}x")
                print(
                    f"Relative scaling: {(ratio/cred_ratio):.2f}x per credential ratio")
    else:
        print("\nNo data was collected!")


if __name__ == "__main__":
    extract_benchmark_data()

Analyzing benchmark data from: ../target/criterion/ps_equality_proof

Found 19 credential configurations:
  credentials_5
  credentials_2
  credentials_3
  credentials_4
  credentials_13
  credentials_14
  credentials_15
  credentials_12
  credentials_6
  credentials_8
  credentials_9
  credentials_7
  credentials_17
  credentials_10
  credentials_19
  credentials_20
  credentials_18
  credentials_11
  credentials_16

Analyzing credential configuration: 5
Reading from: ../target/criterion/ps_equality_proof/credentials_5/new/estimates.json
✓ Successfully processed data for 5 credentials

Analyzing credential configuration: 2
Reading from: ../target/criterion/ps_equality_proof/credentials_2/new/estimates.json
✓ Successfully processed data for 2 credentials

Analyzing credential configuration: 3
Reading from: ../target/criterion/ps_equality_proof/credentials_3/new/estimates.json
✓ Successfully processed data for 3 credentials

Analyzing credential configuration: 4
Reading from: ../target/

In [11]:

def load_scaling_data(filename="ps_credential_scaling.csv"):
    """Load benchmark data with error handling."""
    try:
        if filename.endswith('.csv'):
            df = pd.read_csv(filename)
        else:  # json
            df = pd.read_json(filename)

        print("Loaded data summary:")
        print(df.to_string(index=False))
        return df
    except FileNotFoundError:
        print(f"Error: Could not find {filename}")
        return None
    except Exception as e:
        print(f"Error loading data: {e}")
        return None


def analyze_scaling(df):
    """Analyze and visualize scaling behavior."""
    plt.figure(figsize=(10, 6))

    # Empirical measurements
    plt.scatter(df['Credentials'], df['Time (ms)'],
                color='blue', label='Measured Performance', s=100)

    # Compute ideal linear scaling
    base_time = df['Time (ms)'].iloc[0]
    base_creds = df['Credentials'].iloc[0]
    x_line = np.linspace(0, df['Credentials'].max() * 1.1, 100)
    y_line = (base_time/base_creds) * x_line

    # Plot ideal scaling
    plt.plot(x_line, y_line, '--', color='red',
             label='Ideal Linear Scaling', alpha=0.7)

    # Add error bars if standard deviation is available
    if 'Std Dev' in df.columns:
        plt.errorbar(df['Credentials'], df['Time (ms)'],
                     yerr=df['Std Dev'], fmt='none',
                     capsize=5, color='blue', alpha=0.5)

    plt.xlabel('Number of Credentials')
    plt.ylabel('Execution Time (ms)')
    plt.title('Credential Scaling Analysis: Measured vs. Linear')
    plt.grid(True, alpha=0.3)
    plt.legend()

    # Save the visualization
    plt.savefig('credential_scaling_analysis.png',
                dpi=300, bbox_inches='tight')
    plt.close()

    # Analyze deviation from linearity
    print("\nDeviation from Linear Scaling:")
    print("-" * 60)
    print(f"{'Credentials':<12} | {'Expected (ms)':<14} | {'Actual (ms)':<12} | {'Deviation %':<10}")
    print("-" * 60)

    for idx, row in df.iterrows():
        expected = (base_time/base_creds) * row['Credentials']
        actual = row['Time (ms)']
        deviation = ((actual - expected) / expected) * 100
        print(
            f"{row['Credentials']:<12} | {expected:>13.2f} | {actual:>11.2f} | {deviation:>9.2f}%")


def main():
    df = load_scaling_data()
    if df is not None:
        analyze_scaling(df)


if __name__ == "__main__":
    main()

Loaded data summary:
 Credentials  Time (ms)  Std Dev
           2  45.605262 0.655947
           3  79.667558 4.053240
           4  94.333378 1.838871
           5 115.934479 3.241737
           6 146.710577 5.336742
           7 164.918363 5.996250
           8 181.249899 3.544318
           9 202.755512 2.483289
          10 223.717574 1.610042
          11 258.259320 3.177968
          12 273.953611 2.465497
          13 286.512813 5.778362
          14 300.577468 0.406632
          15 325.521069 1.503707
          16 343.241543 0.169053
          17 365.224036 0.271068
          18 386.000952 0.259687
          19 410.192069 1.744022
          20 452.220645 4.919796

Deviation from Linear Scaling:
------------------------------------------------------------
Credentials  | Expected (ms)  | Actual (ms)  | Deviation %
------------------------------------------------------------
2.0          |         45.61 |       45.61 |      0.00%
3.0          |         68.41 |       79.67 |     1