# Batch Ring-Down Analysis Example

This notebook demonstrates comprehensive batch analysis of multiple ring-down measurement files, including:
- Batch processing of multiple data files
- Frequency estimation comparison (NLS vs DFT)
- Q factor analysis
- Consistency analysis across realizations
- CRLB comparison analysis
- Comprehensive visualizations


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import glob
from joblib import Parallel, delayed

from ringdownanalysis import BatchRingDownAnalyzer, RingDownAnalyzer

# Apply consistent plotting style
from ringdownanalysis import plots
plots.apply_plotting_style()


## Initialize Batch Analyzer

The `BatchRingDownAnalyzer` provides a high-level interface for analyzing multiple ring-down data files in parallel.


In [None]:
# Location of the measurement files, given relative to the notebook location
data_dir = Path('../data')

# Batch-level analyzer; the per-file results are attached to it in the
# processing section and it drives the Q-factor, consistency and CRLB analyses
batch_analyzer = BatchRingDownAnalyzer()

# Show the resolved path up front so a wrong working directory is obvious
print(f"Data directory: {data_dir.absolute()}")
print(f"Directory exists: {data_dir.exists()}")


## Process All Files

Process all data files in the directory using joblib for parallelization. The analyzer will:
1. Load each file
2. Estimate tau from full data
3. Crop data to 3×tau
4. Estimate frequency using NLS and DFT methods
5. Estimate noise parameters
6. Calculate CRLB bounds

We use joblib.Parallel to process multiple files in parallel across available CPU cores.


In [None]:
# Collect the input files by extension. Each group is sorted by path, and the
# CSV group is placed before the MAT group, so the processing order is stable.
csv_files, mat_files = (
    sorted(glob.glob(str(data_dir / pattern))) for pattern in ('*.csv', '*.mat')
)
all_files = [*csv_files, *mat_files]

print(f"Found {len(csv_files)} CSV files and {len(mat_files)} MAT files")
print(f"Total files to process: {len(all_files)}")

# Helper function for parallel processing with joblib
def analyze_single_file(filepath):
    """Run the single-file ring-down analysis on ``filepath``.

    Intended as the per-item worker for ``joblib.Parallel``. A fresh
    ``RingDownAnalyzer`` is created for every call.

    Parameters
    ----------
    filepath : str or path-like
        Path of the data file to analyze.

    Returns
    -------
    The value returned by ``RingDownAnalyzer.analyze_file``, or ``None`` if
    any exception was raised (the error is printed with the file's base name
    so one bad file does not abort the whole batch).
    """
    # NOTE(review): the notebook text and the plot labels describe a 3×tau crop,
    # while this call passes max_tau_multiplier=1.0 — confirm which is intended.
    try:
        return RingDownAnalyzer().analyze_file(filepath, max_tau_multiplier=1.0)
    except Exception as exc:
        print(f"Error processing {Path(filepath).name}: {exc}")
        return None

# Process files in parallel using joblib
# n_jobs=-1 uses all available CPU cores
# Set n_jobs=1 for sequential processing, or specify a number
print(f"\nProcessing {len(all_files)} files using joblib.Parallel...")
raw_results = Parallel(n_jobs=-1, verbose=10)(
    delayed(analyze_single_file)(filepath) for filepath in all_files
)

# Parallel returns its outputs in input order, so pairing them with all_files
# identifies exactly which files failed (analyze_single_file returned None).
# This is reported from the parent process because messages printed inside
# joblib worker processes may not reach the notebook output.
failed_files = [
    Path(filepath).name
    for filepath, outcome in zip(all_files, raw_results)
    if outcome is None
]

# Filter out None results (failed files)
results = [outcome for outcome in raw_results if outcome is not None]

# Store results in batch_analyzer for later use
batch_analyzer.results = results

print(f"\n{'='*70}")
print(f"Successfully processed {len(results)} files")
if failed_files:
    print(f"Failed to process {len(failed_files)} files: {', '.join(failed_files)}")
print(f"{'='*70}")

# Display summary for each file
if len(results) > 0:
    print("\nProcessing Summary:")
    for r in results:
        print(f"  {Path(r['filename']).name}:")
        print(f"    Sampling frequency: {r['fs']:.2f} Hz")
        print(f"    Estimated tau: {r['tau_est']:.2f} s")
        print(f"    NLS frequency: {r['f_nls']:.6f} Hz")
        print(f"    DFT frequency: {r['f_dft']:.6f} Hz")
        print(f"    Difference: {abs(r['f_nls'] - r['f_dft']):.6e} Hz")
        print(f"    CRLB std: {r['crlb_std_f']:.6e} Hz")


## Visualize Sample Time Series

Plot the time series of every processed file, overlaying the cropped segment on the original data, to visualize the ring-down signals.


In [None]:
if len(results) == 0:
    print("No results to plot.")
else:
    n_plots = len(results)

    # One stacked panel per file. squeeze=False always returns a 2-D axes
    # array, so the single-file case needs no special handling.
    fig, axes = plt.subplots(n_plots, 1, figsize=(14, 4*n_plots), squeeze=False)

    for ax, r in zip(axes.ravel(), results):
        # Thin dense traces before drawing: the integer stride keeps each
        # curve below 100000 plotted points.
        stride_full = max(1, len(r['t']) // 50000)
        stride_crop = max(1, len(r['t_crop']) // 50000)

        # Full record in the background, cropped segment drawn on top
        ax.plot(r['t'][::stride_full], r['data'][::stride_full], 'b-',
                alpha=0.5, label='Original', linewidth=0.5)
        ax.plot(r['t_crop'][::stride_crop], r['data_cropped'][::stride_crop],
                'r-', alpha=0.7, label='Cropped (3×τ)', linewidth=1)

        # Vertical marker at three estimated decay times.
        # NOTE(review): the label and marker assume a 3×τ crop; confirm this
        # matches the multiplier actually passed to analyze_file.
        tau_3x = 3.0 * r['tau_est']
        ax.axvline(tau_3x, color='g', linestyle='--', linewidth=2,
                   label=f"3×τ = {tau_3x:.2f} s")

        panel_title = (
            f"{Path(r['filename']).name}\n"
            f"τ = {r['tau_est']:.2f} s, "
            f"f_NLS = {r['f_nls']:.6f} Hz, "
            f"f_DFT = {r['f_dft']:.6f} Hz"
        )
        ax.set_title(panel_title, fontsize=10)
        ax.set_xlabel('Time (s)', fontsize=11)
        ax.set_ylabel('Phase (cycles)', fontsize=11)
        ax.legend(loc='best', fontsize=9)
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()


## Frequency Estimation Comparison

Compare NLS and DFT frequency estimation methods.


In [None]:
if len(results) == 0:
    print("No results to plot.")
else:
    # Per-file estimates from both methods and their absolute disagreement
    f_nls_all = np.array([r['f_nls'] for r in results])
    f_dft_all = np.array([r['f_dft'] for r in results])
    diffs = np.abs(f_nls_all - f_dft_all)
    mean_diff = np.mean(diffs)
    median_diff = np.median(diffs)

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: NLS against DFT, with the identity line as reference
    ax = axes[0]
    scatter = ax.scatter(f_nls_all, f_dft_all, s=100, alpha=0.6,
                         edgecolors='black', linewidths=1)
    f_min = min(f_nls_all.min(), f_dft_all.min())
    f_max = max(f_nls_all.max(), f_dft_all.max())
    f_range = f_max - f_min
    # Extend the identity line 10% of the data range beyond both extremes
    line_lo = f_min - 0.1*f_range
    line_hi = f_max + 0.1*f_range
    ax.plot([line_lo, line_hi], [line_lo, line_hi],
            'r--', linewidth=2, label='Perfect agreement')
    ax.set_xlabel('NLS Frequency (Hz)', fontsize=11)
    ax.set_ylabel('DFT Frequency (Hz)', fontsize=11)
    ax.set_title('Frequency Estimation Comparison: NLS vs DFT', fontsize=12)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

    # Right panel: distribution of |f_NLS - f_DFT| with mean and median markers
    ax = axes[1]
    ax.hist(diffs, bins=20, edgecolor='black', alpha=0.7, color='steelblue')
    ax.axvline(mean_diff, color='r', linestyle='--', linewidth=2,
               label=f'Mean = {mean_diff:.6e} Hz')
    ax.axvline(median_diff, color='g', linestyle='--', linewidth=2,
               label=f'Median = {median_diff:.6e} Hz')
    ax.set_xlabel('|f_NLS - f_DFT| (Hz)', fontsize=11)
    ax.set_ylabel('Count', fontsize=11)
    ax.set_title('Frequency Difference Distribution', fontsize=12)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    plt.show()

    # Numeric summary of the same quantities
    print(f"Frequency Statistics:")
    print(f"  NLS mean: {np.mean(f_nls_all):.9f} Hz")
    print(f"  NLS std: {np.std(f_nls_all):.6e} Hz")
    print(f"  DFT mean: {np.mean(f_dft_all):.9f} Hz")
    print(f"  DFT std: {np.std(f_dft_all):.6e} Hz")
    print(f"  Mean difference: {mean_diff:.6e} Hz")
    print(f"  Max difference: {np.max(diffs):.6e} Hz")


## Q Factor Analysis

Calculate and visualize Q factors for all measurements.


In [None]:
# Guard against an empty batch, as the other analysis cells do, so that the
# statistics are not computed or formatted when nothing was processed.
if len(results) > 0:
    # Calculate Q factors
    q_factors = batch_analyzer.calculate_q_factors()
    q_stats = batch_analyzer.get_q_factor_statistics()

    print(f"Q Factor Statistics:")
    print(f"  Mean Q: {q_stats['mean']:.2e}")
    print(f"  Std Q: {q_stats['std']:.2e}")
    print(f"  Min Q: {q_stats['min']:.2e}")
    print(f"  Max Q: {q_stats['max']:.2e}")
    print(f"  Range: {q_stats['range']:.2e}")

    # Visualize Q factors
    if len(q_factors) > 0:
        fig, axes = plt.subplots(1, 2, figsize=(14, 5))

        # Histogram with the mean and the mean ± one standard deviation marked
        ax = axes[0]
        ax.hist(q_factors, bins=20, edgecolor='black', alpha=0.7, color='steelblue')
        ax.axvline(q_stats['mean'], color='r', linestyle='--', linewidth=2,
                   label=f"Mean = {q_stats['mean']:.2e}")
        ax.axvline(q_stats['mean'] + q_stats['std'], color='orange',
                   linestyle='--', linewidth=1, label=f"±1σ")
        ax.axvline(q_stats['mean'] - q_stats['std'], color='orange',
                   linestyle='--', linewidth=1)
        ax.set_xlabel('Q Factor', fontsize=11)
        ax.set_ylabel('Count', fontsize=11)
        ax.set_title('Q Factor Distribution', fontsize=12)
        ax.legend(fontsize=10)
        ax.grid(True, alpha=0.3, axis='y')

        # Box plot. The tick label is set on the axis afterwards instead of
        # through boxplot(labels=...): that keyword is deprecated since
        # Matplotlib 3.9 (renamed to tick_labels), whereas set_xticklabels
        # works on both old and new versions.
        ax = axes[1]
        bp = ax.boxplot([q_factors], patch_artist=True, widths=0.6)
        ax.set_xticklabels(['Q Factor'])
        bp['boxes'][0].set_facecolor('lightblue')
        bp['boxes'][0].set_alpha(0.7)
        ax.set_ylabel('Q Factor', fontsize=11)
        ax.set_title('Q Factor Box Plot', fontsize=12)
        ax.grid(True, alpha=0.3, axis='y')

        plt.tight_layout()
        plt.show()
else:
    print("No results to analyze.")


## Consistency Analysis

Analyze consistency of frequency estimates across all realizations.


In [None]:
# Guard against an empty batch, as the other analysis cells do, so the cell
# reports the situation instead of failing on missing statistics.
if len(results) > 0:
    # Perform consistency analysis
    consistency = batch_analyzer.consistency_analysis()

    print(f"Consistency Analysis Summary:")
    print(f"  Number of realizations: {consistency['n_realizations']}")
    print(f"  Number of pairwise comparisons: {consistency['n_pairwise_comparisons']}")
    print(f"\nNLS Method:")
    print(f"  Mean frequency: {consistency['nls_mean']:.9f} Hz")
    print(f"  Std across realizations: {consistency['nls_std_across_realizations']:.6e} Hz")
    print(f"  Coefficient of variation: {consistency['nls_cv']:.2e}")
    print(f"  Range: [{consistency['nls_range'][0]:.9f}, {consistency['nls_range'][1]:.9f}] Hz")
    print(f"  Span: {consistency['nls_span']:.6e} Hz")
    print(f"\nDFT Method:")
    print(f"  Mean frequency: {consistency['dft_mean']:.9f} Hz")
    print(f"  Std across realizations: {consistency['dft_std_across_realizations']:.6e} Hz")
    print(f"  Coefficient of variation: {consistency['dft_cv']:.2e}")
    print(f"  Range: [{consistency['dft_range'][0]:.9f}, {consistency['dft_range'][1]:.9f}] Hz")
    print(f"  Span: {consistency['dft_span']:.6e} Hz")

    # Visualize consistency
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Pairwise differences histogram - NLS
    ax = axes[0, 0]
    nls_diffs = consistency['nls_pairwise_diffs']
    ax.hist(nls_diffs, bins=30, edgecolor='black', alpha=0.7, color='steelblue')
    ax.axvline(consistency['nls_statistics']['mean'], color='r', linestyle='--',
               linewidth=2, label=f"Mean = {consistency['nls_statistics']['mean']:.6e} Hz")
    ax.set_xlabel('Pairwise Frequency Difference (Hz)', fontsize=11)
    ax.set_ylabel('Count', fontsize=11)
    ax.set_title('NLS: Pairwise Frequency Differences', fontsize=12)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3, axis='y')

    # Pairwise differences histogram - DFT
    ax = axes[0, 1]
    dft_diffs = consistency['dft_pairwise_diffs']
    ax.hist(dft_diffs, bins=30, edgecolor='black', alpha=0.7, color='coral')
    ax.axvline(consistency['dft_statistics']['mean'], color='r', linestyle='--',
               linewidth=2, label=f"Mean = {consistency['dft_statistics']['mean']:.6e} Hz")
    ax.set_xlabel('Pairwise Frequency Difference (Hz)', fontsize=11)
    ax.set_ylabel('Count', fontsize=11)
    ax.set_title('DFT: Pairwise Frequency Differences', fontsize=12)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3, axis='y')

    # Frequency distribution across realizations (both methods overlaid,
    # dashed lines at the respective means)
    ax = axes[1, 0]
    f_nls_all = np.array([r['f_nls'] for r in results])
    f_dft_all = np.array([r['f_dft'] for r in results])
    ax.hist(f_nls_all, bins=20, alpha=0.6, label=f"NLS (std={consistency['nls_std_across_realizations']:.6e})",
            edgecolor='black', color='steelblue')
    ax.hist(f_dft_all, bins=20, alpha=0.6, label=f"DFT (std={consistency['dft_std_across_realizations']:.6e})",
            edgecolor='black', color='coral')
    ax.axvline(consistency['nls_mean'], color='blue', linestyle='--', linewidth=2)
    ax.axvline(consistency['dft_mean'], color='red', linestyle='--', linewidth=2)
    ax.set_xlabel('Frequency (Hz)', fontsize=11)
    ax.set_ylabel('Count', fontsize=11)
    ax.set_title('Frequency Distribution Across Realizations', fontsize=12)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3, axis='y')

    # Box plot comparison. Tick labels are set on the axis afterwards instead
    # of through boxplot(labels=...): that keyword is deprecated since
    # Matplotlib 3.9 (renamed to tick_labels), whereas set_xticklabels works
    # on both old and new versions.
    ax = axes[1, 1]
    bp = ax.boxplot([f_nls_all, f_dft_all], patch_artist=True, widths=0.6)
    ax.set_xticklabels(['NLS', 'DFT'])
    bp['boxes'][0].set_facecolor('steelblue')
    bp['boxes'][0].set_alpha(0.7)
    bp['boxes'][1].set_facecolor('coral')
    bp['boxes'][1].set_alpha(0.7)
    ax.set_ylabel('Frequency (Hz)', fontsize=11)
    ax.set_title('Frequency Estimation Comparison (Box Plot)', fontsize=12)
    ax.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    plt.show()
else:
    print("No results to analyze.")


## CRLB Comparison Analysis

Compare frequency estimation differences with the Cramér-Rao Lower Bound (CRLB).


In [None]:
# Guard against an empty batch, as the other analysis cells do, so the cell
# reports the situation instead of failing on missing statistics.
if len(results) > 0:
    # Perform CRLB comparison analysis
    crlb_analysis = batch_analyzer.crlb_comparison_analysis()

    print(f"CRLB Statistics:")
    print(f"  Mean CRLB std: {crlb_analysis['crlb_statistics']['mean']:.6e} Hz")
    print(f"  Min CRLB std: {crlb_analysis['crlb_statistics']['min']:.6e} Hz")
    print(f"  Max CRLB std: {crlb_analysis['crlb_statistics']['max']:.6e} Hz")

    if len(crlb_analysis['valid_ratios']) > 0:
        print(f"\nRatio Statistics (|Δf| / CRLB):")
        print(f"  Mean ratio: {crlb_analysis['ratio_statistics']['mean']:.4f}")
        print(f"  Median ratio: {crlb_analysis['ratio_statistics']['median']:.4f}")
        print(f"  Max ratio: {crlb_analysis['ratio_statistics']['max']:.4f}")
        print(f"  Min ratio: {crlb_analysis['ratio_statistics']['min']:.4f}")

    # Visualize CRLB comparison
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Frequency differences vs CRLB
    ax = axes[0, 0]
    freq_diffs = crlb_analysis['frequency_diffs']
    crlb_stds = crlb_analysis['crlb_stds']
    ax.scatter(crlb_stds, freq_diffs, s=100, alpha=0.6, edgecolors='black')
    # The 1:1 line and the axis limits live on log-scaled axes, so they must
    # be derived from finite, strictly positive values only: a zero difference
    # (NLS and DFT agreeing exactly) or a non-finite entry would otherwise
    # yield a non-positive or NaN limit. Such points cannot be drawn on a log
    # axis in any case.
    combined = np.concatenate([np.ravel(crlb_stds), np.ravel(freq_diffs)]).astype(float)
    positive = combined[np.isfinite(combined) & (combined > 0)]
    if positive.size > 0:
        # Use a small margin for log scale
        min_plot = positive.min() * 0.5
        max_plot = positive.max() * 2.0
        ax.loglog([min_plot, max_plot], [min_plot, max_plot], 'r--', linewidth=2,
                  label='Difference = CRLB')
        # Set xlim and ylim to same range so 1:1 line is fully visible
        ax.set_xlim([min_plot, max_plot])
        ax.set_ylim([min_plot, max_plot])
        ax.legend(fontsize=10)
    ax.set_xlabel('CRLB std (Hz)', fontsize=11)
    ax.set_ylabel('|f_NLS - f_DFT| (Hz)', fontsize=11)
    ax.set_title('Frequency Difference vs CRLB', fontsize=12)
    ax.grid(True, alpha=0.3)

    # Ratio histogram
    ax = axes[0, 1]
    valid_ratios = crlb_analysis['valid_ratios']
    if len(valid_ratios) > 0:
        ax.hist(valid_ratios, bins=20, edgecolor='black', alpha=0.7, color='steelblue')
        ax.axvline(crlb_analysis['ratio_statistics']['mean'], color='r',
                   linestyle='--', linewidth=2,
                   label=f"Mean = {crlb_analysis['ratio_statistics']['mean']:.4f}")
        ax.axvline(1.0, color='g', linestyle='--', linewidth=2,
                   label='Ratio = 1.0')
        ax.set_xlabel('|Δf| / CRLB', fontsize=11)
        ax.set_ylabel('Count', fontsize=11)
        ax.set_title('Ratio Distribution (|Δf| / CRLB)', fontsize=12)
        ax.legend(fontsize=10)
        ax.grid(True, alpha=0.3, axis='y')
    else:
        ax.text(0.5, 0.5, 'No valid ratios', ha='center', va='center',
                transform=ax.transAxes, fontsize=12)

    # CRLB distribution
    ax = axes[1, 0]
    ax.hist(crlb_stds, bins=20, edgecolor='black', alpha=0.7, color='steelblue')
    ax.axvline(crlb_analysis['crlb_statistics']['mean'], color='r',
               linestyle='--', linewidth=2,
               label=f"Mean = {crlb_analysis['crlb_statistics']['mean']:.6e} Hz")
    ax.set_xlabel('CRLB std (Hz)', fontsize=11)
    ax.set_ylabel('Count', fontsize=11)
    ax.set_title('CRLB Distribution', fontsize=12)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3, axis='y')

    # Frequency differences distribution
    ax = axes[1, 1]
    ax.hist(freq_diffs, bins=20, edgecolor='black', alpha=0.7, color='coral')
    ax.axvline(np.mean(freq_diffs), color='r', linestyle='--', linewidth=2,
               label=f"Mean = {np.mean(freq_diffs):.6e} Hz")
    ax.set_xlabel('|f_NLS - f_DFT| (Hz)', fontsize=11)
    ax.set_ylabel('Count', fontsize=11)
    ax.set_title('Frequency Difference Distribution', fontsize=12)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    plt.show()
else:
    print("No results to analyze.")


## Summary Tables

Generate comprehensive summary tables of all analysis results.


In [None]:
# Build the per-file summary table from the batch analyzer and print it in
# full (no row index) under a 120-character rule
summary = batch_analyzer.get_summary_table()
df_summary = pd.DataFrame(summary['data'])

print(
    "Summary of Frequency Estimation Results:",
    "=" * 120,
    df_summary.to_string(index=False),
    sep="\n",
)


In [None]:
# Build the consistency table from the batch analyzer and print it in full
# (no row index) under a 120-character rule
consistency_table = batch_analyzer.get_consistency_table()
df_consistency = pd.DataFrame(consistency_table['data'])

print(
    "\nFrequency Estimates and Deviations from Mean:",
    "=" * 120,
    df_consistency.to_string(index=False),
    sep="\n",
)


## Summary Statistics Overview

Display the key per-file statistics (frequency, tau, CRLB, Q factor, and NLS–DFT differences) in a single multi-panel figure.


In [None]:
# Create a comprehensive summary visualization: a 2×3 grid of per-file
# quantities plotted against the file index. This cell relies on
# `consistency`, `q_stats` and `crlb_analysis` computed in the earlier
# analysis cells.
if len(results) > 0:
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))

    f_nls_all = np.array([r['f_nls'] for r in results])
    f_dft_all = np.array([r['f_dft'] for r in results])
    tau_all = np.array([r['tau_est'] for r in results])
    crlb_all = np.array([r['crlb_std_f'] for r in results])

    # 1. Frequency estimates over file index (dashed lines: per-method means)
    ax = axes[0, 0]
    indices = np.arange(len(results))
    ax.plot(indices, f_nls_all, 'o-', label='NLS', alpha=0.7, linewidth=2, markersize=6)
    ax.plot(indices, f_dft_all, 's-', label='DFT', alpha=0.7, linewidth=2, markersize=6)
    ax.axhline(consistency['nls_mean'], color='blue', linestyle='--', alpha=0.5)
    ax.axhline(consistency['dft_mean'], color='red', linestyle='--', alpha=0.5)
    ax.set_xlabel('File Index', fontsize=11)
    ax.set_ylabel('Frequency (Hz)', fontsize=11)
    ax.set_title('Frequency Estimates Across Files', fontsize=12)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

    # 2. Tau estimates
    ax = axes[0, 1]
    ax.plot(indices, tau_all, 'o-', color='green', alpha=0.7, linewidth=2, markersize=6)
    ax.axhline(np.mean(tau_all), color='green', linestyle='--', alpha=0.5,
               label=f'Mean = {np.mean(tau_all):.2f} s')
    ax.set_xlabel('File Index', fontsize=11)
    ax.set_ylabel('Tau (s)', fontsize=11)
    ax.set_title('Tau Estimates Across Files', fontsize=12)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

    # 3. CRLB values
    ax = axes[0, 2]
    ax.plot(indices, crlb_all, 'o-', color='purple', alpha=0.7, linewidth=2, markersize=6)
    ax.axhline(np.mean(crlb_all), color='purple', linestyle='--', alpha=0.5,
               label=f'Mean = {np.mean(crlb_all):.6e} Hz')
    ax.set_xlabel('File Index', fontsize=11)
    ax.set_ylabel('CRLB std (Hz)', fontsize=11)
    ax.set_title('CRLB Values Across Files', fontsize=12)
    ax.set_yscale('log')
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

    # 4. Q factors (only when the result dicts carry a 'Q' entry); otherwise
    # show a placeholder rather than leaving an unexplained blank panel
    ax = axes[1, 0]
    if 'Q' in results[0]:
        q_all = np.array([r['Q'] for r in results])
        ax.plot(indices, q_all, 'o-', color='orange', alpha=0.7, linewidth=2, markersize=6)
        ax.axhline(q_stats['mean'], color='orange', linestyle='--', alpha=0.5,
                   label=f'Mean = {q_stats["mean"]:.2e}')
        ax.set_xlabel('File Index', fontsize=11)
        ax.set_ylabel('Q Factor', fontsize=11)
        ax.set_title('Q Factors Across Files', fontsize=12)
        ax.set_yscale('log')
        ax.legend(fontsize=10)
        ax.grid(True, alpha=0.3)
    else:
        ax.text(0.5, 0.5, 'No Q factor data', ha='center', va='center',
                transform=ax.transAxes, fontsize=12)

    # 5. Frequency difference vs file index
    ax = axes[1, 1]
    diffs = np.abs(f_nls_all - f_dft_all)
    ax.plot(indices, diffs, 'o-', color='red', alpha=0.7, linewidth=2, markersize=6)
    ax.axhline(np.mean(diffs), color='red', linestyle='--', alpha=0.5,
               label=f'Mean = {np.mean(diffs):.6e} Hz')
    ax.set_xlabel('File Index', fontsize=11)
    ax.set_ylabel('|f_NLS - f_DFT| (Hz)', fontsize=11)
    ax.set_title('Frequency Differences Across Files', fontsize=12)
    ax.set_yscale('log')
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

    # 6. Ratio of difference to CRLB; non-finite ratios are dropped together
    # with their file indices. Same placeholder as the CRLB section when no
    # valid ratio exists.
    ax = axes[1, 2]
    if len(crlb_analysis['valid_ratios']) > 0:
        ratios = crlb_analysis['ratios']
        valid_mask = np.isfinite(ratios)
        valid_indices = indices[valid_mask]
        valid_ratios = ratios[valid_mask]
        ax.plot(valid_indices, valid_ratios, 'o-', color='teal', alpha=0.7,
                linewidth=2, markersize=6)
        ax.axhline(1.0, color='g', linestyle='--', linewidth=2, label='Ratio = 1.0')
        ax.axhline(crlb_analysis['ratio_statistics']['mean'], color='teal',
                   linestyle='--', alpha=0.5,
                   label=f'Mean = {crlb_analysis["ratio_statistics"]["mean"]:.4f}')
        ax.set_xlabel('File Index', fontsize=11)
        ax.set_ylabel('|Δf| / CRLB', fontsize=11)
        ax.set_title('Ratio of Difference to CRLB', fontsize=12)
        ax.legend(fontsize=10)
        ax.grid(True, alpha=0.3)
    else:
        ax.text(0.5, 0.5, 'No valid ratios', ha='center', va='center',
                transform=ax.transAxes, fontsize=12)

    plt.tight_layout()
    plt.show()

    print("\n" + "="*70)
    print("Analysis Complete!")
    print("="*70)
else:
    print("No results to plot.")
