In [4]:
import re
import json
import csv
from pathlib import Path
from collections import defaultdict


# Line patterns are compiled once instead of on every input line.
# Scalar metric line:  <name> <number> # <description>
_METRIC_LINE = re.compile(r'^[\s]*([a-zA-Z_.0-9]+)[\s]+([\d.eE+-]+)[\s]+#[\s]*(.*)$')
# Indexed sub-entry line:  [<index>] <number>
_INDEXED_LINE = re.compile(r'^[\s]*\[(.+?)\][\s]+([\d.eE+-]+)')


def _parse_number(text):
    """Convert *text* to an int if possible, else a float, else return it unchanged."""
    try:
        # int() accepts an optional sign, so "-5" and "+5" stay integers.
        return int(text)
    except ValueError:
        pass
    try:
        return float(text)
    except ValueError:
        # The value pattern is permissive (e.g. "1-2" matches); keep such text as-is.
        return text


def parse_stats_file(filename):
    """Parse a ramulator stats file into scalar metrics and indexed sub-entries.

    Two kinds of line are recognised (all others are ignored):

    * ``<name> <number> # <description>`` -- a scalar metric.
    * ``[<index>] <number>`` -- an indexed entry, attributed to the most
      recently seen scalar metric.  Indexed lines that appear before any
      scalar metric are dropped.

    Numbers are converted the same way in both cases: integer text (with an
    optional sign) becomes ``int``, anything else numeric becomes ``float``,
    and text that cannot be converted is kept as a string.

    Args:
        filename: Path (str or path-like) of the stats file to read.

    Returns:
        A ``(metrics, channel_data)`` tuple where ``metrics`` maps metric name
        to ``{'value': ..., 'description': ...}`` and ``channel_data`` maps
        metric name to ``{index_string: value}``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    metrics = {}
    channel_data = defaultdict(dict)
    current_metric = None

    # Explicit encoding keeps parsing independent of the platform default;
    # errors="replace" stops a stray byte in a description from aborting the run.
    with open(filename, 'r', encoding='utf-8', errors='replace') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue

            scalar = _METRIC_LINE.match(line)
            if scalar:
                metric_name, value_str, description = scalar.groups()
                metrics[metric_name] = {
                    'value': _parse_number(value_str),
                    'description': description,
                }
                current_metric = metric_name
                continue

            if current_metric is None:
                continue

            indexed = _INDEXED_LINE.match(line)
            if indexed:
                index, value_str = indexed.groups()
                channel_data[current_metric][index] = _parse_number(value_str)

    return metrics, dict(channel_data)


def calculate_stats(metrics):
    """Derive the summary statistics dictionary from parsed metrics.

    Args:
        metrics: Mapping of metric name to a dict holding a ``'value'`` entry.
            Missing metrics (or entries without ``'value'``) fall back to 0,
            except the two maximum-bandwidth metrics, which fall back to 1.

    Returns:
        A dict of bandwidth, latency, row-buffer, activity and request-count
        figures.  Percentages are 0 when their denominator is not positive.
    """

    def lookup(name, fallback=0):
        # Fallback applies both when the metric is absent and when it has no 'value'.
        return metrics.get(name, {}).get('value', fallback)

    # Bandwidth inputs
    read_bw = lookup('ramulator.read_bandwidth')
    write_bw = lookup('ramulator.write_bandwidth')
    peak_internal_bw = lookup('ramulator.maximum_internal_bandwidth', 1)
    peak_link_bw = lookup('ramulator.maximum_link_bandwidth', 1)

    if peak_internal_bw > 0:
        utilization = (read_bw + write_bw) / peak_internal_bw * 100
    else:
        utilization = 0

    # Row-buffer outcome counts
    hits = lookup('ramulator.row_hits')
    misses = lookup('ramulator.row_misses')
    conflicts = lookup('ramulator.row_conflicts')
    accesses = hits + misses + conflicts

    def share_of_accesses(count):
        # Percentage of all row accesses; 0 when nothing was accessed.
        if accesses > 0:
            return count / accesses * 100
        return 0

    return {
        # Throughput
        'read_bandwidth_Bps': read_bw,
        'write_bandwidth_Bps': write_bw,
        'total_bandwidth_Bps': read_bw + write_bw,
        'max_internal_bandwidth_Bps': peak_internal_bw,
        'max_link_bandwidth_Bps': peak_link_bw,
        'bandwidth_utilization_perc': utilization,
        # Latency
        'read_latency_avg_cycles': lookup('ramulator.read_latency_avg'),
        'read_latency_ns_avg': lookup('ramulator.read_latency_ns_avg'),
        'queueing_latency_avg_cycles': lookup('ramulator.queueing_latency_avg'),
        'queueing_latency_ns_avg': lookup('ramulator.queueing_latency_ns_avg'),
        # Row-buffer behaviour
        'row_hits': hits,
        'row_misses': misses,
        'row_conflicts': conflicts,
        'row_hit_rate_perc': share_of_accesses(hits),
        'row_miss_rate_perc': share_of_accesses(misses),
        'row_conflict_rate_perc': share_of_accesses(conflicts),
        'avg_req_queue_length': lookup('ramulator.req_queue_length_avg'),
        # Activity context
        'total_active_cycles_rw': lookup('ramulator.ramulator_active_cycles'),
        'dram_cycles_simulated': lookup('ramulator.dram_cycles'),
        'dram_capacity_bytes': lookup('ramulator.dram_capacity'),
        # Only channel 0's busy-cycle counter is read here.
        'channel_0_busy_cycles_includes_refresh': lookup('ramulator.busy_cycles_0'),
        # Raw request counts
        'read_requests_total': lookup('ramulator.read_requests'),
        'write_requests_total': lookup('ramulator.write_requests'),
    }


def print_summary(stats, channel_data):
    """Write a formatted, sectioned report of the summary statistics to stdout.

    Args:
        stats: Summary dictionary; every key read below must be present.
        channel_data: Mapping of metric name to ``{index: value}``.  Only the
            ``'ramulator.incoming_requests_per_channel'`` entry is consulted,
            and at most four of its indices are printed.
    """
    rule = "=" * 70

    print("\n" + rule)
    print("RAMULATOR STATISTICS SUMMARY (EXTENDED)".center(70))
    print(rule)

    # --- Throughput section: each row shows the raw B/s figure and its GB/s form.
    print("\n‚ö° ENERGY & THROUGHPUT METRICS (Proxies for Energy)")
    bandwidth_rows = (
        ("  Max Internal BW: ", 'max_internal_bandwidth_Bps'),
        ("  Max Link BW:     ", 'max_link_bandwidth_Bps'),
        ("  Read BW:         ", 'read_bandwidth_Bps'),
        ("  Write BW:        ", 'write_bandwidth_Bps'),
        ("  Total BW:        ", 'total_bandwidth_Bps'),
    )
    for prefix, key in bandwidth_rows:
        bytes_per_sec = stats[key]
        print(f"{prefix}{bytes_per_sec:>15,} B/s ({bytes_per_sec / 1e9:.4f} GB/s)")
    utilization = stats['bandwidth_utilization_perc']
    print(f"  BW Utilization:  {utilization:>15.4f} %")

    # --- Latency section: (prefix, stats key, format spec, unit suffix).
    print("\n‚è±Ô∏è LATENCY METRICS")
    latency_rows = (
        ("  Avg Read Latency: ", 'read_latency_avg_cycles', ">15.2f", " cycles"),
        ("  Avg Read Latency: ", 'read_latency_ns_avg', ">15.4f", " ns"),
        ("  Avg Queue Latency: ", 'queueing_latency_avg_cycles', ">15.2f", " cycles"),
        ("  Avg Queue Latency: ", 'queueing_latency_ns_avg', ">15.4f", " ns"),
    )
    for prefix, key, spec, unit in latency_rows:
        print(prefix + format(stats[key], spec) + unit)

    # --- Row-buffer section.
    print("\nüèõÔ∏è BANK LEVEL PARALLELISM (BLP) METRICS")
    row_total = stats['row_hits'] + stats['row_misses'] + stats['row_conflicts']
    print(f"  Total Row Accesses: {row_total:>15,}")
    outcome_rows = (
        ("  Row Hits:           ", 'row_hits', 'row_hit_rate_perc'),
        ("  Row Misses:         ", 'row_misses', 'row_miss_rate_perc'),
        ("  Row Conflicts:      ", 'row_conflicts', 'row_conflict_rate_perc'),
    )
    for prefix, count_key, rate_key in outcome_rows:
        print(f"{prefix}{stats[count_key]:>15,} ({stats[rate_key]:.2f}%)")
    queue_len = stats['avg_req_queue_length']
    print(f"  Avg Req Queue Len:  {queue_len:>15.6f}")

    # --- Per-channel request counts; the heading is printed even with no data.
    print("\n  --- Incoming Request Distribution (First 4 Channels) ---")
    per_channel_key = 'ramulator.incoming_requests_per_channel'
    if per_channel_key in channel_data:
        per_channel = channel_data[per_channel_key]
        # Keep numeric or single-character indices, in stored order, first four only.
        shown = [idx for idx in per_channel if idx.isdigit() or len(idx) == 1][:4]
        for idx in shown:
            count = per_channel[idx]
            if not isinstance(count, (int, float)):
                print(f"    Channel {idx}: {count:>15}")
                continue
            print(f"    Channel {idx}: {count:>15,.0f}")

    # --- Activity section.
    print("\nüîÑ REFRESH & ACTIVITY METRICS")
    activity_rows = (
        ("  DRAM Capacity:              ", 'dram_capacity_bytes', " bytes"),
        ("  DRAM Cycles Simulated:      ", 'dram_cycles_simulated', ""),
        ("  Total Active Cycles (R/W):  ", 'total_active_cycles_rw', ""),
        ("  Channel 0 Busy Cycles:      ", 'channel_0_busy_cycles_includes_refresh', ""),
    )
    for prefix, key, unit in activity_rows:
        print(f"{prefix}{stats[key]:>15,}{unit}")
    print("  *Note: 'Busy Cycles' includes both active time and refresh time. This is a crucial proxy for refresh overhead.")

    print("\n" + rule + "\n")


def main():
    """Process every ``*.txt`` file in the current directory.

    For each file: parse it, print the summary, and write three outputs next
    to the working directory, named after the input file's stem:
    ``<stem>_all_data.json``, ``<stem>_summary.csv`` and
    ``<stem>_detailed.csv``.  A file that fails to parse is reported and
    skipped; the remaining files are still processed.
    """
    divider = "=" * 70
    txt_files = sorted(Path('.').glob('*.txt'))

    if not txt_files:
        print("‚ùå No .txt files found in the current directory!")
        return

    print(f"‚úì Found {len(txt_files)} .txt file(s):\n")
    for position, found in enumerate(txt_files, 1):
        print(f"  {position}. {found.name}")

    print("\n" + divider)

    for stats_file in txt_files:
        file_label = stats_file.name

        print(f"\n{divider}")
        print(f"Processing: {file_label}".center(70))
        print(f"{divider}")

        print(f"üìñ Parsing {file_label}...")
        try:
            metrics, channel_data = parse_stats_file(stats_file)
            print(f"‚úì Parsed {len(metrics)} metrics")
        except Exception as e:
            # Report and move on so one bad file does not stop the batch.
            print(f"‚ùå Error parsing {file_label}: {e}")
            continue

        stats = calculate_stats(metrics)
        print_summary(stats, channel_data)

        base_name = stats_file.stem

        # JSON: summary, every raw metric value, and the indexed entries.
        json_file = f"{base_name}_all_data.json"
        raw_values = {}
        for metric_name, entry in metrics.items():
            raw_values[metric_name] = entry['value']
        output_data = {
            'summary_stats': stats,
            'all_metrics': raw_values,
            'channel_distribution': channel_data,
        }
        with open(json_file, 'w') as f:
            json.dump(output_data, f, indent=2)
        print(f"üíæ Saved: {json_file}")

        # CSV: summary statistics only.
        csv_file = f"{base_name}_summary.csv"
        with open(csv_file, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['Metric', 'Value'])
            writer.writerows([key, value] for key, value in stats.items())
        print(f"üíæ Saved: {csv_file}")

        # CSV: every parsed metric with its description, sorted by name.
        csv_detailed = f"{base_name}_detailed.csv"
        with open(csv_detailed, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['Metric', 'Value', 'Description'])
            writer.writerows(
                [metric_name, data['value'], data.get('description', '')]
                for metric_name, data in sorted(metrics.items())
            )
        print(f"üíæ Saved: {csv_detailed}")

    print("\n" + divider)
    print("‚úÖ All files processed! Check the generated files.".center(70))
    print(divider + "\n")


# Run the batch only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()

‚úì Found 5 .txt file(s):

  1. kmeans_client_ryzen_ramulator_host.txt
  2. kmeans_pim_data_ramulator.txt
  3. kmeans_ramulator_network_ryzen.txt
  4. kmeans_ryzen_ramulator_pim_compute.txt
  5. kmeans_ryzen_ramulator_pim_full.txt


          Processing: kmeans_client_ryzen_ramulator_host.txt          
üìñ Parsing kmeans_client_ryzen_ramulator_host.txt...
‚úì Parsed 1712 metrics

               RAMULATOR STATISTICS SUMMARY (EXTENDED)                

‚ö° ENERGY & THROUGHPUT METRICS (Proxies for Energy)
  Max Internal BW: 640,000,000,000 B/s (640.0000 GB/s)
  Max Link BW:     480,000,000,000 B/s (480.0000 GB/s)
  Read BW:               3,540,747 B/s (0.0035 GB/s)
  Write BW:                      0 B/s (0.0000 GB/s)
  Total BW:              3,540,747 B/s (0.0035 GB/s)
  BW Utilization:           0.0006 %

‚è±Ô∏è LATENCY METRICS
  Avg Read Latency:           86.26 cycles
  Avg Read Latency:         69.0043 ns
  Avg Queue Latency:           10.74 cycles
  Avg Queue Latency:          8.590