In [94]:
import csv
import json
import os

# CSV file that accumulates one summary row per processed simulation log.
SUMMARY_CSV = "simulation_summary.csv"

# Column order of the summary CSV; passed to csv.DictWriter as `fieldnames`.
FIELDNAMES = [
    # Run configuration, filled in by hand for each log.
    "node_count", "simulation_time", "sim_params",
    # Metrics returned by compute_summary_metrics().
    "total_tx_created", "total_slots", "total_tx_in_all_slots",
    "avg_txs_per_slot", "avg_inter_slot_time",
    # Outcome flag supplied by the caller.
    "all_tests_passed",
]

def append_summary_row(row: dict):
    """Append one summary row to SUMMARY_CSV, writing the header when needed.

    The parent directory of SUMMARY_CSV is created if it does not exist.
    The header line (FIELDNAMES) is written when the file is missing *or*
    exists but is empty, so a pre-created/truncated file never ends up as a
    headerless CSV.

    Args:
        row: Mapping of column name -> value. Keys absent from `row` are
            written as empty cells (csv.DictWriter default `restval`).

    Raises:
        ValueError: If `row` contains a key that is not in FIELDNAMES
            (csv.DictWriter default `extrasaction="raise"`).
    """
    os.makedirs(os.path.dirname(SUMMARY_CSV) or ".", exist_ok=True)
    # A zero-length file has no header yet, exactly like a missing one.
    write_header = (
        not os.path.isfile(SUMMARY_CSV) or os.path.getsize(SUMMARY_CSV) == 0
    )
    # newline="" lets the csv module control line endings itself.
    with open(SUMMARY_CSV, "a", newline="") as f:
        w = csv.DictWriter(f, fieldnames=FIELDNAMES)
        if write_header:
            w.writeheader()
        w.writerow(row)


In [95]:
import re
from collections import defaultdict
import pandas as pd

def get_transaction_count(line):
    """Return the set of transaction hashes listed in a log line.

    Looks for the first ``transactions = {...}`` group in `line` and collects
    every hex hash that follows the word ``Transaction`` inside it. Despite
    the function's name, the result is a set of hash strings, not a number;
    an empty set is returned when the line has no such group.
    """
    block = re.search(r"transactions = \{([^}]+)\}", line)
    if block is None:
        return set()
    inside_braces = block.group(1)
    return {
        hit.group(1)
        for hit in re.finditer(r"Transaction ([a-fA-F0-9]+)", inside_braces)
    }

def get_timestamp(line):
    """Return the decimal number at the very start of `line` as a float.

    The line must begin with digits, a dot, and more digits; otherwise
    None is returned.
    """
    leading_number = re.match(r"^\d+\.\d+", line)
    if leading_number is None:
        return None
    return float(leading_number.group(0))

def get_node_name(line):
    """Return the identifier following the first ``Node `` in `line`.

    The identifier is the run of uppercase letters and digits right after
    ``Node ``; None is returned when no such run is present.
    """
    found = re.search(r"Node ([A-Z0-9]+)", line)
    if found is None:
        return None
    return found.group(1)


def process_log_lines(file_path):
    """Collect per-node externalize information from a simulator event log.

    Only lines containing 'appended SCPExternalize message' or
    'adopting externalized value for slot' are considered. For each node
    named on such a line the function records the timestamp of the first
    line seen for that node, the union of all transaction hashes on its
    lines, and the stripped lines themselves.

    Args:
        file_path: Path of the event log to read.

    Returns:
        pandas.DataFrame with one row per node and the columns
        "sequence number" (holds the node name; label kept as-is for
        callers), "Timestamp of finalisation", "Finalised transactions"
        (set), "Externalize messages" (list of str) and
        "No. of finalised transactions" (int). A log without any matching
        line yields an empty frame that still has all of these columns.
    """
    columns = [
        "Timestamp of finalisation",
        "Finalised transactions",
        "Externalize messages",
    ]
    node_data = defaultdict(lambda: {
        "Timestamp of finalisation": None,
        "Finalised transactions": set(),
        "Externalize messages": []
    })
    # Stream the log line by line instead of loading it fully into memory.
    with open(file_path, 'r') as file:
        for line in file:
            if ('appended SCPExternalize message' not in line
                    and 'adopting externalized value for slot' not in line):
                continue
            node_name = get_node_name(line)
            if not node_name:
                continue
            entry = node_data[node_name]
            # Keep only the first timestamp observed for this node.
            if entry["Timestamp of finalisation"] is None:
                entry["Timestamp of finalisation"] = get_timestamp(line)
            entry["Finalised transactions"].update(get_transaction_count(line))
            entry["Externalize messages"].append(line.strip())
    # Explicit `columns` keeps the schema when no node was found; without it
    # an empty dict gives a column-less frame and the lookup below raises
    # KeyError.
    df = pd.DataFrame.from_dict(dict(node_data), orient='index', columns=columns)
    df.index.name = "sequence number"
    df = df.reset_index()
    df["No. of finalised transactions"] = df["Finalised transactions"].apply(len)
    return df

def extract_slot_finalisation_times(file_path):
    """Return the first externalize timestamp of every slot, ordered by slot.

    A line counts when it contains, in this order: a decimal timestamp,
    ``Node <id>``, either ``appended SCPExternalize`` / ``appended
    externalize`` or ``adopting externalize...``, and ``slot <number>``
    (matching is case-insensitive). For each slot number only the first
    matching line in file order is kept.

    Args:
        file_path: Path of the event log to read.

    Returns:
        list[float]: One timestamp per distinct slot, sorted by slot number;
        empty when no line matches.
    """
    # The optional "SCP" prefix lets the "appended SCPExternalize message"
    # phrase match; requiring a bare "appended externalize" could never match
    # that phrase, leaving only the "adopting" lines to be counted.
    pattern = re.compile(
        r"(\d+\.\d+).*?Node [A-Z0-9]+.*?(?:appended|adopting) (?:SCP)?externalize.*?slot (\d+)",
        re.IGNORECASE,
    )
    slot_times = {}
    with open(file_path, 'r') as file:
        for line in file:
            m = pattern.search(line)
            if m:
                # setdefault keeps the first timestamp seen for each slot.
                slot_times.setdefault(int(m.group(2)), float(m.group(1)))
    # Return sorted list of finalisation times by slot number
    return [slot_times[slot] for slot in sorted(slot_times)]


def compute_summary_metrics(events_log_path: str):
    """Derive the summary metrics for one simulator event log.

    Args:
        events_log_path: Path of the event log to analyse.

    Returns:
        A 5-tuple ``(total_tx_created, total_slots, total_tx_in_all_slots,
        avg_txs_per_slot, avg_inter_slot_time)`` where

        * total_tx_created is the number of distinct hashes on
          "mined to the mempool!" lines,
        * total_slots is the number of externalize messages summed over all
          rows returned by process_log_lines(),
        * total_tx_in_all_slots is the number of distinct finalised hashes
          across those rows,
        * avg_txs_per_slot is their ratio (0.0 when total_slots is 0),
        * avg_inter_slot_time is the mean gap between consecutive entries of
          extract_slot_finalisation_times() (0.0 with fewer than two entries).

    NOTE(review): total_slots counts externalize log lines per node, not
    distinct slot numbers — confirm this is the intended definition.
    """
    # Distinct transactions that were ever mined into the mempool.
    mining_pat = re.compile(r"\[Transaction ([A-Fa-f0-9]+) time = [\d\.]+\] mined to the mempool!")
    with open(events_log_path, 'r') as log:
        created_hashes = {
            hit.group(1) for hit in map(mining_pat.search, log) if hit
        }

    # Per-node externalize data drives the slot and finalised-tx totals.
    per_node = process_log_lines(events_log_path)
    total_slots = per_node["Externalize messages"].apply(len).sum()
    finalised_hashes = set().union(*per_node["Finalised transactions"])
    total_tx_in_all_slots = len(finalised_hashes)
    if total_slots:
        avg_txs_per_slot = total_tx_in_all_slots / total_slots
    else:
        avg_txs_per_slot = 0.0

    # Mean distance between consecutive slot finalisation timestamps.
    times = extract_slot_finalisation_times(events_log_path)
    gaps = [later - earlier for earlier, later in zip(times, times[1:])]
    if gaps:
        avg_inter_slot_time = sum(gaps) / len(gaps)
    else:
        avg_inter_slot_time = 0.0

    return (
        len(created_hashes),
        total_slots,
        total_tx_in_all_slots,
        avg_txs_per_slot,
        avg_inter_slot_time,
    )


# EDIT THE "row" below with appropriate parameters

In [96]:
# Requires the helper functions and constants defined in the cells above.
log_file = "Runs for Tx creation scaling /ER_Topology 30% threshold/050/simulator_events_log.txt"

# Run configuration — EDIT these values to match the run that produced `log_file`.
# They are defined once here and reused for the row's node_count,
# simulation_time and sim_params columns so the three cannot drift apart.
sim_params = {
    "n_nodes": 50,
    "sim_duration": 100,
    # NOTE(review): the previous hand-written string ended in '"mine: 0.}'
    # (unterminated key, truncated number). 0.0 is the literal value of `0.`;
    # confirm the real setting for this run.
    "mine": 0.0,
}

# Get your computed metrics
(total_tx_created,
 total_slots,
 total_tx_in_all_slots,
 avg_txs_per_slot,
 avg_inter_slot_time) = compute_summary_metrics(log_file)

row = {
    "node_count": sim_params["n_nodes"],
    "simulation_time": sim_params["sim_duration"],
    # json.dumps guarantees the stored string is valid, parseable JSON.
    "sim_params": json.dumps(sim_params),
    "total_tx_created": total_tx_created,
    "total_slots": total_slots,
    "total_tx_in_all_slots": total_tx_in_all_slots,
    "avg_txs_per_slot": f"{avg_txs_per_slot:.2f}",
    "avg_inter_slot_time": f"{avg_inter_slot_time:.2f}",
    "all_tests_passed": True,  # Or set according to your logic
}
# Appends a new line on every execution of this cell.
append_summary_row(row)
