In [19]:
!pip install pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [20]:
import re
import pandas as pd
from collections import defaultdict

def get_transaction_count(line):
    pattern = r"transactions = \{([^}]+)\}"
    match = re.search(pattern, line)
    if match:
        return set(re.findall(r"Transaction ([a-fA-F0-9]+)", match.group(1)))
    return set()

def get_timestamp(line):
    pattern = r"^\d+\.\d+"
    match = re.match(pattern, line)
    return float(match.group(0)) if match else None

def get_node_name(line):
    pattern = r"Node ([A-Za-z0-9]+)"
    match = re.search(pattern, line)
    return match.group(1) if match else None

def count_unique_mempool_transactions(file_path, node_number):
    unique_transactions = set()
    with open(file_path, 'r') as file:
        for line in file:
            if f"Node {node_number}" in line and "from mempool" in line:
                unique_transactions.update(re.findall(r"Transaction ([a-fA-F0-9]+)", line))
    return len(unique_transactions)

def process_log_lines(file_path):
    """
    Extracts **all** SCPExternalize messages per node and stores their relevant details.
    A new column 'Slot' is added which numbers the messages in the order they are read.
    """
    data = []

    with open(file_path, 'r') as file:
        for line in file:
            # Only consider lines that contain relevant externalization messages
            if ("appended SCPExternalize message to its storage and state" not in line and
                "adopting externalized value for slot" not in line):
                continue
            
            node_name = get_node_name(line)
            timestamp = get_timestamp(line)
            transactions = get_transaction_count(line)

            if node_name:
                data.append({
                    "node name": node_name,
                    "Timestamp of finalisation": timestamp,
                    "Finalised transactions": transactions,
                    "Externalize message": line.strip()
                })
    
    # Convert the collected data to a DataFrame
    df = pd.DataFrame(data)
    
    # For each node, assign a sequential slot number based on the order of appearance
    df['Slot'] = df.groupby('node name').cumcount() + 1
    
    # Count the number of finalized transactions for each externalize message
    df["No. of finalised transactions"] = df["Finalised transactions"].apply(len)
    
    # Compute total transactions for each node from mempool logs
    df["total_transactions"] = df["node name"].apply(lambda node: count_unique_mempool_transactions(file_path, node))
    
    # Calculate number of transactions not finalized for each node
    df["no. of transactions not finalised"] = df["total_transactions"] - df["No. of finalised transactions"]

    return df

# Example usage:
# file_path = 'path/to/your/simulator_events_log.txt'
# df = process_log_lines(file_path)
# print(df)


In [21]:
file_path = 'src/simulator_events_log.txt'

df = process_log_lines(file_path)


# df_sorted = df.sort_values(by='Timestamp of finalisation', ascending=True)

pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', 1000)  # Prevent line wrapping
pd.set_option('display.colheader_justify', 'center')  # Center column headers
pd.set_option('display.float_format', '{:.4f}'.format)  # Format float values


display(df)

Unnamed: 0,node name,Timestamp of finalisation,Finalised transactions,Externalize message,Slot,No. of finalised transactions,total_transactions,no. of transactions not finalised
0,Inez,8.21,{e470f8e2},8.21 - NODE - INFO - Node Inez appended SCPExt...,1,1,40,39
1,Dave,8.82,"{e470f8e2, 48afd4ea}",8.82 - NODE - INFO - Node Dave appended SCPExt...,1,2,40,38
2,Hank,13.84,{e470f8e2},13.84 - NODE - INFO - Node Hank appended SCPEx...,1,1,39,38
3,Alice,14.04,{e470f8e2},14.04 - NODE - INFO - Node Alice appended SCPE...,1,1,43,42
4,Fred,19.27,"{10b700b0, b223a4e6, 7c064074, a36ca28e}",19.27 - NODE - INFO - Node Fred appended SCPEx...,1,4,43,39
5,Elsie,19.6,"{e470f8e2, 48afd4ea}",19.60 - NODE - INFO - Node Elsie appended SCPE...,1,2,44,42
6,Bob,21.06,"{e470f8e2, 36f3824c}",21.06 - NODE - INFO - Node Bob appended SCPExt...,1,2,43,41
7,Alice,26.85,"{d8e9ec31, b96df974, 7c064074, a36ca28e}",26.85 - NODE - INFO - Node Alice appended SCPE...,2,4,43,39
8,Fred,30.88,{e470f8e2},30.88 - NODE - INFO - Node Fred appended SCPEx...,2,1,43,42
9,Hank,31.87,{e470f8e2},31.87 - NODE - INFO - Node Hank appended SCPEx...,2,1,39,38


## Analyze Transaction Matches across slots

In [22]:
def analyze_transaction_matches(df):
    tx_occurrences = {}

    for idx, row in df.iterrows():
        node = row['node name']
        slot = row['Slot']
        # row['Finalised transactions'] is a set; iterate through each transaction hash
        for tx in row['Finalised transactions']:
            if tx not in tx_occurrences:
                tx_occurrences[tx] = set()
            tx_occurrences[tx].add((node, slot))
    
    duplicates = {tx: occ for tx, occ in tx_occurrences.items() if len(occ) > 1} 

    return duplicates


duplicates = analyze_transaction_matches(df)
print("Transactions that appear in more than one slot:")
for tx, occ in duplicates.items():
    print(f"Transaction {tx} appears in: {sorted(list(occ))}")


Transactions that appear in more than one slot:
Transaction e470f8e2 appears in: [('Alice', 1), ('Bob', 1), ('Bob', 3), ('Dave', 1), ('Elsie', 1), ('Elsie', 3), ('Fred', 2), ('Fred', 3), ('Hank', 1), ('Hank', 2), ('Inez', 1), ('John', 1), ('John', 2), ('John', 3)]
Transaction 48afd4ea appears in: [('Dave', 1), ('Elsie', 1), ('Elsie', 3), ('John', 1), ('John', 2), ('John', 3)]
Transaction 10b700b0 appears in: [('Bob', 2), ('Bob', 3), ('Carol', 1), ('Carol', 3), ('Dave', 2), ('Dave', 3), ('Dave', 4), ('Elsie', 2), ('Elsie', 3), ('Fred', 1), ('Gwen', 1), ('Gwen', 2), ('Gwen', 3), ('Hank', 3)]
Transaction b223a4e6 appears in: [('Bob', 2), ('Bob', 3), ('Carol', 1), ('Carol', 3), ('Dave', 2), ('Dave', 3), ('Dave', 4), ('Elsie', 2), ('Elsie', 3), ('Fred', 1), ('Gwen', 1), ('Gwen', 2), ('Gwen', 3), ('Hank', 3), ('Inez', 2), ('Inez', 3), ('John', 2)]
Transaction 7c064074 appears in: [('Alice', 2), ('Alice', 3), ('Bob', 2), ('Bob', 3), ('Carol', 1), ('Carol', 3), ('Dave', 2), ('Dave', 3), ('Dave

ADD INTERLEDGER CHECKS

In [23]:
def calculate_inter_ledger_agreement_time(df):
    df = df.sort_values(by='Timestamp of finalisation')
    time_diffs = df['Timestamp of finalisation'].diff().dropna()
    
    return time_diffs.mean()

avg_time = calculate_inter_ledger_agreement_time(df)
print(f"Average Inter-Ledger Agreement Time: {avg_time}")

Average Inter-Ledger Agreement Time: 2.102258064516129


In [24]:
final_experiment_df = df[[
    "sequence number",
    "Timestamp of finalisation",
    "No. of finalised transactions",
    "no. of transactions not finalised"
]]

display(final_experiment_df)

KeyError: "['sequence number'] not in index"

In [None]:
avg_difference = (final_experiment_df["no. of transactions not finalised"] - final_experiment_df["No. of finalised transactions"]).mean()

print(f"Average difference: {avg_difference}")

In [None]:
avg_finalised = final_experiment_df["No. of finalised transactions"].mean()
avg_total = (final_experiment_df["No. of finalised transactions"] + 
             final_experiment_df["no. of transactions not finalised"]).mean()

finalised_percentage = (avg_finalised / avg_total) * 100 if avg_total != 0 else 0

print(f"Percentage of finalised transactions vs total: {finalised_percentage:.2f}%")