In [74]:
!pip install pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [75]:
import re
import pandas as pd
from collections import defaultdict

def get_transaction_count(line):
    pattern = r"transactions = \{([^}]+)\}"
    match = re.search(pattern, line)
    if match:
        return set(re.findall(r"Transaction ([a-fA-F0-9]+)", match.group(1)))
    return set()

def get_timestamp(line):
    pattern = r"^\d+\.\d+"
    match = re.match(pattern, line)
    return float(match.group(0)) if match else None

def get_node_name(line):
    pattern = r"Node ([A-Za-z0-9]+)"
    match = re.search(pattern, line)
    return match.group(1) if match else None

def count_unique_mempool_transactions(file_path, node_number):
    unique_transactions = set()
    with open(file_path, 'r') as file:
        for line in file:
            if f"Node {node_number}" in line and "from mempool" in line:
                unique_transactions.update(re.findall(r"Transaction ([a-fA-F0-9]+)", line))
    return len(unique_transactions)

def process_log_lines(file_path):
    """
    Extracts **all** SCPExternalize messages per node and stores their relevant details.
    A new column 'Slot' is added which numbers the messages in the order they are read.
    """
    data = []

    with open(file_path, 'r') as file:
        for line in file:
            # Only consider lines that contain relevant externalization messages
            if ("appended SCPExternalize message to its storage and state" not in line and
                "adopting externalized value for slot" not in line):
                continue
            
            node_name = get_node_name(line)
            timestamp = get_timestamp(line)
            transactions = get_transaction_count(line)

            if node_name:
                data.append({
                    "node name": node_name,
                    "Timestamp of finalisation": timestamp,
                    "Finalised transactions": transactions,
                    "Externalize message": line.strip()
                })
    
    # Convert the collected data to a DataFrame
    df = pd.DataFrame(data)
    
    # For each node, assign a sequential slot number based on the order of appearance
    df['Slot'] = df.groupby('node name').cumcount() + 1
    
    # Count the number of finalized transactions for each externalize message
    df["No. of finalised transactions"] = df["Finalised transactions"].apply(len)
    
    # Compute total transactions for each node from mempool logs
    df["total_transactions"] = df["node name"].apply(lambda node: count_unique_mempool_transactions(file_path, node))
    
    # Calculate number of transactions not finalized for each node
    df["no. of transactions not finalised"] = df["total_transactions"] - df["No. of finalised transactions"]

    return df

# Example usage:
# file_path = 'path/to/your/simulator_events_log.txt'
# df = process_log_lines(file_path)
# print(df)


In [76]:
file_path = 'src/simulator_events_log.txt'

df = process_log_lines(file_path)


# df_sorted = df.sort_values(by='Timestamp of finalisation', ascending=True)

pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', 1000)  # Prevent line wrapping
pd.set_option('display.colheader_justify', 'center')  # Center column headers
pd.set_option('display.float_format', '{:.4f}'.format)  # Format float values

df = df.sort_values(by='Slot', ascending=True)

display(df)

Unnamed: 0,node name,Timestamp of finalisation,Finalised transactions,Externalize message,Slot,No. of finalised transactions,total_transactions,no. of transactions not finalised
0,GCWMNPDX5E7E5MWN566Z4HRYA4YICVIYXQ6XWE6QQDXXNW...,6.55,"{bc03b99b, b997c855}",6.55 - NODE - INFO - Node GCWMNPDX5E7E5MWN566Z...,1,2,22,20
64,GBFPTZT3BKTCCTZJSX6QQFKUUEJ2P5ZQZBRJJ44YU6N2OD...,18.53,{bc03b99b},18.53 - NODE - INFO - Node GBFPTZT3BKTCCTZJSX6...,1,1,25,24
63,GA2LBNNBXPLJIGRGGL3OXZ2BFRTZJB5AEP7NTDIHPSMDGU...,18.47,"{f015d6f5, bc03b99b, 1cf425b0}",18.47 - NODE - INFO - Node GA2LBNNBXPLJIGRGGL3...,1,3,21,18
62,GA2SFCVSNYHL7C5YHR3DFIC75JFKBY4T7AH6A7HZRTGJFV...,17.71,"{bc03b99b, 1cf425b0, b997c855, b5299c04}",17.71 - NODE - INFO - Node GA2SFCVSNYHL7C5YHR3...,1,4,26,22
60,GAYYHNMWZHM7KRJ4WKTEL3UIO3KG5HJGM5EBLN22XPBNXI...,17.56,"{bc03b99b, b5299c04}",17.56 - NODE - INFO - Node GAYYHNMWZHM7KRJ4WKT...,1,2,24,22
59,GD5JOWV72KZYUTAGFAWAZJDL7SWQHEU3OWDJXSKG2ZOU7A...,17.3,{bc03b99b},17.30 - NODE - INFO - Node GD5JOWV72KZYUTAGFAW...,1,1,27,26
58,GDUFFVLW4IJVVI2OMTFGCH3ECVHKKV5U6DJ2XWUBDSYGDA...,16.96,"{bc03b99b, 1cf425b0}",16.96 - NODE - INFO - Node GDUFFVLW4IJVVI2OMTF...,1,2,16,14
56,GBZSLUW7NHPXCJN7SIDUGDH754VFYDPXZII6S74EGV6I5Y...,16.77,{bc03b99b},16.77 - NODE - INFO - Node GBZSLUW7NHPXCJN7SID...,1,1,20,19
65,GCM6QMP3DLRPTAZW2UZPCPX2LF3SXWXKPMP3GKFZBDSF3Q...,18.59,"{bc03b99b, 1cf425b0}",18.59 - NODE - INFO - Node GCM6QMP3DLRPTAZW2UZ...,1,2,19,17
55,GBBTZWLPA77MRBGTRZCYGH7I2QMAPPJLMUE6BYABI4A6XU...,16.73,"{bc03b99b, 1cf425b0, b5299c04}",16.73 - NODE - INFO - Node GBBTZWLPA77MRBGTRZC...,1,3,21,18


## Analyze Transaction Matches across slots

In [77]:
def analyze_transaction_matches(df):
    tx_occurrences = {}

    for idx, row in df.iterrows():
        node = row['node name']
        slot = row['Slot']
        # row['Finalised transactions'] is a set; iterate through each transaction hash
        for tx in row['Finalised transactions']:
            if tx not in tx_occurrences:
                tx_occurrences[tx] = set()
            tx_occurrences[tx].add((node, slot))
    
    duplicates = {tx: occ for tx, occ in tx_occurrences.items() if len(occ) > 1} 

    return duplicates


duplicates = analyze_transaction_matches(df)
print("Transactions that appear in more than one slot:")
for tx, occ in duplicates.items():
    print(f"Transaction {tx} appears in: {sorted(list(occ))}")


Transactions that appear in more than one slot:
Transaction bc03b99b appears in: [('GA2LBNNBXPLJIGRGGL3OXZ2BFRTZJB5AEP7NTDIHPSMDGUFLHCCJBF6O', 1), ('GA2SFCVSNYHL7C5YHR3DFIC75JFKBY4T7AH6A7HZRTGJFVAC25GNEIUZ', 1), ('GA3FLRTZLNMBXCQ2GG4W2CO2WXWGDDROCD3KVD5QYMYB5NXBUYMO2QXT', 1), ('GA3SCGZAMPXNGUULWFUOCMGXRDPLGIMWH3ZSDQFCNEZATIJC77ZW7Z3Y', 1), ('GA4Y3HGTGHSX4XASTABDUM6UIJHTYY6DBODESEKVCYPLOTRMMR2UB5K7', 1), ('GA4Y3HGTGHSX4XASTABDUM6UIJHTYY6DBODESEKVCYPLOTRMMR2UB5K7', 2), ('GA5STBMV6QDXFDGD62MEHLLHZTPDI77U3PFOD2SELU5RJDHQWBR5NNK7', 1), ('GA5STBMV6QDXFDGD62MEHLLHZTPDI77U3PFOD2SELU5RJDHQWBR5NNK7', 2), ('GA6WDJK7HYWKLYHXYL5K6LK3TNRN5WDAS3LHSP2G7VHW4OX3S3XAPJD7', 1), ('GA7DV63PBUUWNUFAF4GAZVXU2OZMYRATDLKTC7VTCG7AU4XUPN5VRX4A', 1), ('GA7MEA44DK4WLPF4452CIS24ZOVLDVJN7Q3B4TK44MQXB6H6TKZIAPMC', 1), ('GAAV2GCVFLNN522ORUYFV33E76VPC22E72S75AQ6MBR5V45Z5DWVPWEU', 1), ('GAAV2GCVFLNN522ORUYFV33E76VPC22E72S75AQ6MBR5V45Z5DWVPWEU', 2), ('GABMKJM6I25XI4K7U6XWMULOUQIQ27BCTMLS6BYYSOWKTBUXVRJSXHYQ', 1), ('GABMKJ

ADD INTERLEDGER CHECKS

In [78]:
def calculate_inter_ledger_agreement_time(df):
    df = df.sort_values(by='Timestamp of finalisation')
    time_diffs = df['Timestamp of finalisation'].diff().dropna()
    
    return time_diffs.mean()

avg_time = calculate_inter_ledger_agreement_time(df)
print(f"Average Inter-Ledger Agreement Time: {avg_time}")

Average Inter-Ledger Agreement Time: 0.14552631578947367


In [79]:
final_experiment_df = df[[
    "sequence number",
    "Timestamp of finalisation",
    "No. of finalised transactions",
    "no. of transactions not finalised"
]]

display(final_experiment_df)

KeyError: "['sequence number'] not in index"

In [None]:
avg_difference = (final_experiment_df["no. of transactions not finalised"] - final_experiment_df["No. of finalised transactions"]).mean()

print(f"Average difference: {avg_difference}")

In [None]:
avg_finalised = final_experiment_df["No. of finalised transactions"].mean()
avg_total = (final_experiment_df["No. of finalised transactions"] + 
             final_experiment_df["no. of transactions not finalised"]).mean()

finalised_percentage = (avg_finalised / avg_total) * 100 if avg_total != 0 else 0

print(f"Percentage of finalised transactions vs total: {finalised_percentage:.2f}%")