In [139]:
!pip install pandas

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [140]:
import re

def get_transaction_count(line):
    # Regular expression to match the transaction block
    pattern = r"transactions = \{([^}]+)\}"
    
    # Search for transactions block in the line
    match = re.search(pattern, line)
    
    if match:
        # Extract transactions and find all unique transaction hashes
        transaction_hashes = set(re.findall(r"Transaction ([a-fA-F0-9]+)", match.group(1)))
        return len(transaction_hashes)  # Count unique transactions
    
    return 0

# Test with a sample line
line = "2.78 - NODE - INFO - Node 39 appended SCPExternalize message to its storage and state, message = SCPExternalize(ballot=SCPBallot(counter=1, value=[Value, hash = 5071816607284402923, state = State.init, transactions = {[Transaction fb9069c time = 0.0000], [Transaction 77d28c42 time = 0.0000], [Transaction 5ddddb02 time = 0.0000]}]), hCounter=1)"
transaction_count = get_transaction_count(line)
print(f"Number of transactions: {transaction_count}")

Number of transactions: 3


In [141]:
# Get timestamp of ledger
import re

def get_timestamp(line):
    pattern = r"^\d+\.\d+" # Regular expression to match the first number (timestamp) in the line
    
    match = re.match(pattern, line) # Search for the timestamp at the beginning of the line
    
    if match:
        return float(match.group(0))
    
    return None

# Test with a sample line
line = "2.78 - NODE - INFO - Node 39 appended SCPExternalize message to its storage and state, message = SCPExternalize(ballot=SCPBallot(counter=1, value=[Value, hash = 5071816607284402923, state = State.init, transactions = {[Transaction fb9069c time = 0.0000], [Transaction 77d28c42 time = 0.0000], [Transaction 5ddddb02 time = 0.0000]}]), hCounter=1)"
timestamp = get_timestamp(line)
print(f"Timestamp: {timestamp}")

Timestamp: 2.78


In [142]:
import re

def get_node_name(line):
    # Regular expression to match the node number after 'Node'
    pattern = r"Node ([A-Z0-9]+)"
    
    # Search for the node name in the line
    match = re.search(pattern, line)
    
    if match:
        return match.group(1)  # Return the alphanumeric node name
    
    return None

# Test with a sample line
line = "0.00 - NODE - INFO - Node GB4OX7UF6ZXUTINUH5CCRFTLJTEKPCCHIMCDEEPHZ7YMP2LXZBBJHQ3F retrieved [Transaction eacf177a time = 0.0000] from mempool."
node_name = get_node_name(line)
print(f"Node name: {node_name}")

Node name: GB4OX7UF6ZXUTINUH5CCRFTLJTEKPCCHIMCDEEPHZ7YMP2LXZBBJHQ3F


In [143]:
import re

def count_unique_mempool_transactions(file_path, node_number):
    unique_transactions = set()

    with open(file_path, 'r') as file:
        lines = file.readlines()

    for line in lines:
        # Check if the line contains the specified node and 'from mempool'
        if node_number in line and "from mempool" in line:
            transaction_matches = re.findall(r"Transaction ([a-fA-F0-9]+)", line) # Extract transaction hashes
            unique_transactions.update(transaction_matches)

    return len(unique_transactions)

file_path = 'src/simulator_events_log.txt'
node = "Node 19"
unique_tx_count = count_unique_mempool_transactions(file_path, node)

print(f"Unique transactions retrieved from mempool for {node}: {unique_tx_count}")

Unique transactions retrieved from mempool for Node 19: 0


FINAL PROCESS LOG LINES FUNCTION

In [144]:
import pandas as pd

def process_log_lines(file_path):
    node_data = {}

    with open(file_path, 'r') as file:
        lines = file.readlines()

    for line in lines:
        node_name = get_node_name(line)
        timestamp = get_timestamp(line)
        
        if node_name:
            node_data[node_name] = {
                "sequence number": node_name,
                "Timestamp of finalisation": timestamp
            }
    
    df = pd.DataFrame(node_data.values())
    
    return df

In [145]:
df = process_log_lines('src/simulator_events_log.txt')
print(len(df["sequence number"].unique()))

88


In [146]:
import pandas as pd

def process_log_lines(file_path):
    node_data = {}

    with open(file_path, 'r') as file:
        lines = file.readlines()

    for line in lines:
        if 'appended SCPExternalize message to its storage and state' not in line:
            continue

        node_name = get_node_name(line)
        timestamp = get_timestamp(line)
        transaction_count = get_transaction_count(line)
        externalize_message = line

        if node_name:
            node_data[node_name] = {
                "sequence number": node_name,
                "Timestamp of finalisation": timestamp,
                "No. of finalised transactions": transaction_count,
                "Externalize message": externalize_message,
            }

    df = pd.DataFrame(node_data.values())

    df["total_transactions"] = df["sequence number"].apply(lambda node: count_unique_mempool_transactions(file_path, node))
    
    df["no. of transactions not finalised"] = df["total_transactions"] - df["No. of finalised transactions"]

    return df

In [147]:
file_path = 'src/simulator_events_log.txt'

df = process_log_lines(file_path)
df_sorted = df.sort_values(by='Timestamp of finalisation', ascending=True)

display(df_sorted)

KeyError: 'sequence number'

ADD INTERLEDGER CHECKS

In [None]:
def calculate_inter_ledger_agreement_time(df):
    df = df.sort_values(by='Timestamp of finalisation')
    time_diffs = df['Timestamp of finalisation'].diff().dropna()
    
    return time_diffs.mean()

avg_time = calculate_inter_ledger_agreement_time(df_sorted)
print(f"Average Inter-Ledger Agreement Time: {avg_time}")

In [None]:
final_experiment_df = df_sorted[[
    "sequence number",
    "Timestamp of finalisation",
    "No. of finalised transactions",
    "no. of transactions not finalised"
]]

display(final_experiment_df)

In [None]:
avg_difference = (final_experiment_df["no. of transactions not finalised"] - final_experiment_df["No. of finalised transactions"]).mean()

print(f"Average difference: {avg_difference}")

In [None]:
avg_finalised = final_experiment_df["No. of finalised transactions"].mean()
avg_total = (final_experiment_df["No. of finalised transactions"] + 
             final_experiment_df["no. of transactions not finalised"]).mean()

finalised_percentage = (avg_finalised / avg_total) * 100 if avg_total != 0 else 0

print(f"Percentage of finalised transactions vs total: {finalised_percentage:.2f}%")