# Processing Original And Cleaned Data

### Import Both Datasets

In [2]:
import pandas as pd

# Names of the algorithms under comparison; each one has an original CSV and
# a cleaned CSV on disk, named after it.
datasets = ['Ed25519', 'Secp256k1']

# Original (as-collected) measurements, keyed by dataset name.
original_dfs = {}
for dataset in datasets:
    original_dfs[dataset] = pd.read_csv(f"./collected-data/{dataset}.csv")

# Cleaned versions of the same measurements, keyed the same way.
cleaned_dfs = {}
for dataset in datasets:
    cleaned_dfs[dataset] = pd.read_csv(f"./cleaned-data/{dataset}-cleaned.csv")

### Percentage Reduction in Dataset Size

In [4]:
# For each dataset, report the row count before and after cleaning and the
# share of the original rows that cleaning removed.
for dataset in datasets:
    original_size, cleaned_size = (
        len(frames[dataset]) for frames in (original_dfs, cleaned_dfs)
    )
    removed_rows = original_size - cleaned_size
    # Removed rows expressed as a percentage of the original row count.
    reduction = (removed_rows / original_size) * 100
    print(f"{dataset}: {original_size} → {cleaned_size} ({reduction:.2f}% removed)")

Ed25519: 101 → 73 (27.72% removed)
Secp256k1: 101 → 73 (27.72% removed)


In [None]:
# Per-dataset mean of every performance metric, computed on the cleaned data
# and saved as a one-row-per-algorithm summary table.
metric_columns = [
    'Delegation Time Taken',
    'Delegation Memory Usage',
    'Verification Time Taken',
    'Verification Memory Usage',
    'Retrieval Time Taken',
    'Retrieval Memory Usage',
]

# One list of means per metric, ordered like `datasets` so every value lines
# up with its algorithm label in the summary table.
cleaned_means = {
    column: [cleaned_dfs[dataset][column].mean() for dataset in datasets]
    for column in metric_columns
}

# The labels come from `datasets` instead of a repeated literal, so the
# "Algorithm" column cannot drift out of sync with the rows if the dataset
# list is ever edited.
summary_df = pd.DataFrame({"Algorithm": list(datasets), **cleaned_means})

# With to_csv's defaults the row index is written as well, as an unnamed
# first column.
summary_df.to_csv('./performance-summary-cleaned.csv')

In [6]:
# Per-dataset mean of every performance metric, computed on the original
# (uncleaned) data and saved as a one-row-per-algorithm summary table.
metric_columns = [
    'Delegation Time Taken',
    'Delegation Memory Usage',
    'Verification Time Taken',
    'Verification Memory Usage',
    'Retrieval Time Taken',
    'Retrieval Memory Usage',
]

# One list of means per metric, ordered like `datasets` so every value lines
# up with its algorithm label in the summary table.
original_means = {
    column: [original_dfs[dataset][column].mean() for dataset in datasets]
    for column in metric_columns
}

# The labels come from `datasets` instead of a repeated literal, so the
# "Algorithm" column cannot drift out of sync with the rows if the dataset
# list is ever edited.
summary_df = pd.DataFrame({"Algorithm": list(datasets), **original_means})

# With to_csv's defaults the row index is written as well, as an unnamed
# first column.
summary_df.to_csv('./performance-summary.csv')