# Script for Removing Outliers

### Load Datasets

In [1]:
from pathlib import Path

import pandas as pd

# Names of the datasets to clean; each one is read from
# ./collected-data/<name>.csv
datasets = ['Ed25519', 'Secp256k1']

# Load every raw CSV into a DataFrame, keyed by its dataset name
dfs = {}
for name in datasets:
    dfs[name] = pd.read_csv(f"./collected-data/{name}.csv")

### Remove Outliers

In [2]:
# Drop rows that lie outside the interquartile-range fences, i.e. outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] (bounds inclusive).
#
# The columns are processed one after another: the quartiles of each column
# are computed on the rows that survived the filters of the columns before it,
# so the outcome depends on the order of this list.
columns = [
    'Delegation Time Taken',
    'Delegation Memory Usage',
    'Verification Time Taken',
    'Verification Memory Usage',
    'Retrieval Time Taken',
    'Retrieval Memory Usage',
]

for name in datasets:
    for metric in columns:
        frame = dfs[name]
        values = frame[metric]

        first_quartile = values.quantile(0.25)
        third_quartile = values.quantile(0.75)
        spread = third_quartile - first_quartile

        # Series.between is inclusive on both ends, matching `>= lower & <= upper`
        in_range = values.between(
            first_quartile - 1.5 * spread,
            third_quartile + 1.5 * spread,
        )
        dfs[name] = frame[in_range]

### Truncate All Datasets to the Size of the Smallest One

In [3]:
# Row count of the smallest dataset after outlier removal
min_size = min(map(len, dfs.values()))

# Keep only the first `min_size` rows of each dataset so that all of them
# end up with the same number of rows
for name in datasets:
    dfs[name] = dfs[name].head(min_size)

### Save Cleaned Datasets

In [4]:
# Make sure the output folder exists: to_csv does not create missing
# directories, so the cell would otherwise fail on a fresh checkout.
Path("./cleaned-data").mkdir(parents=True, exist_ok=True)

# Write each cleaned dataset to ./cleaned-data/<name>-cleaned.csv.
# NOTE(review): with the to_csv default (index=True) the DataFrame index is
# written as the first, unnamed column — confirm downstream readers expect it.
for dataset in datasets:
    dfs[dataset].to_csv(f"./cleaned-data/{dataset}-cleaned.csv")