# Processing Original And Cleaned Data

### Import Original and Cleaned Datasets

In [5]:
import pandas as pd

# Base names of the datasets. Each name maps to a raw file
# ./collected-data/<name>.csv and a cleaned file ./cleaned-data/<name>-cleaned.csv.
datasets = [
    'supervisorStudent',
    'supervisorStudentProtocol',
    'doctorNurse',
    'doctorNurseProtocol',
    'employeeProtocol',
]

# Load the raw CSV of every dataset, keyed by dataset name.
original_dfs = dict(
    (dataset, pd.read_csv(f"./collected-data/{dataset}.csv"))
    for dataset in datasets
)

# Load the matching cleaned CSV of every dataset, keyed the same way.
cleaned_dfs = dict(
    (dataset, pd.read_csv(f"./cleaned-data/{dataset}-cleaned.csv"))
    for dataset in datasets
)

### Percentage Reduction in Dataset Size

In [6]:
# For each dataset, print the row count of the original and the cleaned frame
# together with the percentage of original rows that are absent after cleaning.
for dataset in datasets:
    original_size = len(original_dfs[dataset])
    cleaned_size = len(cleaned_dfs[dataset])
    # Guard against an original file with zero data rows: dividing by zero
    # would raise ZeroDivisionError and abort the report for every remaining
    # dataset. With nothing to remove, the reduction is reported as 0%.
    if original_size:
        reduction = ((original_size - cleaned_size) / original_size) * 100
    else:
        reduction = 0.0
    print(f"{dataset}: {original_size} → {cleaned_size} ({reduction:.2f}% removed)")

supervisorStudent: 101 → 70 (30.69% removed)
supervisorStudentProtocol: 101 → 70 (30.69% removed)
doctorNurse: 101 → 70 (30.69% removed)
doctorNurseProtocol: 101 → 70 (30.69% removed)
employeeProtocol: 101 → 70 (30.69% removed)


### Calculate Average Time Taken for Delegation, Verification and Total

In [7]:
# Mean of each timing column in the cleaned data, one entry per dataset,
# in the same order as `datasets`.
delegationTimeTaken = [
    cleaned_dfs[dataset]['Delegation Time Taken'].mean() for dataset in datasets
]
verificationTimeTaken = [
    cleaned_dfs[dataset]['Verification Time Taken'].mean() for dataset in datasets
]
totalTimeTaken = [
    cleaned_dfs[dataset]['Total Time Taken'].mean() for dataset in datasets
]

# One row per use case; the labels are listed in the same order as `datasets`,
# so row i describes datasets[i].
summary_df = pd.DataFrame({
    "Use Case": [
        'Supervisor Student',
        'Supervisor Student Protocol',
        'Doctor Nurse',
        'Doctor Nurse Protocol',
        'Employee Protocol',
    ],
    "Delegation Time Taken": delegationTimeTaken,
    "Verification Time Taken": verificationTimeTaken,
    "Total Time Taken": totalTimeTaken,
})

# Write the cleaned-data summary to ./summary-cleaned.csv.
summary_df.to_csv('./summary-cleaned.csv')

In [8]:
# Mean of each timing column in the original (uncleaned) data, one entry per
# dataset, in the same order as `datasets`.
delegationTimeTaken = [
    original_dfs[dataset]['Delegation Time Taken'].mean() for dataset in datasets
]
verificationTimeTaken = [
    original_dfs[dataset]['Verification Time Taken'].mean() for dataset in datasets
]
totalTimeTaken = [
    original_dfs[dataset]['Total Time Taken'].mean() for dataset in datasets
]

# One row per use case; the labels are listed in the same order as `datasets`,
# so row i describes datasets[i].
summary_df = pd.DataFrame({
    "Use Case": [
        'Supervisor Student',
        'Supervisor Student Protocol',
        'Doctor Nurse',
        'Doctor Nurse Protocol',
        'Employee Protocol',
    ],
    "Delegation Time Taken": delegationTimeTaken,
    "Verification Time Taken": verificationTimeTaken,
    "Total Time Taken": totalTimeTaken,
})

# Write the original-data summary to ./summary.csv.
summary_df.to_csv('./summary.csv')