# Add Kraus Data to Adams evaluation

In [4]:
import shutil
import gzip
import zipfile
from pathlib import Path

# Config
source_base = Path("concept-drift-characterization/evaluation_paper/data_collection/datasets_evaluation")
destination = Path("cdrift-evaluation/EvaluationLogs/Kraus")
noise_levels = ['without_noise', 'with_noise_5', 'with_noise_10']


def compress_xes_members(zip_path, level, out_dir):
    """Gzip every top-level ``.xes`` member of one archive into ``out_dir``.

    Members are streamed straight from the archive into the gzip output, so
    nothing is unpacked into the source tree and an interrupted run cannot
    leave a scratch folder behind whose stale logs the next run would pick up.

    Args:
        zip_path: Path of the ``.zip`` archive to read.
        level: Noise-level label used as the file name prefix.
        out_dir: Existing directory that receives the ``.xes.gz`` files.

    Returns:
        The paths of the files written, in archive order.
    """
    written = []
    with zipfile.ZipFile(zip_path, 'r') as archive:
        for info in archive.infolist():
            member = Path(info.filename)
            # Only logs stored at the archive root are converted; entries in
            # sub-folders and non-.xes files are skipped.
            if info.is_dir() or len(member.parts) != 1 or not member.match("*.xes"):
                continue

            # Output name: "<level>__<original file name>.gz"
            gz_path = out_dir / f"{level}__{member.name}.gz"
            with archive.open(info) as f_in, gzip.open(gz_path, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            written.append(gz_path)
    return written


# Ensure destination exists
destination.mkdir(parents=True, exist_ok=True)

# Process every archive of every noise level
for level in noise_levels:
    level_path = source_base / level
    for zip_path in level_path.glob("*.zip"):
        compress_xes_members(zip_path, level, destination)


In [11]:
# Put an unmodified copy of the gold standard file into the Kraus folder.
# copy2 also carries over the file metadata and returns the destination path.
gold_standard_name = "gold_standard.csv"
gold_standard_file = source_base / gold_standard_name
shutil.copy2(gold_standard_file, destination / gold_standard_name)

WindowsPath('cdrift-evaluation/EvaluationLogs/Kraus/gold_standard.csv')

In [12]:
import pandas as pd

# Build the gold standard for the converted logs: one copy of every original
# row per noise level, with log_name rewritten to "<level>__<name>" and a
# trailing ".xes" turned into ".xes.gz".

# Always read the untouched source file (not the copy in the destination), so
# re-running this cell never prefixes names that were already rewritten.
gold_path = source_base / "gold_standard.csv"
df = pd.read_csv(gold_path)

# One re-labelled copy of the table per noise level
augmented_rows = []

for level in noise_levels:
    df_copy = df.copy()
    df_copy["log_name"] = [f"{level}__{name}" for name in df["log_name"]]
    # Anchor on the trailing extension only: a plain substring replace would
    # also rewrite a ".xes" in the middle of a name, and the result would no
    # longer match a file name formed by appending ".gz" to the log's name.
    df_copy["log_name"] = df_copy["log_name"].str.replace(r"\.xes$", ".xes.gz", regex=True)
    df_copy["noise_level"] = level
    augmented_rows.append(df_copy)

# Concatenate all copies and write the result to the destination folder
final_df = pd.concat(augmented_rows, ignore_index=True)
final_df.to_csv(destination / "gold_standard.csv", index=False)