In [None]:
import os
import pandas as pd

TEST_DATA_PATH = '../data/test/'

# Cases with more events than this are left out of every statistic below.
MAX_EVENTS_PER_CASE = 150

# Fixed column order for the summary table. Passing it to the DataFrame
# constructor also guarantees the columns exist when no log was processed,
# so later sorting by 'Test Log' does not raise a KeyError.
STATS_COLUMNS = [
    'Test Log',
    'Number of Cases',
    'Total Events',
    'Average Events per Case',
    'Max Trace Length',
]

# os.listdir returns entries in arbitrary order; sort for reproducible runs.
test_logs = sorted(os.listdir(TEST_DATA_PATH))

stats_list = []

for test_log in test_logs:
    log_path = os.path.join(TEST_DATA_PATH, test_log)

    # os.listdir also yields hidden entries and sub-directories
    # (e.g. .ipynb_checkpoints, .DS_Store) that pd.read_csv cannot load.
    if test_log.startswith('.') or not os.path.isfile(log_path):
        continue

    # Load the test log as a DataFrame
    df = pd.read_csv(log_path)

    # Number of events per case, computed once; every statistic is derived
    # from this series instead of re-grouping a filtered copy of the frame.
    events_per_case = df.groupby('case_id').size()

    # Keep only the cases within the event limit
    kept_case_sizes = events_per_case[events_per_case <= MAX_EVENTS_PER_CASE]

    # Collect the metrics for this log.
    # Mean and max are NaN when no case passes the filter.
    stats_list.append({
        'Test Log': test_log,
        'Number of Cases': len(kept_case_sizes),
        'Total Events': int(kept_case_sizes.sum()),
        'Average Events per Case': kept_case_sizes.mean(),
        'Max Trace Length': kept_case_sizes.max(),
    })

# Create a DataFrame with the collected statistics
stats_df = pd.DataFrame(stats_list, columns=STATS_COLUMNS)

In [None]:
# Show the per-log summary ordered by file name, without the index column
stats_sorted = stats_df.sort_values(by="Test Log")
print(stats_sorted.to_string(index=False))