In [1]:
import os
import re
import glob
import pandas as pd

In [2]:
# read all files name result_summary.txt from all folders in foler .results
files = glob.glob("./io500-claix-2023/results/*/result_summary.txt")

In [3]:
files

['./io500-claix-2023/results/2024.12.23-23.40.33/result_summary.txt',
 './io500-claix-2023/results/2024.12.23-23.40.05/result_summary.txt',
 './io500-claix-2023/results/2024.12.23-23.40.13/result_summary.txt',
 './io500-claix-2023/results/2024.12.23-23.39.45/result_summary.txt',
 './io500-claix-2023/results/2024.12.23-23.40.04/result_summary.txt',
 './io500-claix-2023/results/2024.12.23-23.39.56/result_summary.txt',
 './io500-claix-2023/results/2024.12.23-23.40.21/result_summary.txt',
 './io500-claix-2023/results/2024.12.23-23.40.26/result_summary.txt',
 './io500-claix-2023/results/2024.12.23-23.39.49/result_summary.txt',
 './io500-claix-2023/results/2024.12.23-23.40.16/result_summary.txt']

In [4]:
# print out content of files[0] file
with open(files[0], "r") as f:
    f_content = f.read()
    print(f_content)

IO500 version f754ac3f531a (standard)
[RESULT]       ior-easy-write        2.455826 GiB/s : time 343.992 seconds
[RESULT]    mdtest-easy-write       31.451299 kIOPS : time 1.538 seconds [INVALID]
[      ]            timestamp        0.000000 kIOPS : time 0.000 seconds
[RESULT]       ior-hard-write        0.423674 GiB/s : time 16.546 seconds [INVALID]
[RESULT]    mdtest-hard-write        6.548446 kIOPS : time 3.456 seconds [INVALID]
[RESULT]                 find      382.098935 kIOPS : time 0.087 seconds
[RESULT]        ior-easy-read        6.530513 GiB/s : time 129.341 seconds
[RESULT]     mdtest-easy-stat      137.648761 kIOPS : time 1.124 seconds
[RESULT]        ior-hard-read        0.271841 GiB/s : time 25.781 seconds
[RESULT]     mdtest-hard-stat      117.164648 kIOPS : time 1.148 seconds
[RESULT]   mdtest-easy-delete       42.708129 kIOPS : time 1.417 seconds
[RESULT]     mdtest-hard-read       31.405922 kIOPS : time 1.518 seconds
[RESULT]   mdtest-hard-delete        7.289304 kIOP

In [5]:
def parse_io500_file(io500_summary_content):
    """
    Parses the IO500 summary content and extracts results into a DataFrame.

    Parameters:
        io500_summary_content (str): The content of the IO500 summary file as a string.

    Returns:
        pd.DataFrame: A DataFrame containing the parsed results with columns
                      ['Test', 'Metric', 'Result', 'Time'] sorted by test name length.
    """
    data = []
    for line in io500_summary_content.splitlines():
        # Match lines with results
        match = re.match(
            r"\[RESULT\]\s+([\w\-]+)\s+([\d\.]+)\s+(.+)\s+:\s+time\s+([\d\.]+)\s+seconds.*",
            line,
        )
        if match:
            test_name = match.group(1)  # Test name (e.g., ior-easy-write)
            result = float(match.group(2))  # Numerical result
            metric = match.group(3)  # Metric (e.g., GiB/s, kIOPS)
            time = float(match.group(4))  # Time in seconds
            data.append([test_name, metric, result, time])

    # Create a DataFrame from parsed data
    return pd.DataFrame(data, columns=["Test", "Metric", "Result", "Time"], index=None).sort_values(
        by="Test", key=lambda col: col.str.len(), ascending=True
    ).reset_index(drop=True)

In [6]:
parse_io500_file(f_content)

Unnamed: 0,Test,Metric,Result,Time
0,find,kIOPS,382.098935,0.087
1,ior-easy-read,GiB/s,6.530513,129.341
2,ior-hard-read,GiB/s,0.271841,25.781
3,ior-easy-write,GiB/s,2.455826,343.992
4,ior-hard-write,GiB/s,0.423674,16.546
5,mdtest-easy-stat,kIOPS,137.648761,1.124
6,mdtest-hard-read,kIOPS,31.405922,1.518
7,mdtest-hard-stat,kIOPS,117.164648,1.148
8,mdtest-easy-write,kIOPS,31.451299,1.538
9,mdtest-hard-write,kIOPS,6.548446,3.456


In [7]:
io500_summary_df_list = []
for file in files:
    with open(file, "r") as f:
        f_content = f.read()
        io500_summary_df_list.append(parse_io500_file(f_content))

In [8]:
io500_summary_df_list[0]

Unnamed: 0,Test,Metric,Result,Time
0,find,kIOPS,382.098935,0.087
1,ior-easy-read,GiB/s,6.530513,129.341
2,ior-hard-read,GiB/s,0.271841,25.781
3,ior-easy-write,GiB/s,2.455826,343.992
4,ior-hard-write,GiB/s,0.423674,16.546
5,mdtest-easy-stat,kIOPS,137.648761,1.124
6,mdtest-hard-read,kIOPS,31.405922,1.518
7,mdtest-hard-stat,kIOPS,117.164648,1.148
8,mdtest-easy-write,kIOPS,31.451299,1.538
9,mdtest-hard-write,kIOPS,6.548446,3.456


In [14]:
result_summary_df = io500_summary_df_list[0][["Test", "Metric", "Result"]].copy()
result_summary_df.rename(columns={"Result": "Result_1"}, inplace=True)
result_summary_df

Unnamed: 0,Test,Metric,Result_1
0,find,kIOPS,382.098935
1,ior-easy-read,GiB/s,6.530513
2,ior-hard-read,GiB/s,0.271841
3,ior-easy-write,GiB/s,2.455826
4,ior-hard-write,GiB/s,0.423674
5,mdtest-easy-stat,kIOPS,137.648761
6,mdtest-hard-read,kIOPS,31.405922
7,mdtest-hard-stat,kIOPS,117.164648
8,mdtest-easy-write,kIOPS,31.451299
9,mdtest-hard-write,kIOPS,6.548446


In [15]:
for i, df in enumerate(io500_summary_df_list[1:], 2):
    result_summary_df[f"Result_{i}"] = df["Result"]

In [16]:
result_summary_df.sort_values(
        by="Test", key=lambda col: col.str.len(), ascending=True
    ).reset_index(drop=True)

Unnamed: 0,Test,Metric,Result_1,Result_2,Result_3,Result_4,Result_5,Result_6,Result_7,Result_8,Result_9,Result_10
0,find,kIOPS,382.098935,30.341691,280.907294,435.963585,238.150329,251.578971,389.464963,350.056689,437.703248,306.07099
1,ior-easy-read,GiB/s,6.530513,6.558521,6.551668,6.541544,6.551181,6.55206,6.557634,6.520246,6.541416,6.550253
2,ior-hard-read,GiB/s,0.271841,0.254355,0.26229,0.22055,0.241038,0.278692,0.375133,0.279891,0.29389,0.231835
3,ior-easy-write,GiB/s,2.455826,2.420256,2.445397,2.444939,2.438937,2.427936,2.311218,2.417063,2.356745,2.403937
4,ior-hard-write,GiB/s,0.423674,0.419858,0.387592,0.355009,0.461955,0.423341,0.429261,0.386626,0.397322,0.360819
5,mdtest-easy-stat,kIOPS,137.648761,137.255466,139.602765,138.052268,116.917249,125.730657,151.548171,120.635389,85.80928,128.240196
6,mdtest-hard-read,kIOPS,31.405922,30.204524,31.691271,30.11289,31.937272,30.796766,31.263243,31.213962,30.680315,30.738776
7,mdtest-hard-stat,kIOPS,117.164648,117.031837,111.06712,121.535279,122.512403,116.680832,147.154685,118.829119,112.643369,121.320399
8,mdtest-easy-write,kIOPS,31.451299,34.297154,32.83068,30.683907,32.198474,31.498597,28.923479,31.464099,10.083695,29.946948
9,mdtest-hard-write,kIOPS,6.548446,6.631946,6.705514,6.51055,6.500968,6.454765,4.896629,6.289798,6.203067,6.843561


In [17]:
result_summary_df.to_csv("./io500_results.csv", index=False)