In [3]:
import re
import csv
import os

In [145]:
# Column order of the generated CSV report.
_YCSB_REPORT_FIELDS = (
    "Timestamp",
    "Database",
    "WorkloadType",
    "Operations",
    "AverageLatency(us)",
    "MinLatency(us)",
    "MaxLatency(us)",
    "95thPercentileLatency(us)",
    "99thPercentileLatency(us)",
)
# Columns that may be filled from "[INSERT], <metric>, <value>\par" lines.
# The three leading metadata columns are deliberately excluded so that a
# result line can never overwrite them.
_YCSB_METRIC_FIELDS = frozenset(_YCSB_REPORT_FIELDS[3:])

_YCSB_TIMESTAMP_RE = re.compile(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})")
# The leading greedy "(.*)" makes these two patterns pick the LAST occurrence
# on a line; the value of interest is group 2 in both.
_YCSB_LOAD_RE = re.compile(r"(.*)ycsb load (\w+)")
_YCSB_WORKLOAD_RE = re.compile(r"(.*)workloads/workload([a-z])(.*)")
# Result lines end with a literal backslash followed by "par".
_YCSB_INSERT_RE = re.compile(r"\[(INSERT)\], ([\w\(\)/%]+), ([\w.]+)\\par")


def _parse_ycsb_lines(lines, source="<lines>"):
    """Build one report row from the lines of a single YCSB load log.

    Args:
        lines: Iterable of text lines (an open file works).
        source: Label used in diagnostic messages, typically the file name.

    Returns:
        dict keyed by ``_YCSB_REPORT_FIELDS``; entries that were never seen
        in the log stay ``None``.
    """
    row = dict.fromkeys(_YCSB_REPORT_FIELDS)
    # All parser state is local to one log, so nothing leaks between files.
    timestamp = None
    database = None
    workload_type = None

    for line in lines:
        timestamp_match = _YCSB_TIMESTAMP_RE.search(line)
        if timestamp_match:
            timestamp = timestamp_match.group(1)

        load_match = _YCSB_LOAD_RE.search(line)
        if load_match:
            database = load_match.group(2)
            workload_match = _YCSB_WORKLOAD_RE.search(line)
            if workload_match:
                workload_type = workload_match.group(2)
            else:
                # A load command without a workloads/workload<x> path on the
                # same line: report it instead of crashing on a None match.
                workload_type = None
                print(f"No workload file found in load command of {source}")

        # Results are only meaningful once the load command has been seen.
        if not (workload_type and database):
            continue

        result_match = _YCSB_INSERT_RE.search(line)
        if not result_match:
            continue

        metric_name = result_match.group(2)
        if metric_name not in _YCSB_METRIC_FIELDS:
            print(f"Not found {metric_name}")
            continue

        try:
            metric_value = float(result_match.group(3))
        except ValueError:
            # One malformed value should not abort the whole report.
            print(f"Invalid value for {metric_name} in {source}: {result_match.group(3)}")
            continue

        row["Timestamp"] = timestamp
        row["Database"] = database
        row["WorkloadType"] = workload_type
        row[metric_name] = metric_value

    return row


def parse_ycsb_log(directory, output_path='ycsb_report.csv'):
    """Summarise the YCSB load logs in ``directory`` into a CSV report.

    Every file in ``directory`` whose name ends with ".bin" is read as
    ISO-8859-1 text and contributes exactly one row to the report, even when
    no metric could be extracted from it (that row is then blank). Files are
    processed in sorted name order so the report is reproducible.

    For each log the parser tracks the most recent ``YYYY-MM-DD HH:MM:SS``
    timestamp, the database named after ``ycsb load`` and the workload letter
    taken from ``workloads/workload<x>`` on the same line, then records the
    ``[INSERT], <metric>, <value>\\par`` results that follow.

    Args:
        directory: Path of the directory containing the log files.
        output_path: Destination of the CSV report. Defaults to
            'ycsb_report.csv' in the current working directory.

    Returns:
        None. The report is written to ``output_path``; when no ".bin" file
        exists it contains only the header row.

    Raises:
        OSError: If ``directory`` cannot be listed, a log cannot be opened,
            or the report cannot be written.
    """
    rows = []
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".bin"):
            continue
        log_path = os.path.join(directory, filename)
        with open(log_path, "r", encoding="ISO-8859-1") as log_file:
            rows.append(_parse_ycsb_lines(log_file, source=filename))

    with open(output_path, 'w', newline='') as csvfile:
        # Fixed field list: the header no longer depends on data[0] existing.
        writer = csv.DictWriter(csvfile, fieldnames=list(_YCSB_REPORT_FIELDS))
        writer.writeheader()
        writer.writerows(rows)


In [146]:
# Directory scanned for YCSB logs; being relative, it resolves against the
# kernel's current working directory.
log_directory = './logs/'
# Parse every ".bin" file in that directory and write the summary to
# 'ycsb_report.csv' in the current working directory.
parse_ycsb_log(log_directory)