In [1]:
!uv pip install numpy pandas tinyflux flatten_dict tqdm

[2mAudited [1m5 packages[0m [2min 3ms[0m[0m


In [2]:
import numpy as np 
import pandas as pd 
import os
from tqdm import tqdm


In [5]:
from datetime import datetime

import numpy as np
from flatten_dict import unflatten
from tinyflux import TinyFlux


def _unit_pairs_for(unit_conversion, key):
    """Return the ``(unit name, code)`` pairs that apply to a report key.

    An empty tuple is returned when no conversion table applies to ``key``.
    """
    if "frequency" in key:
        return unit_conversion["frequency"].items()
    if "mem" in key:
        return unit_conversion["mem"].items()
    # The first device name found in the key settles the outcome: a "cpu" key
    # without "usage" is left untouched and is never re-examined as "gpu".
    for device in ("cpu", "gpu"):
        if device in key:
            if "usage" in key:
                return unit_conversion[device]["usage"].items()
            return ()
    return ()


def revert_unit(unit_conversion, converted_report: dict):
    """Replace encoded unit codes in a flat report with their unit names.

    Only keys containing ``"unit"`` are considered. The lookup table is chosen
    from the key text, checked in this order: ``"frequency"``, ``"mem"``,
    ``"cpu"`` (only with ``"usage"``), ``"gpu"`` (only with ``"usage"``).

    Args:
        unit_conversion: Mapping of unit names to codes, with top-level
            ``"frequency"`` and ``"mem"`` tables and ``"usage"`` tables nested
            under ``"cpu"`` and ``"gpu"``.
        converted_report: Flat report whose unit entries hold codes.

    Returns:
        A shallow copy of ``converted_report`` in which every recognised unit
        code is replaced by the first unit name mapped to that code. Entries
        with no matching table or code are left unchanged.
    """
    restored = converted_report.copy()
    for name, code in converted_report.items():
        if "unit" not in name:
            continue
        for unit_name, unit_code in _unit_pairs_for(unit_conversion, name):
            if unit_code == code:
                restored[name] = unit_name
                break
    return restored


def extract_data(data):
    """Convert datapoints into nested records, split by process vs. system.

    Each datapoint's tags, fields and timestamp are merged into one flat
    dict, unit codes are mapped back to unit names with ``revert_unit``, and
    the flat dict is unflattened with the ``"dot"`` splitter.

    Args:
        data: Iterable of datapoints exposing ``tags``, ``fields`` and
            ``time``; every datapoint must carry a ``"type"`` tag.

    Returns:
        A ``(process_data, system_data)`` tuple. ``process_data`` maps each
        ``metadata.pid`` to the list of records whose type tag is
        ``"process"``; ``system_data`` is the list of all other records, in
        input order.
    """
    unit_conversion = {
        "cpu": {"usage": {"milicpu": 1, "cputime": 2, "percentage": 3}},
        "gpu": {"usage": {"percentage": 1}},
        "mem": {"Gb": 1, "Mb": 2, "Kb": 3},
        "frequency": {"GHz": 1, "MHz": 2},
    }
    records_by_pid = {}
    system_records = []
    for point in data:
        # Read the type tag first so a datapoint without one fails before
        # any conversion work is done.
        is_process = point.tags["type"] == "process"

        flat_record = {
            **point.tags,
            **point.fields,
            "timestamp": datetime.timestamp(point.time),
        }
        record = unflatten(revert_unit(unit_conversion, flat_record), "dot")

        if not is_process:
            system_records.append(record)
            continue
        records_by_pid.setdefault(record["metadata"]["pid"], []).append(record)

    return records_by_pid, system_records


def extract_data_from_file_path(file_path: str):
    """Read every datapoint from a TinyFlux file and convert it.

    Args:
        file_path: Path to the TinyFlux database file.

    Returns:
        The ``(process_data, system_data)`` tuple produced by
        ``extract_data`` for the file's datapoints.
    """
    # Use the database as a context manager so its storage is closed as soon
    # as the points have been read; callers open one database per CSV file in
    # nested loops, and unclosed databases accumulate open file handles.
    with TinyFlux(file_path) as db:
        data = db.all()
    return extract_data(data)

In [22]:
# Spot-check the CPU-usage aggregation on a single metric file.
file_path = "runs_32/run_1-task_pc-cpu_fifo/metric_database/nid005024.csv"

# Start the accumulators at zero so this cell does not depend on values left
# in the kernel by another cell.
total_usage = 0
total_cores = 0
time_taken = 0
# Last system record examined; stays None if loading fails before the loop,
# so the error handler below can always print it.
entity = None

try:
    process_data, system_data = extract_data_from_file_path(file_path)
    time_taken += len(system_data)
    for entity in system_data:
        cpu_usage = entity["cpu"]["usage"]["value"]
        # Keep only cores whose numeric suffix (after "_") is below 64.
        # NOTE(review): presumably this drops hyperthread siblings — confirm.
        filtered_usage = [
            usage for core, usage in cpu_usage.items() if int(core.split("_")[1]) < 64
        ]

        total_usage += sum(filtered_usage)
        total_cores += len(filtered_usage)
except Exception as e:
    print(e)
    print(entity)

In [16]:
# Number of system-level records currently held in `system_data`.
len(system_data)

718

In [42]:
# Check how a run folder name splits on "_": the aggregation cells use
# index 2 for the "Optasks" column and index 3 for the "Algorithm" column.
test = "run_1-task_pc-cpu_round-robin"
test.split("_")

['run', '1-task', 'pc-cpu', 'round-robin']

In [59]:
node_config_list = ["4"]
# NOTE(review): `algorithm` is not referenced anywhere in this cell.
algorithm = ["best_fit", "round_robin", "priority", "fifo"]

# One row per run folder: node count, task, algorithm, mean CPU usage, time.
results = []

for numb_node in node_config_list:
    runs_dir = f"runs_{numb_node}"
    for folder in tqdm(sorted(os.listdir(runs_dir), reverse=True)):
        sub_folder = os.path.join(runs_dir, folder, "metric_database")
        if not os.path.exists(sub_folder):
            continue

        # Parts 2 and 3 of the "_"-separated folder name label the row.
        # Validate the name before parsing any file so that a stray folder is
        # skipped up front instead of raising IndexError after all the work.
        folder_split = folder.split("_")
        if len(folder_split) < 4:
            print(f"Skipping {folder!r}: expected at least 4 '_'-separated parts")
            continue

        total_usage = 0
        total_cores = 0
        time_taken = 0
        for file in os.listdir(sub_folder):
            if not file.endswith(".csv"):
                continue
            file_path = os.path.join(sub_folder, file)
            process_data, system_data = extract_data_from_file_path(file_path)
            time_taken += len(system_data)
            for entity in system_data:
                cpu_usage = entity["cpu"]["usage"]["value"]
                # Keep only cores whose numeric suffix (after "_") is below 64.
                # NOTE(review): presumably this drops hyperthread siblings — confirm.
                filtered_usage = [
                    usage
                    for core, usage in cpu_usage.items()
                    if int(core.split("_")[1]) < 64
                ]

                total_usage += sum(filtered_usage)
                total_cores += len(filtered_usage)

        overall_average_usage = total_usage / total_cores if total_cores > 0 else 0

        # `time_taken` counts system records across all files of the run;
        # dividing by the node count gives records per node.
        # NOTE(review): reporting this as seconds assumes one record per
        # second and one file per node — confirm.
        results.append(
            [
                numb_node,
                folder_split[2],
                folder_split[3],
                overall_average_usage,
                time_taken / int(numb_node),
            ]
        )

df = pd.DataFrame(
    results,
    columns=[
        "Numb node",
        "Optasks",
        "Algorithm",
        "Average CPU Usage (%)",
        "Time taken (s)",
    ],
)

print(df)

100%|████████████████████████████████████████████████████████████████████| 15/15 [00:35<00:00,  2.37s/it]

   Numb node               Optasks    Algorithm  Average CPU Usage (%)  \
0          4                                                 17.785998   
1          4  reduce-data-movement     priority              18.695302   
2          4  reduce-data-movement         fifo              18.051298   
3          4  reduce-data-movement     best-fit              18.372785   
4          4                reduce  round-robin              18.448070   
5          4                reduce     priority              17.426560   
6          4                reduce         fifo              17.893591   
7          4                reduce     best-fit              17.996417   
8          4                pc-cpu  round-robin              30.091290   
9          4                pc-cpu     priority              30.500693   
10         4                pc-cpu         fifo              29.817168   
11         4                pc-cpu     best-fit              30.398761   
12         4         data-movement  ro




In [23]:
node_config_list = ["32"]
# One row per run folder: node count, task, algorithm, mean CPU usage, time.
results = []

for numb_node in node_config_list:
    runs_dir = f"runs_{numb_node}"
    for folder in tqdm(sorted(os.listdir(runs_dir), reverse=True)):
        sub_folder = os.path.join(runs_dir, folder, "metric_database")
        if not os.path.exists(sub_folder):
            continue

        # Parts 2 and 3 of the "_"-separated folder name label the row.
        # Validate the name before parsing any file so that a stray folder is
        # skipped up front instead of raising IndexError after all the work.
        folder_split = folder.split("_")
        if len(folder_split) < 4:
            print(f"Skipping {folder!r}: expected at least 4 '_'-separated parts")
            continue

        total_usage = 0
        total_cores = 0
        time_taken = 0
        for file in os.listdir(sub_folder):
            if not file.endswith(".csv"):
                continue
            file_path = os.path.join(sub_folder, file)
            # Reset per file so the handler below never hits an unbound name
            # or reports a record left over from a previous file.
            entity = None
            try:
                process_data, system_data = extract_data_from_file_path(file_path)
                time_taken += len(system_data)
                for entity in system_data:
                    cpu_usage = entity["cpu"]["usage"]["value"]
                    # Keep only cores whose numeric suffix (after "_") is below 64.
                    # NOTE(review): presumably this drops hyperthread siblings — confirm.
                    filtered_usage = [
                        usage
                        for core, usage in cpu_usage.items()
                        if int(core.split("_")[1]) < 64
                    ]

                    total_usage += sum(filtered_usage)
                    total_cores += len(filtered_usage)
            except Exception as e:
                # Best effort: report the failing file and carry on. Records
                # already accumulated from this file stay in the totals.
                print(e)
                print(entity)
                print(file_path)

        overall_average_usage = total_usage / total_cores if total_cores > 0 else 0
        # `time_taken` counts system records across all files of the run;
        # dividing by the node count gives records per node.
        # NOTE(review): reporting this as seconds assumes one record per
        # second and one file per node — confirm.
        results.append(
            [
                numb_node,
                folder_split[2],
                folder_split[3],
                overall_average_usage,
                time_taken / int(numb_node),
            ]
        )

df = pd.DataFrame(
    results,
    columns=[
        "Numb node",
        "Optasks",
        "Algorithm",
        "Average CPU Usage (%)",
        "Time taken (s)",
    ],
)

print(df)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [02:27<00:00, 18.40s/it]

  Numb node Optasks    Algorithm  Average CPU Usage (%)  Time taken (s)
0        32  reduce  round-robin              17.519794       719.46875
1        32  reduce     priority              17.211960       728.93750
2        32  reduce         fifo              17.035072       722.96875
3        32  reduce     best-fit              17.450913       707.03125
4        32  pc-cpu  round-robin              29.991062       710.90625
5        32  pc-cpu     priority              29.856786       711.06250
6        32  pc-cpu         fifo              29.908088       701.00000
7        32  pc-cpu     best-fit              29.904649       696.93750





In [45]:
# Export the results table as LaTeX. escape=True is needed because the
# "Average CPU Usage (%)" header contains "%", which LaTeX treats as the start
# of a comment: left unescaped it swallows the rest of the header row,
# including the row terminator.
latex_table = df.to_latex(index=False, float_format="%.2f", escape=True)
with open("cpu_usage_table.tex", "w", encoding="utf-8") as f:
    f.write(latex_table)

# Print LaTeX table
print(latex_table)

\begin{tabular}{lllr}
\toprule
Numb node & Optasks & Algorithm & Average CPU Usage (%) \\
\midrule
4 &  &  & 17.79 \\
4 & reduce & round-robin & 18.45 \\
4 & reduce & priority & 17.43 \\
4 & reduce & fifo & 17.89 \\
4 & reduce & best-fit & 18.00 \\
4 & pc-cpu & round-robin & 30.09 \\
4 & pc-cpu & priority & 30.50 \\
4 & pc-cpu & fifo & 29.82 \\
4 & pc-cpu & best-fit & 30.40 \\
\bottomrule
\end{tabular}

