From 20b8f1b493f5973067cf2be56d4289fca0211d06 Mon Sep 17 00:00:00 2001 From: Alexander Nesterov Date: Fri, 10 Oct 2025 19:32:37 +0200 Subject: [PATCH 1/5] refactor performance data handling and output generation --- scoreboard/main.py | 72 ++++++-- scripts/create_perf_table.py | 333 +++++++++++++++++++---------------- 2 files changed, 239 insertions(+), 166 deletions(-) diff --git a/scoreboard/main.py b/scoreboard/main.py index 25c1206d..31976212 100644 --- a/scoreboard/main.py +++ b/scoreboard/main.py @@ -108,26 +108,48 @@ def discover_tasks(tasks_dir, task_types): directories, tasks_type_map = discover_tasks(tasks_dir, task_types) -def load_performance_data(perf_stat_file_path): - """Load and parse performance statistics from CSV file.""" +def load_performance_data_threads(perf_stat_file_path: Path) -> dict: + """Load threads performance ratios (T_x/T_seq) from CSV. + Expected header: Task, SEQ, OMP, TBB, STL, ALL + """ + perf_stats: dict[str, dict] = {} + if perf_stat_file_path.exists(): + with open(perf_stat_file_path, "r", newline="") as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + task_name = row.get("Task") + if not task_name: + continue + perf_stats[task_name] = { + "seq": row.get("SEQ", "?"), + "omp": row.get("OMP", "?"), + "tbb": row.get("TBB", "?"), + "stl": row.get("STL", "?"), + "all": row.get("ALL", "?"), + } + else: + logger.warning("Threads perf stats CSV not found at %s", perf_stat_file_path) + return perf_stats - perf_stats = dict() + +def load_performance_data_processes(perf_stat_file_path: Path) -> dict: + """Load processes performance ratios (T_x/T_seq) from CSV. + Expected header: Task, SEQ, MPI + """ + perf_stats: dict[str, dict] = {} if perf_stat_file_path.exists(): with open(perf_stat_file_path, "r", newline="") as csvfile: reader = csv.DictReader(csvfile) for row in reader: task_name = row.get("Task") - if task_name: - perf_stats[task_name] = { - "seq": row.get("SEQ", "?"), - "omp": row.get("OMP", "?"), - "tbb": row.get("TBB", "?"), - "stl": row.get("STL", "?"), - "all": row.get("ALL", "?"), - "mpi": "N/A", - } + if not task_name: + continue + perf_stats[task_name] = { + "seq": row.get("SEQ", "?"), + "mpi": row.get("MPI", "?"), + } else: - logger.warning("Performance stats CSV not found at %s", perf_stat_file_path) + logger.warning("Processes perf stats CSV not found at %s", perf_stat_file_path) return perf_stats @@ -652,15 +674,29 @@ def _compute_display_deadlines_processes(n_items: int) -> list[date]: ds = _evenly_spaced_dates(n_items, s, e) return ds - # Locate perf CSV from CI or local runs - candidates = [ + # Locate perf CSVs from CI or local runs (threads and processes) + candidates_threads = [ + script_dir.parent / "build" / "perf_stat_dir" / "threads_task_run_perf_table.csv", + script_dir.parent / "perf_stat_dir" / "threads_task_run_perf_table.csv", + # Fallback to old single-file name script_dir.parent / "build" / "perf_stat_dir" / "task_run_perf_table.csv", script_dir.parent / "perf_stat_dir" / "task_run_perf_table.csv", ] - perf_stat_file_path = next((p for p in candidates if p.exists()), candidates[0]) + threads_csv = next((p for p in candidates_threads if p.exists()), candidates_threads[0]) - # Read and parse performance statistics CSV - perf_stats = load_performance_data(perf_stat_file_path) + candidates_processes = [ + script_dir.parent / "build" / "perf_stat_dir" / "processes_task_run_perf_table.csv", + script_dir.parent / "perf_stat_dir" / "processes_task_run_perf_table.csv", + ] + processes_csv = next((p for p in 
candidates_processes if p.exists()), candidates_processes[0]) + + # Read and merge performance statistics CSVs + perf_stats_threads = load_performance_data_threads(threads_csv) + perf_stats_processes = load_performance_data_processes(processes_csv) + perf_stats: dict[str, dict] = {} + perf_stats.update(perf_stats_threads) + for k, v in perf_stats_processes.items(): + perf_stats[k] = {**perf_stats.get(k, {}), **v} # Partition tasks by tasks_type from settings.json threads_task_dirs = [ diff --git a/scripts/create_perf_table.py b/scripts/create_perf_table.py index 0e5a67f6..88174b4c 100644 --- a/scripts/create_perf_table.py +++ b/scripts/create_perf_table.py @@ -4,6 +4,111 @@ import xlsxwriter import csv +# ------------------------------- +# Helpers and configuration +# ------------------------------- + +# Known task types (used to pre-initialize tables) +list_of_type_of_tasks = ["all", "mpi", "omp", "seq", "stl", "tbb"] + +# Compile patterns once +OLD_PATTERN = re.compile(r"tasks[\/|\\](\w*)[\/|\\](\w*):(\w*):(-*\d*\.\d*)") +NEW_PATTERN = re.compile( + r"(\w+_test_task_(threads|processes))_(\w+)_enabled:(\w*):(-*\d*\.\d*)" +) +# Example formats: +# example_threads_omp_enabled:task_run:0.4749 +# example_processes_2_mpi_enabled:pipeline:0.0507 +SIMPLE_PATTERN = re.compile( + r"(.+?)_(omp|seq|tbb|stl|all|mpi)_enabled:(task_run|pipeline):(-*\d*\.\d*)" +) + + +def _ensure_task_tables(result_tables: dict, perf_type: str, task_name: str) -> None: + if perf_type not in result_tables: + result_tables[perf_type] = {} + if task_name not in result_tables[perf_type]: + result_tables[perf_type][task_name] = {t: -1.0 for t in list_of_type_of_tasks} + + +def _infer_category(task_name: str) -> str: + return "threads" if "threads" in task_name else "processes" + + +def _columns_for_category(category: str) -> list[str]: + return ["seq", "omp", "tbb", "stl", "all"] if category == "threads" else ["seq", "mpi"] + + +def _write_excel_sheet(workbook, worksheet, cpu_num: int, tasks_list: list[str], cols: list[str], table: dict): + worksheet.set_column("A:Z", 23) + right_bold_border = workbook.add_format({"bold": True, "right": 2, "bottom": 2}) + bottom_bold_border = workbook.add_format({"bold": True, "bottom": 2}) + right_border = workbook.add_format({"right": 2}) + + worksheet.write(0, 0, "cpu_num = " + str(cpu_num), right_bold_border) + + # Header (T_x, S, Eff) per column + col = 1 + for ttype in cols: + worksheet.write(0, col, f"T_{ttype}({cpu_num})", bottom_bold_border) + col += 1 + worksheet.write( + 0, + col, + f"S({cpu_num}) = T_seq({cpu_num}) / T_{ttype}({cpu_num})", + bottom_bold_border, + ) + col += 1 + worksheet.write(0, col, f"Eff({cpu_num}) = S({cpu_num}) / {cpu_num}", right_bold_border) + col += 1 + + # Task rows + row = 1 + for task_name in tasks_list: + worksheet.write(row, 0, task_name, workbook.add_format({"bold": True, "right": 2})) + row += 1 + + # Values + row = 1 + for task_name in tasks_list: + col = 1 + for ttype in cols: + if task_name not in table: + # no data for task at all + worksheet.write(row, col, "—"); col += 1 + worksheet.write(row, col, "—"); col += 1 + worksheet.write(row, col, "—", right_border); col += 1 + continue + par_time = table[task_name].get(ttype, -1.0) + seq_time = table[task_name].get("seq", -1.0) + if par_time in (0.0, -1.0) or seq_time in (0.0, -1.0): + speed_up = "—" + efficiency = "—" + else: + speed_up = seq_time / par_time + efficiency = speed_up / cpu_num + worksheet.write(row, col, par_time if par_time != -1.0 else "?"); col += 1 + worksheet.write(row, col, 
speed_up); col += 1 + worksheet.write(row, col, efficiency, right_border); col += 1 + row += 1 + + +def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict): + with open(path, "w", newline="") as csvfile: + writer = csv.writer(csvfile) + writer.writerow(header) + for task_name in tasks_list: + seq_time = table.get(task_name, {}).get("seq", -1.0) + if seq_time in (0.0, -1.0): + writer.writerow([task_name] + ["?" for _ in header[1:]]) + continue + row = [task_name, 1.0] + # Remaining headers correspond to columns starting from 2 + for col_name in header[2:]: + val = table[task_name].get(col_name.lower(), -1.0) + row.append(val / seq_time if val != -1.0 else "?") + writer.writerow(row) + parser = argparse.ArgumentParser() parser.add_argument( "-i", "--input", help="Input file path (logs of perf tests, .txt)", required=True @@ -17,30 +122,30 @@ list_of_type_of_tasks = ["all", "mpi", "omp", "seq", "stl", "tbb"] +# For each perf_type (pipeline/task_run) store times per task result_tables = {"pipeline": {}, "task_run": {}} -set_of_task_name = [] +# Map task name -> category (threads|processes) +task_categories = {} +# Track tasks per category to split output +tasks_by_category = {"threads": set(), "processes": set()} logs_file = open(logs_path, "r") logs_lines = logs_file.readlines() for line in logs_lines: # Handle both old format: tasks/task_type/task_name:perf_type:time # and new format: namespace_task_type_enabled:perf_type:time - old_pattern = r"tasks[\/|\\](\w*)[\/|\\](\w*):(\w*):(-*\d*\.\d*)" - new_pattern = ( - r"(\w+_test_task_(threads|processes))_(\w+)_enabled:(\w*):(-*\d*\.\d*)" - ) - - old_result = re.findall(old_pattern, line) - new_result = re.findall(new_pattern, line) + old_result = OLD_PATTERN.findall(line) + new_result = NEW_PATTERN.findall(line) + simple_result = SIMPLE_PATTERN.findall(line) if len(old_result): task_name = old_result[0][1] perf_type = old_result[0][2] - set_of_task_name.append(task_name) - result_tables[perf_type][task_name] = {} - - for ttype in list_of_type_of_tasks: - result_tables[perf_type][task_name][ttype] = -1.0 + # legacy: track task in threads category by default + _ensure_task_tables(result_tables, perf_type, task_name) + # Unknown category in legacy format; default to threads + task_categories[task_name] = "threads" + tasks_by_category["threads"].add(task_name) elif len(new_result): # Extract task name from namespace (e.g., "example_threads" from "nesterov_a_test_task_threads") full_task_name = new_result[0][0] @@ -48,26 +153,30 @@ task_name = f"example_{task_category}" perf_type = new_result[0][3] - if task_name not in set_of_task_name: - set_of_task_name.append(task_name) + # no set tracking needed; category mapping below - if perf_type not in result_tables: - result_tables[perf_type] = {} - if task_name not in result_tables[perf_type]: - result_tables[perf_type][task_name] = {} - for ttype in list_of_type_of_tasks: - result_tables[perf_type][task_name][ttype] = -1.0 + _ensure_task_tables(result_tables, perf_type, task_name) + task_categories[task_name] = task_category + tasks_by_category[task_category].add(task_name) + elif len(simple_result): + # Extract task name in the current format (prefix already includes category suffix) + task_name = simple_result[0][0] + # Infer category by substring + task_category = "threads" if "threads" in task_name else "processes" + perf_type = simple_result[0][2] + + # no set tracking needed; category mapping below + + _ensure_task_tables(result_tables, perf_type, task_name) + 
task_categories[task_name] = task_category + tasks_by_category[task_category].add(task_name) for line in logs_lines: # Handle both old format: tasks/task_type/task_name:perf_type:time # and new format: namespace_task_type_enabled:perf_type:time - old_pattern = r"tasks[\/|\\](\w*)[\/|\\](\w*):(\w*):(-*\d*\.\d*)" - new_pattern = ( - r"(\w+_test_task_(threads|processes))_(\w+)_enabled:(\w*):(-*\d*\.\d*)" - ) - - old_result = re.findall(old_pattern, line) - new_result = re.findall(new_pattern, line) + old_result = OLD_PATTERN.findall(line) + new_result = NEW_PATTERN.findall(line) + simple_result = SIMPLE_PATTERN.findall(line) if len(old_result): task_type = old_result[0][0] @@ -80,7 +189,6 @@ result_tables[perf_type][task_name][task_type] = perf_time elif len(new_result): # Extract task details from namespace format - full_task_name = new_result[0][0] task_category = new_result[0][1] # "threads" or "processes" task_type = new_result[0][2] # "all", "omp", "seq", etc. perf_type = new_result[0][3] @@ -93,124 +201,53 @@ if task_name in result_tables[perf_type]: result_tables[perf_type][task_name][task_type] = perf_time + task_categories[task_name] = task_category + tasks_by_category[task_category].add(task_name) + elif len(simple_result): + # Extract details from the simplified pattern (current logs) + task_name = simple_result[0][0] + # Infer category by substring present in task_name + task_category = "threads" if "threads" in task_name else "processes" + task_type = simple_result[0][1] + perf_type = simple_result[0][2] + perf_time = float(simple_result[0][3]) + if perf_time < 0.001: + msg = f"Performance time = {perf_time} < 0.001 second : for {task_type} - {task_name} - {perf_type} \n" + raise Exception(msg) -for table_name in result_tables: - workbook = xlsxwriter.Workbook( - os.path.join(xlsx_path, table_name + "_perf_table.xlsx") - ) - worksheet = workbook.add_worksheet() - worksheet.set_column("A:Z", 23) - right_bold_border = workbook.add_format({"bold": True, "right": 2, "bottom": 2}) - bottom_bold_border = workbook.add_format({"bold": True, "bottom": 2}) - cpu_num = os.environ.get("PPC_NUM_PROC") - if cpu_num is None: - raise EnvironmentError( - "Required environment variable 'PPC_NUM_PROC' is not set." - ) - cpu_num = int(cpu_num) - worksheet.write(0, 0, "cpu_num = " + str(cpu_num), right_bold_border) - - it = 1 - for type_of_task in list_of_type_of_tasks: - worksheet.write( - 0, it, "T_" + type_of_task + "(" + str(cpu_num) + ")", bottom_bold_border - ) - it += 1 - worksheet.write( - 0, - it, - "S(" - + str(cpu_num) - + ")" - + " = " - + "T_seq(" - + str(cpu_num) - + ")" - + " / " - + "T_" - + type_of_task - + "(" - + str(cpu_num) - + ")", - bottom_bold_border, - ) - it += 1 - worksheet.write( - 0, - it, - "Eff(" - + str(cpu_num) - + ")" - + " = " - + "S(" - + str(cpu_num) - + ")" - + " / " - + str(cpu_num), - right_bold_border, - ) - it += 1 - - it = 1 - for task_name in list(set(set_of_task_name)): - worksheet.write( - it, 0, task_name, workbook.add_format({"bold": True, "right": 2}) - ) - it += 1 - - it_i = 1 - it_j = 1 - right_border = workbook.add_format({"right": 2}) - for task_name in list(set(set_of_task_name)): - for type_of_task in list_of_type_of_tasks: - if task_name not in result_tables[table_name].keys(): - print(f"Warning! 
Task '{task_name}' is not found in results") - worksheet.write(it_j, it_i, "Error!") - it_i += 1 - worksheet.write(it_j, it_i, "Error!") - it_i += 1 - worksheet.write(it_j, it_i, "Error!") - it_i += 1 - continue - par_time = result_tables[table_name][task_name][type_of_task] - seq_time = result_tables[table_name][task_name]["seq"] - if par_time == 0: - speed_up = -1 - else: - speed_up = seq_time / par_time - efficiency = speed_up / cpu_num - worksheet.write(it_j, it_i, par_time) - it_i += 1 - worksheet.write(it_j, it_i, speed_up) - it_i += 1 - worksheet.write(it_j, it_i, efficiency, right_border) - it_i += 1 - it_i = 1 - it_j += 1 - workbook.close() - # Dump CSV for performance times - csv_file = os.path.join(xlsx_path, table_name + "_perf_table.csv") - with open(csv_file, "w", newline="") as csvfile: - writer = csv.writer(csvfile) - # Write header: Task, SEQ, OMP, TBB, STL, ALL - writer.writerow(["Task", "SEQ", "OMP", "TBB", "STL", "ALL"]) - for task_name in sorted(result_tables[table_name].keys()): - seq_time = result_tables[table_name][task_name]["seq"] - row = [ - task_name, - 1.0 if seq_time != 0 else "?", - (result_tables[table_name][task_name]["omp"] / seq_time) - if seq_time != 0 - else "?", - (result_tables[table_name][task_name]["tbb"] / seq_time) - if seq_time != 0 - else "?", - (result_tables[table_name][task_name]["stl"] / seq_time) - if seq_time != 0 - else "?", - (result_tables[table_name][task_name]["all"] / seq_time) - if seq_time != 0 - else "?", - ] - writer.writerow(row) + if perf_type not in result_tables: + result_tables[perf_type] = {} + if task_name not in result_tables[perf_type]: + result_tables[perf_type][task_name] = {} + for ttype in list_of_type_of_tasks: + result_tables[perf_type][task_name][ttype] = -1.0 + result_tables[perf_type][task_name][task_type] = perf_time + task_categories[task_name] = task_category + tasks_by_category[task_category].add(task_name) + + +for table_name, table_data in result_tables.items(): + # Prepare two workbooks/CSVs: threads and processes + for category in ["threads", "processes"]: + tasks_list = sorted(tasks_by_category[category]) + if not tasks_list: + continue + + cpu_num_env = os.environ.get("PPC_NUM_PROC") + if cpu_num_env is None: + raise EnvironmentError("Required environment variable 'PPC_NUM_PROC' is not set.") + cpu_num = int(cpu_num_env) + cols = _columns_for_category(category) + + # Excel + wb_path = os.path.join(xlsx_path, f"{category}_" + table_name + "_perf_table.xlsx") + workbook = xlsxwriter.Workbook(wb_path) + worksheet = workbook.add_worksheet() + _write_excel_sheet(workbook, worksheet, cpu_num, tasks_list, cols, table_data) + workbook.close() + + # CSV + header = ["Task", "SEQ"] + [c.upper() for c in cols[1:]] + csv_path = os.path.join(xlsx_path, f"{category}_" + table_name + "_perf_table.csv") + _write_csv(csv_path, header, tasks_list, table_data) From b35e4b7fcfaff17721cc8bb6b21579b7c1e54747 Mon Sep 17 00:00:00 2001 From: Alexander Nesterov Date: Fri, 10 Oct 2025 19:51:38 +0200 Subject: [PATCH 2/5] refactor formatting and improve clarity in performance table generation --- scoreboard/main.py | 18 +++++++++--- scripts/create_perf_table.py | 56 ++++++++++++++++++++++++++---------- 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/scoreboard/main.py b/scoreboard/main.py index 31976212..92015ec4 100644 --- a/scoreboard/main.py +++ b/scoreboard/main.py @@ -676,19 +676,29 @@ def _compute_display_deadlines_processes(n_items: int) -> list[date]: # Locate perf CSVs from CI or local runs (threads and 
processes) candidates_threads = [ - script_dir.parent / "build" / "perf_stat_dir" / "threads_task_run_perf_table.csv", + script_dir.parent + / "build" + / "perf_stat_dir" + / "threads_task_run_perf_table.csv", script_dir.parent / "perf_stat_dir" / "threads_task_run_perf_table.csv", # Fallback to old single-file name script_dir.parent / "build" / "perf_stat_dir" / "task_run_perf_table.csv", script_dir.parent / "perf_stat_dir" / "task_run_perf_table.csv", ] - threads_csv = next((p for p in candidates_threads if p.exists()), candidates_threads[0]) + threads_csv = next( + (p for p in candidates_threads if p.exists()), candidates_threads[0] + ) candidates_processes = [ - script_dir.parent / "build" / "perf_stat_dir" / "processes_task_run_perf_table.csv", + script_dir.parent + / "build" + / "perf_stat_dir" + / "processes_task_run_perf_table.csv", script_dir.parent / "perf_stat_dir" / "processes_task_run_perf_table.csv", ] - processes_csv = next((p for p in candidates_processes if p.exists()), candidates_processes[0]) + processes_csv = next( + (p for p in candidates_processes if p.exists()), candidates_processes[0] + ) # Read and merge performance statistics CSVs perf_stats_threads = load_performance_data_threads(threads_csv) diff --git a/scripts/create_perf_table.py b/scripts/create_perf_table.py index 88174b4c..4cb1d5ce 100644 --- a/scripts/create_perf_table.py +++ b/scripts/create_perf_table.py @@ -36,10 +36,19 @@ def _infer_category(task_name: str) -> str: def _columns_for_category(category: str) -> list[str]: - return ["seq", "omp", "tbb", "stl", "all"] if category == "threads" else ["seq", "mpi"] - - -def _write_excel_sheet(workbook, worksheet, cpu_num: int, tasks_list: list[str], cols: list[str], table: dict): + return ( + ["seq", "omp", "tbb", "stl", "all"] if category == "threads" else ["seq", "mpi"] + ) + + +def _write_excel_sheet( + workbook, + worksheet, + cpu_num: int, + tasks_list: list[str], + cols: list[str], + table: dict, +): worksheet.set_column("A:Z", 23) right_bold_border = workbook.add_format({"bold": True, "right": 2, "bottom": 2}) bottom_bold_border = workbook.add_format({"bold": True, "bottom": 2}) @@ -59,13 +68,17 @@ def _write_excel_sheet(workbook, worksheet, cpu_num: int, tasks_list: list[str], bottom_bold_border, ) col += 1 - worksheet.write(0, col, f"Eff({cpu_num}) = S({cpu_num}) / {cpu_num}", right_bold_border) + worksheet.write( + 0, col, f"Eff({cpu_num}) = S({cpu_num}) / {cpu_num}", right_bold_border + ) col += 1 # Task rows row = 1 for task_name in tasks_list: - worksheet.write(row, 0, task_name, workbook.add_format({"bold": True, "right": 2})) + worksheet.write( + row, 0, task_name, workbook.add_format({"bold": True, "right": 2}) + ) row += 1 # Values @@ -75,9 +88,12 @@ def _write_excel_sheet(workbook, worksheet, cpu_num: int, tasks_list: list[str], for ttype in cols: if task_name not in table: # no data for task at all - worksheet.write(row, col, "—"); col += 1 - worksheet.write(row, col, "—"); col += 1 - worksheet.write(row, col, "—", right_border); col += 1 + worksheet.write(row, col, "—") + col += 1 + worksheet.write(row, col, "—") + col += 1 + worksheet.write(row, col, "—", right_border) + col += 1 continue par_time = table[task_name].get(ttype, -1.0) seq_time = table[task_name].get("seq", -1.0) @@ -87,9 +103,12 @@ def _write_excel_sheet(workbook, worksheet, cpu_num: int, tasks_list: list[str], else: speed_up = seq_time / par_time efficiency = speed_up / cpu_num - worksheet.write(row, col, par_time if par_time != -1.0 else "?"); col += 1 - 
worksheet.write(row, col, speed_up); col += 1 - worksheet.write(row, col, efficiency, right_border); col += 1 + worksheet.write(row, col, par_time if par_time != -1.0 else "?") + col += 1 + worksheet.write(row, col, speed_up) + col += 1 + worksheet.write(row, col, efficiency, right_border) + col += 1 row += 1 @@ -109,6 +128,7 @@ def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict) row.append(val / seq_time if val != -1.0 else "?") writer.writerow(row) + parser = argparse.ArgumentParser() parser.add_argument( "-i", "--input", help="Input file path (logs of perf tests, .txt)", required=True @@ -236,12 +256,16 @@ def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict) cpu_num_env = os.environ.get("PPC_NUM_PROC") if cpu_num_env is None: - raise EnvironmentError("Required environment variable 'PPC_NUM_PROC' is not set.") + raise EnvironmentError( + "Required environment variable 'PPC_NUM_PROC' is not set." + ) cpu_num = int(cpu_num_env) cols = _columns_for_category(category) # Excel - wb_path = os.path.join(xlsx_path, f"{category}_" + table_name + "_perf_table.xlsx") + wb_path = os.path.join( + xlsx_path, f"{category}_" + table_name + "_perf_table.xlsx" + ) workbook = xlsxwriter.Workbook(wb_path) worksheet = workbook.add_worksheet() _write_excel_sheet(workbook, worksheet, cpu_num, tasks_list, cols, table_data) @@ -249,5 +273,7 @@ def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict) # CSV header = ["Task", "SEQ"] + [c.upper() for c in cols[1:]] - csv_path = os.path.join(xlsx_path, f"{category}_" + table_name + "_perf_table.csv") + csv_path = os.path.join( + xlsx_path, f"{category}_" + table_name + "_perf_table.csv" + ) _write_csv(csv_path, header, tasks_list, table_data) From 0a0806a3584a44d24682a569eb48ac5acd6ab00e Mon Sep 17 00:00:00 2001 From: Alexander Nesterov Date: Fri, 10 Oct 2025 23:27:42 +0200 Subject: [PATCH 3/5] refactor performance statistics handling to improve key resolution and alignment with task directories --- scoreboard/main.py | 91 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 7 deletions(-) diff --git a/scoreboard/main.py b/scoreboard/main.py index 92015ec4..ec187d3a 100644 --- a/scoreboard/main.py +++ b/scoreboard/main.py @@ -700,13 +700,13 @@ def _compute_display_deadlines_processes(n_items: int) -> list[date]: (p for p in candidates_processes if p.exists()), candidates_processes[0] ) - # Read and merge performance statistics CSVs + # Read and merge performance statistics CSVs (keys = CSV Task column) perf_stats_threads = load_performance_data_threads(threads_csv) perf_stats_processes = load_performance_data_processes(processes_csv) - perf_stats: dict[str, dict] = {} - perf_stats.update(perf_stats_threads) + perf_stats_raw: dict[str, dict] = {} + perf_stats_raw.update(perf_stats_threads) for k, v in perf_stats_processes.items(): - perf_stats[k] = {**perf_stats.get(k, {}), **v} + perf_stats_raw[k] = {**perf_stats_raw.get(k, {}), **v} # Partition tasks by tasks_type from settings.json threads_task_dirs = [ @@ -724,6 +724,73 @@ def _compute_display_deadlines_processes(n_items: int) -> list[date]: elif "processes" in name: processes_task_dirs.append(name) + # Resolve performance stats keys (from CSV Task names) to actual task directories + import re as _re + + def _family_from_name(name: str) -> tuple[str, int]: + # Infer family from CSV Task value, using only structural markers + # threads -> ("threads", 0); processes[_N] -> ("processes", N|1) + if "threads" in 
name: + return "threads", 0 + if "processes" in name: + m = _re.search(r"processes(?:_(\d+))?", name) + if m: + try: + idx = int(m.group(1)) if m.group(1) else 1 + except Exception: + idx = 1 + else: + idx = 1 + return "processes", idx + # Fallback: treat as threads family + return "threads", 0 + + def _family_from_dir(dir_name: str) -> tuple[str, int]: + # Prefer explicit tasks_type from settings.json and task_number from info.json + kind_guess = tasks_type_map.get(dir_name) or ( + "threads" if "threads" in dir_name else "processes" + ) + idx = 0 + if kind_guess == "processes": + # Lightweight reader to avoid dependency on later-scoped helpers + try: + import json as _json + + info_path = tasks_dir / dir_name / "info.json" + if info_path.exists(): + with open(info_path, "r") as _f: + data = _json.load(_f) + s = data.get("student", {}) if isinstance(data, dict) else {} + try: + idx = int(str(s.get("task_number", "0"))) + except Exception: + idx = 0 + except Exception: + idx = 0 + return kind_guess, idx + + # Build map family -> list of dir names in this repo + family_to_dirs: dict[tuple[str, int], list[str]] = {} + for d in sorted(directories.keys()): + fam = _family_from_dir(d) + family_to_dirs.setdefault(fam, []).append(d) + + # Aggregate perf by family (CSV keys may not match dir names) + perf_by_family: dict[tuple[str, int], dict] = {} + for key, vals in perf_stats_raw.items(): + fam = _family_from_name(key) + perf_by_family[fam] = {**perf_by_family.get(fam, {}), **vals} + + # Project family perf onto actual directories (prefer exact one per family) + perf_stats: dict[str, dict] = {} + for fam, vals in perf_by_family.items(): + dirs_for_family = family_to_dirs.get(fam, []) + if not dirs_for_family: + continue + # Assign same perf to all dirs in the family (usually one) + for d in dirs_for_family: + perf_stats[d] = vals.copy() + # Build rows for each page threads_rows = _build_rows_for_task_types( task_types_threads, @@ -758,7 +825,7 @@ def _identity_key(student: dict) -> str: ] ) - def _build_cell(dir_name: str, ttype: str): + def _build_cell(dir_name: str, ttype: str, perf_map: dict[str, dict]): status = directories[dir_name].get(ttype) sol_points, solution_style = get_solution_points_and_style(ttype, status, cfg) task_points = sol_points @@ -766,7 +833,7 @@ def _build_cell(dir_name: str, ttype: str): dir_name, ttype, sol_points, plagiarism_cfg, cfg, semester="processes" ) task_points += plagiarism_points - perf_val = perf_stats.get(dir_name, {}).get(ttype, "?") + perf_val = perf_map.get(dir_name, {}).get(ttype, "?") acceleration, efficiency = calculate_performance_metrics( perf_val, eff_num_proc, ttype ) @@ -832,7 +899,7 @@ def _build_cell(dir_name: str, ttype: str): proc_group_headers.append({"type": "seq"}) group_cells = [] for ttype in ["mpi", "seq"]: - cell, _ = _build_cell(d, ttype) + cell, _ = _build_cell(d, ttype, perf_stats) group_cells.append(cell) # Override displayed points for processes: S under MPI/SEQ from points-info; A points under MPI only s_mpi, s_seq, a_mpi, r_max = _find_process_points(cfg, n) @@ -948,6 +1015,16 @@ def _build_cell(dir_name: str, ttype: str): } ] + # Rebuild threads rows with resolved perf stats + threads_rows = _build_rows_for_task_types( + task_types_threads, + threads_task_dirs, + perf_stats, + cfg, + eff_num_proc, + deadlines_cfg, + ) + parser = argparse.ArgumentParser(description="Generate HTML scoreboard.") parser.add_argument( "-o", "--output", type=str, required=True, help="Output directory path" From 
c0ec6fadfdf22d3a1fdba7f14a92298bd8d1279f Mon Sep 17 00:00:00 2001
From: Alexander Nesterov
Date: Sat, 18 Oct 2025 12:32:01 +0200
Subject: [PATCH 4/5] refactor performance table script: improve file handling
 and environment variable management

---
 scripts/create_perf_table.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/scripts/create_perf_table.py b/scripts/create_perf_table.py
index 4cb1d5ce..5e096358 100644
--- a/scripts/create_perf_table.py
+++ b/scripts/create_perf_table.py
@@ -140,8 +140,6 @@ def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict)
 logs_path = os.path.abspath(args.input)
 xlsx_path = os.path.abspath(args.output)
 
-list_of_type_of_tasks = ["all", "mpi", "omp", "seq", "stl", "tbb"]
-
 # For each perf_type (pipeline/task_run) store times per task
 result_tables = {"pipeline": {}, "task_run": {}}
 # Map task name -> category (threads|processes)
@@ -149,8 +147,8 @@ def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict)
 # Track tasks per category to split output
 tasks_by_category = {"threads": set(), "processes": set()}
 
-logs_file = open(logs_path, "r")
-logs_lines = logs_file.readlines()
+with open(logs_path, "r") as logs_file:
+    logs_lines = logs_file.readlines()
 for line in logs_lines:
     # Handle both old format: tasks/task_type/task_name:perf_type:time
     # and new format: namespace_task_type_enabled:perf_type:time
@@ -254,11 +252,13 @@ def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict)
         if not tasks_list:
             continue
 
-        cpu_num_env = os.environ.get("PPC_NUM_PROC")
-        if cpu_num_env is None:
-            raise EnvironmentError(
-                "Required environment variable 'PPC_NUM_PROC' is not set."
-            )
+        # Use the matching environment variable per category
+        env_name = "PPC_NUM_THREADS" if category == "threads" else "PPC_NUM_PROC"
+        cpu_num_env = os.environ.get(env_name)
+        if cpu_num_env is None:
+            raise EnvironmentError(
+                f"Required environment variable '{env_name}' is not set."
+            )
         cpu_num = int(cpu_num_env)
         cols = _columns_for_category(category)
 

From 17adc592a8348574c13883de3635b60a05bc4a0b Mon Sep 17 00:00:00 2001
From: Alexander Nesterov
Date: Sat, 18 Oct 2025 13:48:07 +0200
Subject: [PATCH 5/5] refactor workflow: enhance performance data extraction
 with nested archive handling

---
 .github/workflows/pages.yml | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index ec0f14fe..259446c2 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -84,7 +84,16 @@ jobs:
       - name: Extract performance data
         run: |
           mkdir -p build/perf_stat_dir
-          unzip -o perf-stat.zip -d .
+          # The uploaded artifact may wrap a nested perf-stat.zip. Extract
+          # the outer archive into a staging directory first: testing for
+          # perf-stat.zip in the working directory would always succeed,
+          # since unzip leaves the outer archive itself in place.
+          unzip -o perf-stat.zip -d perf-stat-extract
+          if [ -f "perf-stat-extract/perf-stat.zip" ]; then
+            unzip -o perf-stat-extract/perf-stat.zip -d .
+          else
+            cp -a perf-stat-extract/. .
+          fi
       - name: CMake configure
         run: |
           cmake -S . -B build -DUSE_SCOREBOARD=ON
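
Reviewer note: the sketch below is illustrative and not part of the patches.
It shows the CSV shapes the loaders added in PATCH 1 expect
(load_performance_data_threads / load_performance_data_processes in
scoreboard/main.py) and how the per-task merge behaves. The sample rows are
invented for the example.

    import csv
    import io

    # Illustrative payloads; the real files are
    # threads_task_run_perf_table.csv and processes_task_run_perf_table.csv,
    # written by scripts/create_perf_table.py.
    THREADS_CSV = "Task,SEQ,OMP,TBB,STL,ALL\nexample_threads,1.0,0.47,0.52,0.61,0.44\n"
    PROCESSES_CSV = "Task,SEQ,MPI\nexample_processes_2,1.0,0.05\n"

    def parse(text, fields):
        # Mirrors the loaders: one dict per Task row, "?" for missing columns.
        stats = {}
        for row in csv.DictReader(io.StringIO(text)):
            task = row.get("Task")
            if task:
                stats[task] = {key: row.get(col, "?") for key, col in fields.items()}
        return stats

    threads = parse(THREADS_CSV, {"seq": "SEQ", "omp": "OMP", "tbb": "TBB",
                                  "stl": "STL", "all": "ALL"})
    processes = parse(PROCESSES_CSV, {"seq": "SEQ", "mpi": "MPI"})

    # Same merge as in PATCH 1: processes values win on key collisions.
    merged = dict(threads)
    for task, vals in processes.items():
        merged[task] = {**merged.get(task, {}), **vals}
    print(merged)

A task that appears in both files keeps its threads ratios and gains the
"mpi" entry, since the processes dict is merged last; PATCH 3 then remaps
these CSV Task keys onto the actual task directories via _family_from_name.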