From 20b8f1b493f5973067cf2be56d4289fca0211d06 Mon Sep 17 00:00:00 2001 From: Alexander Nesterov Date: Fri, 10 Oct 2025 19:32:37 +0200 Subject: [PATCH 1/5] refactor performance data handling and output generation --- scoreboard/main.py | 72 ++++++-- scripts/create_perf_table.py | 333 +++++++++++++++++++---------------- 2 files changed, 239 insertions(+), 166 deletions(-) diff --git a/scoreboard/main.py b/scoreboard/main.py index 25c1206d..31976212 100644 --- a/scoreboard/main.py +++ b/scoreboard/main.py @@ -108,26 +108,48 @@ def discover_tasks(tasks_dir, task_types): directories, tasks_type_map = discover_tasks(tasks_dir, task_types) -def load_performance_data(perf_stat_file_path): - """Load and parse performance statistics from CSV file.""" +def load_performance_data_threads(perf_stat_file_path: Path) -> dict: + """Load threads performance ratios (T_x/T_seq) from CSV. + Expected header: Task, SEQ, OMP, TBB, STL, ALL + """ + perf_stats: dict[str, dict] = {} + if perf_stat_file_path.exists(): + with open(perf_stat_file_path, "r", newline="") as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + task_name = row.get("Task") + if not task_name: + continue + perf_stats[task_name] = { + "seq": row.get("SEQ", "?"), + "omp": row.get("OMP", "?"), + "tbb": row.get("TBB", "?"), + "stl": row.get("STL", "?"), + "all": row.get("ALL", "?"), + } + else: + logger.warning("Threads perf stats CSV not found at %s", perf_stat_file_path) + return perf_stats - perf_stats = dict() + +def load_performance_data_processes(perf_stat_file_path: Path) -> dict: + """Load processes performance ratios (T_x/T_seq) from CSV. + Expected header: Task, SEQ, MPI + """ + perf_stats: dict[str, dict] = {} if perf_stat_file_path.exists(): with open(perf_stat_file_path, "r", newline="") as csvfile: reader = csv.DictReader(csvfile) for row in reader: task_name = row.get("Task") - if task_name: - perf_stats[task_name] = { - "seq": row.get("SEQ", "?"), - "omp": row.get("OMP", "?"), - "tbb": row.get("TBB", "?"), - "stl": row.get("STL", "?"), - "all": row.get("ALL", "?"), - "mpi": "N/A", - } + if not task_name: + continue + perf_stats[task_name] = { + "seq": row.get("SEQ", "?"), + "mpi": row.get("MPI", "?"), + } else: - logger.warning("Performance stats CSV not found at %s", perf_stat_file_path) + logger.warning("Processes perf stats CSV not found at %s", perf_stat_file_path) return perf_stats @@ -652,15 +674,29 @@ def _compute_display_deadlines_processes(n_items: int) -> list[date]: ds = _evenly_spaced_dates(n_items, s, e) return ds - # Locate perf CSV from CI or local runs - candidates = [ + # Locate perf CSVs from CI or local runs (threads and processes) + candidates_threads = [ + script_dir.parent / "build" / "perf_stat_dir" / "threads_task_run_perf_table.csv", + script_dir.parent / "perf_stat_dir" / "threads_task_run_perf_table.csv", + # Fallback to old single-file name script_dir.parent / "build" / "perf_stat_dir" / "task_run_perf_table.csv", script_dir.parent / "perf_stat_dir" / "task_run_perf_table.csv", ] - perf_stat_file_path = next((p for p in candidates if p.exists()), candidates[0]) + threads_csv = next((p for p in candidates_threads if p.exists()), candidates_threads[0]) - # Read and parse performance statistics CSV - perf_stats = load_performance_data(perf_stat_file_path) + candidates_processes = [ + script_dir.parent / "build" / "perf_stat_dir" / "processes_task_run_perf_table.csv", + script_dir.parent / "perf_stat_dir" / "processes_task_run_perf_table.csv", + ] + processes_csv = next((p for p in 
candidates_processes if p.exists()), candidates_processes[0]) + + # Read and merge performance statistics CSVs + perf_stats_threads = load_performance_data_threads(threads_csv) + perf_stats_processes = load_performance_data_processes(processes_csv) + perf_stats: dict[str, dict] = {} + perf_stats.update(perf_stats_threads) + for k, v in perf_stats_processes.items(): + perf_stats[k] = {**perf_stats.get(k, {}), **v} # Partition tasks by tasks_type from settings.json threads_task_dirs = [ diff --git a/scripts/create_perf_table.py b/scripts/create_perf_table.py index 0e5a67f6..88174b4c 100644 --- a/scripts/create_perf_table.py +++ b/scripts/create_perf_table.py @@ -4,6 +4,111 @@ import xlsxwriter import csv +# ------------------------------- +# Helpers and configuration +# ------------------------------- + +# Known task types (used to pre-initialize tables) +list_of_type_of_tasks = ["all", "mpi", "omp", "seq", "stl", "tbb"] + +# Compile patterns once +OLD_PATTERN = re.compile(r"tasks[\/|\\](\w*)[\/|\\](\w*):(\w*):(-*\d*\.\d*)") +NEW_PATTERN = re.compile( + r"(\w+_test_task_(threads|processes))_(\w+)_enabled:(\w*):(-*\d*\.\d*)" +) +# Example formats: +# example_threads_omp_enabled:task_run:0.4749 +# example_processes_2_mpi_enabled:pipeline:0.0507 +SIMPLE_PATTERN = re.compile( + r"(.+?)_(omp|seq|tbb|stl|all|mpi)_enabled:(task_run|pipeline):(-*\d*\.\d*)" +) + + +def _ensure_task_tables(result_tables: dict, perf_type: str, task_name: str) -> None: + if perf_type not in result_tables: + result_tables[perf_type] = {} + if task_name not in result_tables[perf_type]: + result_tables[perf_type][task_name] = {t: -1.0 for t in list_of_type_of_tasks} + + +def _infer_category(task_name: str) -> str: + return "threads" if "threads" in task_name else "processes" + + +def _columns_for_category(category: str) -> list[str]: + return ["seq", "omp", "tbb", "stl", "all"] if category == "threads" else ["seq", "mpi"] + + +def _write_excel_sheet(workbook, worksheet, cpu_num: int, tasks_list: list[str], cols: list[str], table: dict): + worksheet.set_column("A:Z", 23) + right_bold_border = workbook.add_format({"bold": True, "right": 2, "bottom": 2}) + bottom_bold_border = workbook.add_format({"bold": True, "bottom": 2}) + right_border = workbook.add_format({"right": 2}) + + worksheet.write(0, 0, "cpu_num = " + str(cpu_num), right_bold_border) + + # Header (T_x, S, Eff) per column + col = 1 + for ttype in cols: + worksheet.write(0, col, f"T_{ttype}({cpu_num})", bottom_bold_border) + col += 1 + worksheet.write( + 0, + col, + f"S({cpu_num}) = T_seq({cpu_num}) / T_{ttype}({cpu_num})", + bottom_bold_border, + ) + col += 1 + worksheet.write(0, col, f"Eff({cpu_num}) = S({cpu_num}) / {cpu_num}", right_bold_border) + col += 1 + + # Task rows + row = 1 + for task_name in tasks_list: + worksheet.write(row, 0, task_name, workbook.add_format({"bold": True, "right": 2})) + row += 1 + + # Values + row = 1 + for task_name in tasks_list: + col = 1 + for ttype in cols: + if task_name not in table: + # no data for task at all + worksheet.write(row, col, "—"); col += 1 + worksheet.write(row, col, "—"); col += 1 + worksheet.write(row, col, "—", right_border); col += 1 + continue + par_time = table[task_name].get(ttype, -1.0) + seq_time = table[task_name].get("seq", -1.0) + if par_time in (0.0, -1.0) or seq_time in (0.0, -1.0): + speed_up = "—" + efficiency = "—" + else: + speed_up = seq_time / par_time + efficiency = speed_up / cpu_num + worksheet.write(row, col, par_time if par_time != -1.0 else "?"); col += 1 + worksheet.write(row, col, 
speed_up); col += 1 + worksheet.write(row, col, efficiency, right_border); col += 1 + row += 1 + + +def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict): + with open(path, "w", newline="") as csvfile: + writer = csv.writer(csvfile) + writer.writerow(header) + for task_name in tasks_list: + seq_time = table.get(task_name, {}).get("seq", -1.0) + if seq_time in (0.0, -1.0): + writer.writerow([task_name] + ["?" for _ in header[1:]]) + continue + row = [task_name, 1.0] + # Remaining headers correspond to columns starting from 2 + for col_name in header[2:]: + val = table[task_name].get(col_name.lower(), -1.0) + row.append(val / seq_time if val != -1.0 else "?") + writer.writerow(row) + parser = argparse.ArgumentParser() parser.add_argument( "-i", "--input", help="Input file path (logs of perf tests, .txt)", required=True @@ -17,30 +122,30 @@ list_of_type_of_tasks = ["all", "mpi", "omp", "seq", "stl", "tbb"] +# For each perf_type (pipeline/task_run) store times per task result_tables = {"pipeline": {}, "task_run": {}} -set_of_task_name = [] +# Map task name -> category (threads|processes) +task_categories = {} +# Track tasks per category to split output +tasks_by_category = {"threads": set(), "processes": set()} logs_file = open(logs_path, "r") logs_lines = logs_file.readlines() for line in logs_lines: # Handle both old format: tasks/task_type/task_name:perf_type:time # and new format: namespace_task_type_enabled:perf_type:time - old_pattern = r"tasks[\/|\\](\w*)[\/|\\](\w*):(\w*):(-*\d*\.\d*)" - new_pattern = ( - r"(\w+_test_task_(threads|processes))_(\w+)_enabled:(\w*):(-*\d*\.\d*)" - ) - - old_result = re.findall(old_pattern, line) - new_result = re.findall(new_pattern, line) + old_result = OLD_PATTERN.findall(line) + new_result = NEW_PATTERN.findall(line) + simple_result = SIMPLE_PATTERN.findall(line) if len(old_result): task_name = old_result[0][1] perf_type = old_result[0][2] - set_of_task_name.append(task_name) - result_tables[perf_type][task_name] = {} - - for ttype in list_of_type_of_tasks: - result_tables[perf_type][task_name][ttype] = -1.0 + # legacy: track task in threads category by default + _ensure_task_tables(result_tables, perf_type, task_name) + # Unknown category in legacy format; default to threads + task_categories[task_name] = "threads" + tasks_by_category["threads"].add(task_name) elif len(new_result): # Extract task name from namespace (e.g., "example_threads" from "nesterov_a_test_task_threads") full_task_name = new_result[0][0] @@ -48,26 +153,30 @@ task_name = f"example_{task_category}" perf_type = new_result[0][3] - if task_name not in set_of_task_name: - set_of_task_name.append(task_name) + # no set tracking needed; category mapping below - if perf_type not in result_tables: - result_tables[perf_type] = {} - if task_name not in result_tables[perf_type]: - result_tables[perf_type][task_name] = {} - for ttype in list_of_type_of_tasks: - result_tables[perf_type][task_name][ttype] = -1.0 + _ensure_task_tables(result_tables, perf_type, task_name) + task_categories[task_name] = task_category + tasks_by_category[task_category].add(task_name) + elif len(simple_result): + # Extract task name in the current format (prefix already includes category suffix) + task_name = simple_result[0][0] + # Infer category by substring + task_category = "threads" if "threads" in task_name else "processes" + perf_type = simple_result[0][2] + + # no set tracking needed; category mapping below + + _ensure_task_tables(result_tables, perf_type, task_name) + 
task_categories[task_name] = task_category + tasks_by_category[task_category].add(task_name) for line in logs_lines: # Handle both old format: tasks/task_type/task_name:perf_type:time # and new format: namespace_task_type_enabled:perf_type:time - old_pattern = r"tasks[\/|\\](\w*)[\/|\\](\w*):(\w*):(-*\d*\.\d*)" - new_pattern = ( - r"(\w+_test_task_(threads|processes))_(\w+)_enabled:(\w*):(-*\d*\.\d*)" - ) - - old_result = re.findall(old_pattern, line) - new_result = re.findall(new_pattern, line) + old_result = OLD_PATTERN.findall(line) + new_result = NEW_PATTERN.findall(line) + simple_result = SIMPLE_PATTERN.findall(line) if len(old_result): task_type = old_result[0][0] @@ -80,7 +189,6 @@ result_tables[perf_type][task_name][task_type] = perf_time elif len(new_result): # Extract task details from namespace format - full_task_name = new_result[0][0] task_category = new_result[0][1] # "threads" or "processes" task_type = new_result[0][2] # "all", "omp", "seq", etc. perf_type = new_result[0][3] @@ -93,124 +201,53 @@ if task_name in result_tables[perf_type]: result_tables[perf_type][task_name][task_type] = perf_time + task_categories[task_name] = task_category + tasks_by_category[task_category].add(task_name) + elif len(simple_result): + # Extract details from the simplified pattern (current logs) + task_name = simple_result[0][0] + # Infer category by substring present in task_name + task_category = "threads" if "threads" in task_name else "processes" + task_type = simple_result[0][1] + perf_type = simple_result[0][2] + perf_time = float(simple_result[0][3]) + if perf_time < 0.001: + msg = f"Performance time = {perf_time} < 0.001 second : for {task_type} - {task_name} - {perf_type} \n" + raise Exception(msg) -for table_name in result_tables: - workbook = xlsxwriter.Workbook( - os.path.join(xlsx_path, table_name + "_perf_table.xlsx") - ) - worksheet = workbook.add_worksheet() - worksheet.set_column("A:Z", 23) - right_bold_border = workbook.add_format({"bold": True, "right": 2, "bottom": 2}) - bottom_bold_border = workbook.add_format({"bold": True, "bottom": 2}) - cpu_num = os.environ.get("PPC_NUM_PROC") - if cpu_num is None: - raise EnvironmentError( - "Required environment variable 'PPC_NUM_PROC' is not set." - ) - cpu_num = int(cpu_num) - worksheet.write(0, 0, "cpu_num = " + str(cpu_num), right_bold_border) - - it = 1 - for type_of_task in list_of_type_of_tasks: - worksheet.write( - 0, it, "T_" + type_of_task + "(" + str(cpu_num) + ")", bottom_bold_border - ) - it += 1 - worksheet.write( - 0, - it, - "S(" - + str(cpu_num) - + ")" - + " = " - + "T_seq(" - + str(cpu_num) - + ")" - + " / " - + "T_" - + type_of_task - + "(" - + str(cpu_num) - + ")", - bottom_bold_border, - ) - it += 1 - worksheet.write( - 0, - it, - "Eff(" - + str(cpu_num) - + ")" - + " = " - + "S(" - + str(cpu_num) - + ")" - + " / " - + str(cpu_num), - right_bold_border, - ) - it += 1 - - it = 1 - for task_name in list(set(set_of_task_name)): - worksheet.write( - it, 0, task_name, workbook.add_format({"bold": True, "right": 2}) - ) - it += 1 - - it_i = 1 - it_j = 1 - right_border = workbook.add_format({"right": 2}) - for task_name in list(set(set_of_task_name)): - for type_of_task in list_of_type_of_tasks: - if task_name not in result_tables[table_name].keys(): - print(f"Warning! 
Task '{task_name}' is not found in results") - worksheet.write(it_j, it_i, "Error!") - it_i += 1 - worksheet.write(it_j, it_i, "Error!") - it_i += 1 - worksheet.write(it_j, it_i, "Error!") - it_i += 1 - continue - par_time = result_tables[table_name][task_name][type_of_task] - seq_time = result_tables[table_name][task_name]["seq"] - if par_time == 0: - speed_up = -1 - else: - speed_up = seq_time / par_time - efficiency = speed_up / cpu_num - worksheet.write(it_j, it_i, par_time) - it_i += 1 - worksheet.write(it_j, it_i, speed_up) - it_i += 1 - worksheet.write(it_j, it_i, efficiency, right_border) - it_i += 1 - it_i = 1 - it_j += 1 - workbook.close() - # Dump CSV for performance times - csv_file = os.path.join(xlsx_path, table_name + "_perf_table.csv") - with open(csv_file, "w", newline="") as csvfile: - writer = csv.writer(csvfile) - # Write header: Task, SEQ, OMP, TBB, STL, ALL - writer.writerow(["Task", "SEQ", "OMP", "TBB", "STL", "ALL"]) - for task_name in sorted(result_tables[table_name].keys()): - seq_time = result_tables[table_name][task_name]["seq"] - row = [ - task_name, - 1.0 if seq_time != 0 else "?", - (result_tables[table_name][task_name]["omp"] / seq_time) - if seq_time != 0 - else "?", - (result_tables[table_name][task_name]["tbb"] / seq_time) - if seq_time != 0 - else "?", - (result_tables[table_name][task_name]["stl"] / seq_time) - if seq_time != 0 - else "?", - (result_tables[table_name][task_name]["all"] / seq_time) - if seq_time != 0 - else "?", - ] - writer.writerow(row) + if perf_type not in result_tables: + result_tables[perf_type] = {} + if task_name not in result_tables[perf_type]: + result_tables[perf_type][task_name] = {} + for ttype in list_of_type_of_tasks: + result_tables[perf_type][task_name][ttype] = -1.0 + result_tables[perf_type][task_name][task_type] = perf_time + task_categories[task_name] = task_category + tasks_by_category[task_category].add(task_name) + + +for table_name, table_data in result_tables.items(): + # Prepare two workbooks/CSVs: threads and processes + for category in ["threads", "processes"]: + tasks_list = sorted(tasks_by_category[category]) + if not tasks_list: + continue + + cpu_num_env = os.environ.get("PPC_NUM_PROC") + if cpu_num_env is None: + raise EnvironmentError("Required environment variable 'PPC_NUM_PROC' is not set.") + cpu_num = int(cpu_num_env) + cols = _columns_for_category(category) + + # Excel + wb_path = os.path.join(xlsx_path, f"{category}_" + table_name + "_perf_table.xlsx") + workbook = xlsxwriter.Workbook(wb_path) + worksheet = workbook.add_worksheet() + _write_excel_sheet(workbook, worksheet, cpu_num, tasks_list, cols, table_data) + workbook.close() + + # CSV + header = ["Task", "SEQ"] + [c.upper() for c in cols[1:]] + csv_path = os.path.join(xlsx_path, f"{category}_" + table_name + "_perf_table.csv") + _write_csv(csv_path, header, tasks_list, table_data) From b35e4b7fcfaff17721cc8bb6b21579b7c1e54747 Mon Sep 17 00:00:00 2001 From: Alexander Nesterov Date: Fri, 10 Oct 2025 19:51:38 +0200 Subject: [PATCH 2/5] refactor formatting and improve clarity in performance table generation --- scoreboard/main.py | 18 +++++++++--- scripts/create_perf_table.py | 56 ++++++++++++++++++++++++++---------- 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/scoreboard/main.py b/scoreboard/main.py index 31976212..92015ec4 100644 --- a/scoreboard/main.py +++ b/scoreboard/main.py @@ -676,19 +676,29 @@ def _compute_display_deadlines_processes(n_items: int) -> list[date]: # Locate perf CSVs from CI or local runs (threads and 
processes) candidates_threads = [ - script_dir.parent / "build" / "perf_stat_dir" / "threads_task_run_perf_table.csv", + script_dir.parent + / "build" + / "perf_stat_dir" + / "threads_task_run_perf_table.csv", script_dir.parent / "perf_stat_dir" / "threads_task_run_perf_table.csv", # Fallback to old single-file name script_dir.parent / "build" / "perf_stat_dir" / "task_run_perf_table.csv", script_dir.parent / "perf_stat_dir" / "task_run_perf_table.csv", ] - threads_csv = next((p for p in candidates_threads if p.exists()), candidates_threads[0]) + threads_csv = next( + (p for p in candidates_threads if p.exists()), candidates_threads[0] + ) candidates_processes = [ - script_dir.parent / "build" / "perf_stat_dir" / "processes_task_run_perf_table.csv", + script_dir.parent + / "build" + / "perf_stat_dir" + / "processes_task_run_perf_table.csv", script_dir.parent / "perf_stat_dir" / "processes_task_run_perf_table.csv", ] - processes_csv = next((p for p in candidates_processes if p.exists()), candidates_processes[0]) + processes_csv = next( + (p for p in candidates_processes if p.exists()), candidates_processes[0] + ) # Read and merge performance statistics CSVs perf_stats_threads = load_performance_data_threads(threads_csv) diff --git a/scripts/create_perf_table.py b/scripts/create_perf_table.py index 88174b4c..4cb1d5ce 100644 --- a/scripts/create_perf_table.py +++ b/scripts/create_perf_table.py @@ -36,10 +36,19 @@ def _infer_category(task_name: str) -> str: def _columns_for_category(category: str) -> list[str]: - return ["seq", "omp", "tbb", "stl", "all"] if category == "threads" else ["seq", "mpi"] - - -def _write_excel_sheet(workbook, worksheet, cpu_num: int, tasks_list: list[str], cols: list[str], table: dict): + return ( + ["seq", "omp", "tbb", "stl", "all"] if category == "threads" else ["seq", "mpi"] + ) + + +def _write_excel_sheet( + workbook, + worksheet, + cpu_num: int, + tasks_list: list[str], + cols: list[str], + table: dict, +): worksheet.set_column("A:Z", 23) right_bold_border = workbook.add_format({"bold": True, "right": 2, "bottom": 2}) bottom_bold_border = workbook.add_format({"bold": True, "bottom": 2}) @@ -59,13 +68,17 @@ def _write_excel_sheet(workbook, worksheet, cpu_num: int, tasks_list: list[str], bottom_bold_border, ) col += 1 - worksheet.write(0, col, f"Eff({cpu_num}) = S({cpu_num}) / {cpu_num}", right_bold_border) + worksheet.write( + 0, col, f"Eff({cpu_num}) = S({cpu_num}) / {cpu_num}", right_bold_border + ) col += 1 # Task rows row = 1 for task_name in tasks_list: - worksheet.write(row, 0, task_name, workbook.add_format({"bold": True, "right": 2})) + worksheet.write( + row, 0, task_name, workbook.add_format({"bold": True, "right": 2}) + ) row += 1 # Values @@ -75,9 +88,12 @@ def _write_excel_sheet(workbook, worksheet, cpu_num: int, tasks_list: list[str], for ttype in cols: if task_name not in table: # no data for task at all - worksheet.write(row, col, "—"); col += 1 - worksheet.write(row, col, "—"); col += 1 - worksheet.write(row, col, "—", right_border); col += 1 + worksheet.write(row, col, "—") + col += 1 + worksheet.write(row, col, "—") + col += 1 + worksheet.write(row, col, "—", right_border) + col += 1 continue par_time = table[task_name].get(ttype, -1.0) seq_time = table[task_name].get("seq", -1.0) @@ -87,9 +103,12 @@ def _write_excel_sheet(workbook, worksheet, cpu_num: int, tasks_list: list[str], else: speed_up = seq_time / par_time efficiency = speed_up / cpu_num - worksheet.write(row, col, par_time if par_time != -1.0 else "?"); col += 1 - 
worksheet.write(row, col, speed_up); col += 1 - worksheet.write(row, col, efficiency, right_border); col += 1 + worksheet.write(row, col, par_time if par_time != -1.0 else "?") + col += 1 + worksheet.write(row, col, speed_up) + col += 1 + worksheet.write(row, col, efficiency, right_border) + col += 1 row += 1 @@ -109,6 +128,7 @@ def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict) row.append(val / seq_time if val != -1.0 else "?") writer.writerow(row) + parser = argparse.ArgumentParser() parser.add_argument( "-i", "--input", help="Input file path (logs of perf tests, .txt)", required=True @@ -236,12 +256,16 @@ def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict) cpu_num_env = os.environ.get("PPC_NUM_PROC") if cpu_num_env is None: - raise EnvironmentError("Required environment variable 'PPC_NUM_PROC' is not set.") + raise EnvironmentError( + "Required environment variable 'PPC_NUM_PROC' is not set." + ) cpu_num = int(cpu_num_env) cols = _columns_for_category(category) # Excel - wb_path = os.path.join(xlsx_path, f"{category}_" + table_name + "_perf_table.xlsx") + wb_path = os.path.join( + xlsx_path, f"{category}_" + table_name + "_perf_table.xlsx" + ) workbook = xlsxwriter.Workbook(wb_path) worksheet = workbook.add_worksheet() _write_excel_sheet(workbook, worksheet, cpu_num, tasks_list, cols, table_data) @@ -249,5 +273,7 @@ def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict) # CSV header = ["Task", "SEQ"] + [c.upper() for c in cols[1:]] - csv_path = os.path.join(xlsx_path, f"{category}_" + table_name + "_perf_table.csv") + csv_path = os.path.join( + xlsx_path, f"{category}_" + table_name + "_perf_table.csv" + ) _write_csv(csv_path, header, tasks_list, table_data) From 0a0806a3584a44d24682a569eb48ac5acd6ab00e Mon Sep 17 00:00:00 2001 From: Alexander Nesterov Date: Fri, 10 Oct 2025 23:27:42 +0200 Subject: [PATCH 3/5] refactor performance statistics handling to improve key resolution and alignment with task directories --- scoreboard/main.py | 91 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 7 deletions(-) diff --git a/scoreboard/main.py b/scoreboard/main.py index 92015ec4..ec187d3a 100644 --- a/scoreboard/main.py +++ b/scoreboard/main.py @@ -700,13 +700,13 @@ def _compute_display_deadlines_processes(n_items: int) -> list[date]: (p for p in candidates_processes if p.exists()), candidates_processes[0] ) - # Read and merge performance statistics CSVs + # Read and merge performance statistics CSVs (keys = CSV Task column) perf_stats_threads = load_performance_data_threads(threads_csv) perf_stats_processes = load_performance_data_processes(processes_csv) - perf_stats: dict[str, dict] = {} - perf_stats.update(perf_stats_threads) + perf_stats_raw: dict[str, dict] = {} + perf_stats_raw.update(perf_stats_threads) for k, v in perf_stats_processes.items(): - perf_stats[k] = {**perf_stats.get(k, {}), **v} + perf_stats_raw[k] = {**perf_stats_raw.get(k, {}), **v} # Partition tasks by tasks_type from settings.json threads_task_dirs = [ @@ -724,6 +724,73 @@ def _compute_display_deadlines_processes(n_items: int) -> list[date]: elif "processes" in name: processes_task_dirs.append(name) + # Resolve performance stats keys (from CSV Task names) to actual task directories + import re as _re + + def _family_from_name(name: str) -> tuple[str, int]: + # Infer family from CSV Task value, using only structural markers + # threads -> ("threads", 0); processes[_N] -> ("processes", N|1) + if "threads" in 
name: + return "threads", 0 + if "processes" in name: + m = _re.search(r"processes(?:_(\d+))?", name) + if m: + try: + idx = int(m.group(1)) if m.group(1) else 1 + except Exception: + idx = 1 + else: + idx = 1 + return "processes", idx + # Fallback: treat as threads family + return "threads", 0 + + def _family_from_dir(dir_name: str) -> tuple[str, int]: + # Prefer explicit tasks_type from settings.json and task_number from info.json + kind_guess = tasks_type_map.get(dir_name) or ( + "threads" if "threads" in dir_name else "processes" + ) + idx = 0 + if kind_guess == "processes": + # Lightweight reader to avoid dependency on later-scoped helpers + try: + import json as _json + + info_path = tasks_dir / dir_name / "info.json" + if info_path.exists(): + with open(info_path, "r") as _f: + data = _json.load(_f) + s = data.get("student", {}) if isinstance(data, dict) else {} + try: + idx = int(str(s.get("task_number", "0"))) + except Exception: + idx = 0 + except Exception: + idx = 0 + return kind_guess, idx + + # Build map family -> list of dir names in this repo + family_to_dirs: dict[tuple[str, int], list[str]] = {} + for d in sorted(directories.keys()): + fam = _family_from_dir(d) + family_to_dirs.setdefault(fam, []).append(d) + + # Aggregate perf by family (CSV keys may not match dir names) + perf_by_family: dict[tuple[str, int], dict] = {} + for key, vals in perf_stats_raw.items(): + fam = _family_from_name(key) + perf_by_family[fam] = {**perf_by_family.get(fam, {}), **vals} + + # Project family perf onto actual directories (prefer exact one per family) + perf_stats: dict[str, dict] = {} + for fam, vals in perf_by_family.items(): + dirs_for_family = family_to_dirs.get(fam, []) + if not dirs_for_family: + continue + # Assign same perf to all dirs in the family (usually one) + for d in dirs_for_family: + perf_stats[d] = vals.copy() + # Build rows for each page threads_rows = _build_rows_for_task_types( task_types_threads, @@ -758,7 +825,7 @@ def _identity_key(student: dict) -> str: ] ) - def _build_cell(dir_name: str, ttype: str): + def _build_cell(dir_name: str, ttype: str, perf_map: dict[str, dict]): status = directories[dir_name].get(ttype) sol_points, solution_style = get_solution_points_and_style(ttype, status, cfg) task_points = sol_points @@ -766,7 +833,7 @@ def _build_cell(dir_name: str, ttype: str): dir_name, ttype, sol_points, plagiarism_cfg, cfg, semester="processes" ) task_points += plagiarism_points - perf_val = perf_stats.get(dir_name, {}).get(ttype, "?") + perf_val = perf_map.get(dir_name, {}).get(ttype, "?") acceleration, efficiency = calculate_performance_metrics( perf_val, eff_num_proc, ttype ) @@ -832,7 +899,7 @@ def _build_cell(dir_name: str, ttype: str): proc_group_headers.append({"type": "seq"}) group_cells = [] for ttype in ["mpi", "seq"]: - cell, _ = _build_cell(d, ttype) + cell, _ = _build_cell(d, ttype, perf_stats) group_cells.append(cell) # Override displayed points for processes: S under MPI/SEQ from points-info; A points under MPI only s_mpi, s_seq, a_mpi, r_max = _find_process_points(cfg, n) @@ -948,6 +1015,16 @@ def _build_cell(dir_name: str, ttype: str): } ] + # Rebuild threads rows with resolved perf stats + threads_rows = _build_rows_for_task_types( + task_types_threads, + threads_task_dirs, + perf_stats, + cfg, + eff_num_proc, + deadlines_cfg, + ) + parser = argparse.ArgumentParser(description="Generate HTML scoreboard.") parser.add_argument( "-o", "--output", type=str, required=True, help="Output directory path" From 
c0ec6fadfdf22d3a1fdba7f14a92298bd8d1279f Mon Sep 17 00:00:00 2001
From: Alexander Nesterov
Date: Sat, 18 Oct 2025 12:32:01 +0200
Subject: [PATCH 4/5] refactor performance table script: improve file handling
 and environment variable management

---
 scripts/create_perf_table.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/scripts/create_perf_table.py b/scripts/create_perf_table.py
index 4cb1d5ce..5e096358 100644
--- a/scripts/create_perf_table.py
+++ b/scripts/create_perf_table.py
@@ -140,8 +140,6 @@ def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict)
 logs_path = os.path.abspath(args.input)
 xlsx_path = os.path.abspath(args.output)
 
-list_of_type_of_tasks = ["all", "mpi", "omp", "seq", "stl", "tbb"]
-
 # For each perf_type (pipeline/task_run) store times per task
 result_tables = {"pipeline": {}, "task_run": {}}
 # Map task name -> category (threads|processes)
@@ -149,8 +147,8 @@ def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict)
 # Track tasks per category to split output
 tasks_by_category = {"threads": set(), "processes": set()}
 
-logs_file = open(logs_path, "r")
-logs_lines = logs_file.readlines()
+with open(logs_path, "r") as logs_file:
+    logs_lines = logs_file.readlines()
 for line in logs_lines:
     # Handle both old format: tasks/task_type/task_name:perf_type:time
     # and new format: namespace_task_type_enabled:perf_type:time
@@ -254,11 +252,13 @@ def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict)
         if not tasks_list:
             continue
 
-        cpu_num_env = os.environ.get("PPC_NUM_PROC")
-        if cpu_num_env is None:
-            raise EnvironmentError(
-                "Required environment variable 'PPC_NUM_PROC' is not set."
-            )
+        # Use the matching environment variable per category
+        env_name = "PPC_NUM_THREADS" if category == "threads" else "PPC_NUM_PROC"
+        cpu_num_env = os.environ.get(env_name)
+        if cpu_num_env is None:
+            raise EnvironmentError(
+                f"Required environment variable '{env_name}' is not set."
+            )
         cpu_num = int(cpu_num_env)
         cols = _columns_for_category(category)
 

From 17adc592a8348574c13883de3635b60a05bc4a0b Mon Sep 17 00:00:00 2001
From: Alexander Nesterov
Date: Sat, 18 Oct 2025 13:48:07 +0200
Subject: [PATCH 5/5] refactor workflow: enhance performance data extraction
 with nested archive handling

---
 .github/workflows/pages.yml | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index ec0f14fe..259446c2 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -84,7 +84,16 @@ jobs:
       - name: Extract performance data
         run: |
           mkdir -p build/perf_stat_dir
-          unzip -o perf-stat.zip -d .
+          # The uploaded artifact may wrap a nested perf-stat.zip. Extract
+          # the outer archive into a staging directory first: testing for
+          # perf-stat.zip in the working directory would always succeed,
+          # since unzip leaves the outer archive itself in place.
+          unzip -o perf-stat.zip -d perf-stat-extract
+          if [ -f "perf-stat-extract/perf-stat.zip" ]; then
+            unzip -o perf-stat-extract/perf-stat.zip -d .
+          else
+            cp -a perf-stat-extract/. .
+          fi
       - name: CMake configure
         run: |
           cmake -S . -B build -DUSE_SCOREBOARD=ON
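
Reviewer note: the sketch below is illustrative and not part of the patches.
It shows the CSV shapes the loaders added in PATCH 1 expect
(load_performance_data_threads / load_performance_data_processes in
scoreboard/main.py) and how the per-task merge behaves. The sample rows are
invented for the example.

    import csv
    import io

    # Illustrative payloads; the real files are
    # threads_task_run_perf_table.csv and processes_task_run_perf_table.csv,
    # written by scripts/create_perf_table.py.
    THREADS_CSV = "Task,SEQ,OMP,TBB,STL,ALL\nexample_threads,1.0,0.47,0.52,0.61,0.44\n"
    PROCESSES_CSV = "Task,SEQ,MPI\nexample_processes_2,1.0,0.05\n"

    def parse(text, fields):
        # Mirrors the loaders: one dict per Task row, "?" for missing columns.
        stats = {}
        for row in csv.DictReader(io.StringIO(text)):
            task = row.get("Task")
            if task:
                stats[task] = {key: row.get(col, "?") for key, col in fields.items()}
        return stats

    threads = parse(THREADS_CSV, {"seq": "SEQ", "omp": "OMP", "tbb": "TBB",
                                  "stl": "STL", "all": "ALL"})
    processes = parse(PROCESSES_CSV, {"seq": "SEQ", "mpi": "MPI"})

    # Same merge as in PATCH 1: processes values win on key collisions.
    merged = dict(threads)
    for task, vals in processes.items():
        merged[task] = {**merged.get(task, {}), **vals}
    print(merged)

A task that appears in both files keeps its threads ratios and gains the
"mpi" entry, since the processes dict is merged last; PATCH 3 then remaps
these CSV Task keys onto the actual task directories via _family_from_name.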