6 changes: 6 additions & 0 deletions .github/workflows/pages.yml
@@ -84,7 +84,13 @@ jobs:
- name: Extract performance data
run: |
mkdir -p build/perf_stat_dir
# The uploaded artifact contains a nested perf-stat.zip inside.
# First unzip extracts the inner archive; the second extracts perf_stat_dir/*.
unzip -o perf-stat.zip -d .
if [ -f "perf-stat.zip" ]; then
mv -f perf-stat.zip perf-stat-inner.zip
unzip -o perf-stat-inner.zip -d .
fi
- name: CMake configure
run: |
cmake -S . -B build -DUSE_SCOREBOARD=ON
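For clarity, here is a minimal Python sketch of what the two-stage shell extraction above does, assuming the artifact layout described in the comments (an outer perf-stat.zip that may contain another perf-stat.zip holding perf_stat_dir/*); the function name is hypothetical and not part of the workflow:

```python
# Hypothetical Python equivalent of the two-stage extraction done by the shell steps above.
from pathlib import Path
from zipfile import ZipFile


def extract_nested_perf_stat(outer_zip: Path, dest: Path) -> None:
    """Unpack the outer archive; if it contained another perf-stat.zip, unpack that too."""
    dest.mkdir(parents=True, exist_ok=True)
    with ZipFile(outer_zip) as zf:
        zf.extractall(dest)  # first extraction: may yield an inner perf-stat.zip
    inner = dest / "perf-stat.zip"
    if inner.exists():  # mirrors the `if [ -f "perf-stat.zip" ]` guard
        renamed = inner.rename(dest / "perf-stat-inner.zip")
        with ZipFile(renamed) as zf:
            zf.extractall(dest)  # second extraction: yields perf_stat_dir/*


# extract_nested_perf_stat(Path("perf-stat.zip"), Path("extracted"))
```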
165 changes: 144 additions & 21 deletions scoreboard/main.py
@@ -108,26 +108,48 @@ def discover_tasks(tasks_dir, task_types):
directories, tasks_type_map = discover_tasks(tasks_dir, task_types)


def load_performance_data(perf_stat_file_path):
"""Load and parse performance statistics from CSV file."""
def load_performance_data_threads(perf_stat_file_path: Path) -> dict:
"""Load threads performance ratios (T_x/T_seq) from CSV.
Expected header: Task, SEQ, OMP, TBB, STL, ALL
"""
perf_stats: dict[str, dict] = {}
if perf_stat_file_path.exists():
with open(perf_stat_file_path, "r", newline="") as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
task_name = row.get("Task")
if not task_name:
continue
perf_stats[task_name] = {
"seq": row.get("SEQ", "?"),
"omp": row.get("OMP", "?"),
"tbb": row.get("TBB", "?"),
"stl": row.get("STL", "?"),
"all": row.get("ALL", "?"),
}
else:
logger.warning("Threads perf stats CSV not found at %s", perf_stat_file_path)
return perf_stats
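For reference, a self-contained sketch of the row-to-dict mapping the threads loader performs, with an invented task name and ratio values:

```python
# Invented data; reproduces the row -> dict mapping used by load_performance_data_threads.
import csv
from pathlib import Path

sample = Path("threads_task_run_perf_table.csv")
sample.write_text(
    "Task,SEQ,OMP,TBB,STL,ALL\n"
    "example_task_threads,1.00,0.31,0.29,0.33,0.27\n"
)

perf_stats = {}
with open(sample, "r", newline="") as csvfile:
    for row in csv.DictReader(csvfile):
        task_name = row.get("Task")
        if task_name:
            perf_stats[task_name] = {
                col.lower(): row.get(col, "?") for col in ("SEQ", "OMP", "TBB", "STL", "ALL")
            }

print(perf_stats["example_task_threads"])
# {'seq': '1.00', 'omp': '0.31', 'tbb': '0.29', 'stl': '0.33', 'all': '0.27'}
```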

perf_stats = dict()

def load_performance_data_processes(perf_stat_file_path: Path) -> dict:
"""Load processes performance ratios (T_x/T_seq) from CSV.
Expected header: Task, SEQ, MPI
"""
perf_stats: dict[str, dict] = {}
if perf_stat_file_path.exists():
with open(perf_stat_file_path, "r", newline="") as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
task_name = row.get("Task")
if task_name:
perf_stats[task_name] = {
"seq": row.get("SEQ", "?"),
"omp": row.get("OMP", "?"),
"tbb": row.get("TBB", "?"),
"stl": row.get("STL", "?"),
"all": row.get("ALL", "?"),
"mpi": "N/A",
}
if not task_name:
continue
perf_stats[task_name] = {
"seq": row.get("SEQ", "?"),
"mpi": row.get("MPI", "?"),
}
else:
logger.warning("Performance stats CSV not found at %s", perf_stat_file_path)
logger.warning("Processes perf stats CSV not found at %s", perf_stat_file_path)
return perf_stats
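The processes CSV keeps only two ratio columns; a minimal illustration of the per-row shape, with invented values:

```python
# Invented row; the processes loader keeps only the SEQ and MPI ratios.
row = {"Task": "example_task_processes_2", "SEQ": "1.00", "MPI": "0.18"}
entry = {"seq": row.get("SEQ", "?"), "mpi": row.get("MPI", "?")}
print({row["Task"]: entry})
# {'example_task_processes_2': {'seq': '1.00', 'mpi': '0.18'}}
```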


@@ -652,15 +674,39 @@ def _compute_display_deadlines_processes(n_items: int) -> list[date]:
ds = _evenly_spaced_dates(n_items, s, e)
return ds

# Locate perf CSV from CI or local runs
candidates = [
# Locate perf CSVs from CI or local runs (threads and processes)
candidates_threads = [
script_dir.parent
/ "build"
/ "perf_stat_dir"
/ "threads_task_run_perf_table.csv",
script_dir.parent / "perf_stat_dir" / "threads_task_run_perf_table.csv",
# Fallback to old single-file name
script_dir.parent / "build" / "perf_stat_dir" / "task_run_perf_table.csv",
script_dir.parent / "perf_stat_dir" / "task_run_perf_table.csv",
]
perf_stat_file_path = next((p for p in candidates if p.exists()), candidates[0])
threads_csv = next(
(p for p in candidates_threads if p.exists()), candidates_threads[0]
)

candidates_processes = [
script_dir.parent
/ "build"
/ "perf_stat_dir"
/ "processes_task_run_perf_table.csv",
script_dir.parent / "perf_stat_dir" / "processes_task_run_perf_table.csv",
]
processes_csv = next(
(p for p in candidates_processes if p.exists()), candidates_processes[0]
)
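Both lookups follow the same "first existing candidate, else first candidate" pattern; a hypothetical helper (the name `_first_existing` is not in the diff) that would express it once:

```python
# Hypothetical helper equivalent to the two `next(...)` expressions above.
from pathlib import Path


def _first_existing(candidates: list[Path]) -> Path:
    """Return the first candidate path that exists on disk, else the first candidate."""
    return next((p for p in candidates if p.exists()), candidates[0])


# threads_csv = _first_existing(candidates_threads)
# processes_csv = _first_existing(candidates_processes)
```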

# Read and parse performance statistics CSV
perf_stats = load_performance_data(perf_stat_file_path)
# Read and merge performance statistics CSVs (keys = CSV Task column)
perf_stats_threads = load_performance_data_threads(threads_csv)
perf_stats_processes = load_performance_data_processes(processes_csv)
perf_stats_raw: dict[str, dict] = {}
perf_stats_raw.update(perf_stats_threads)
for k, v in perf_stats_processes.items():
perf_stats_raw[k] = {**perf_stats_raw.get(k, {}), **v}
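The merge keeps one dict per CSV Task key and lets the processes columns extend (and, on collisions, overwrite) the threads columns; a self-contained illustration with invented keys:

```python
# Invented data; demonstrates the per-key dict merge used above.
perf_stats_threads = {"example_task": {"seq": "1.00", "omp": "0.31"}}
perf_stats_processes = {"example_task": {"seq": "0.98", "mpi": "0.18"}}

perf_stats_raw = {}
perf_stats_raw.update(perf_stats_threads)
for k, v in perf_stats_processes.items():
    perf_stats_raw[k] = {**perf_stats_raw.get(k, {}), **v}

print(perf_stats_raw)
# {'example_task': {'seq': '0.98', 'omp': '0.31', 'mpi': '0.18'}}
```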

# Partition tasks by tasks_type from settings.json
threads_task_dirs = [
@@ -678,6 +724,73 @@ def _compute_display_deadlines_processes(n_items: int) -> list[date]:
elif "processes" in name:
processes_task_dirs.append(name)
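The keyword-based split of task directories can be tried in isolation; the directory names below are invented:

```python
# Invented directory names; mirrors the "threads" / "processes" keyword partition above.
directories = ["matmul_threads", "broadcast_processes_3", "reduce_processes"]

threads_task_dirs = [d for d in directories if "threads" in d]
processes_task_dirs = [d for d in directories if "processes" in d]

print(threads_task_dirs)    # ['matmul_threads']
print(processes_task_dirs)  # ['broadcast_processes_3', 'reduce_processes']
```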

# Resolve performance stats keys (from CSV Task names) to actual task directories
import re as _re

def _family_from_name(name: str) -> tuple[str, int]:
# Infer family from CSV Task value, using only structural markers
# threads -> ("threads", 0); processes[_N] -> ("processes", N|1)
if "threads" in name:
return "threads", 0
if "processes" in name:
m = _re.search(r"processes(?:_(\d+))?", name)
if m:
try:
idx = int(m.group(1)) if m.group(1) else 1
except Exception:
idx = 1
else:
idx = 1
return "processes", idx
# Fallback: treat as threads family
return "threads", 0

def _family_from_dir(dir_name: str) -> tuple[str, int]:
# Prefer explicit tasks_type from settings.json and task_number from info.json
kind_guess = tasks_type_map.get(dir_name) or (
"threads" if "threads" in dir_name else "processes"
)
idx = 0
if kind_guess == "processes":
# Lightweight reader to avoid dependency on later-scoped helpers
try:
import json as _json

info_path = tasks_dir / dir_name / "info.json"
if info_path.exists():
with open(info_path, "r") as _f:
data = _json.load(_f)
s = data.get("student", {}) if isinstance(data, dict) else {}
try:
idx = int(str(s.get("task_number", "0")))
except Exception:
idx = 0
except Exception:
idx = 0
return kind_guess, idx
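For processes tasks, `_family_from_dir` only needs the `student.task_number` field from a task's info.json; a sketch with invented file contents:

```python
# Invented info.json contents; shows the single field _family_from_dir reads for processes tasks.
import json
from pathlib import Path

Path("info.json").write_text(json.dumps({"student": {"task_number": "2"}}))

data = json.loads(Path("info.json").read_text())
student = data.get("student", {}) if isinstance(data, dict) else {}
idx = int(str(student.get("task_number", "0")))
print(idx)  # 2 -> family ('processes', 2)
```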

# Build map family -> list of dir names in this repo
family_to_dirs: dict[tuple[str, int], list[str]] = {}
for d in sorted(directories.keys()):
fam = _family_from_dir(d)
family_to_dirs.setdefault(fam, []).append(d)

# Aggregate perf by family (CSV keys may not match dir names)
perf_by_family: dict[tuple[str, int], dict] = {}
for key, vals in perf_stats_raw.items():
fam = _family_from_name(key)
perf_by_family[fam] = {**perf_by_family.get(fam, {}), **vals}

# Project family perf onto actual directories (prefer exact one per family)
perf_stats: dict[str, dict] = {}
for fam, vals in perf_by_family.items():
dirs_for_family = family_to_dirs.get(fam, [])
if not dirs_for_family:
continue
# Assign same perf to all dirs in the family (usually one)
for d in dirs_for_family:
perf_stats[d] = vals.copy()
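Putting the family indirection together: CSV rows reach repository directories through the `(kind, index)` key even when names differ. A toy end-to-end illustration with invented names:

```python
# Invented names; shows family keys linking CSV stats to task directories.
perf_by_family = {("threads", 0): {"omp": "0.31"}, ("processes", 2): {"mpi": "0.18"}}
family_to_dirs = {("threads", 0): ["matmul_threads"], ("processes", 2): ["broadcast_processes"]}

perf_stats = {}
for fam, vals in perf_by_family.items():
    for d in family_to_dirs.get(fam, []):
        perf_stats[d] = vals.copy()

print(perf_stats)
# {'matmul_threads': {'omp': '0.31'}, 'broadcast_processes': {'mpi': '0.18'}}
```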

# Build rows for each page
threads_rows = _build_rows_for_task_types(
task_types_threads,
@@ -712,15 +825,15 @@ def _identity_key(student: dict) -> str:
]
)

def _build_cell(dir_name: str, ttype: str):
def _build_cell(dir_name: str, ttype: str, perf_map: dict[str, dict]):
status = directories[dir_name].get(ttype)
sol_points, solution_style = get_solution_points_and_style(ttype, status, cfg)
task_points = sol_points
is_cheated, plagiarism_points = check_plagiarism_and_calculate_penalty(
dir_name, ttype, sol_points, plagiarism_cfg, cfg, semester="processes"
)
task_points += plagiarism_points
perf_val = perf_stats.get(dir_name, {}).get(ttype, "?")
perf_val = perf_map.get(dir_name, {}).get(ttype, "?")
acceleration, efficiency = calculate_performance_metrics(
perf_val, eff_num_proc, ttype
)
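`calculate_performance_metrics` itself is not shown in this diff; since the loaders store T_x/T_seq ratios, one plausible reading is the classic speedup/efficiency pair sketched below. This is an assumption, not the repository's implementation:

```python
# Assumed definitions: acceleration = T_seq / T_x = 1 / ratio, efficiency = acceleration / num_proc.
def calculate_performance_metrics_sketch(perf_val, num_proc, ttype):
    if ttype == "seq":
        return "1.00", "N/A"  # sequential column has no parallel efficiency
    try:
        ratio = float(perf_val)  # stored value is T_x / T_seq
    except (TypeError, ValueError):
        return "?", "?"
    if ratio <= 0:
        return "?", "?"
    acceleration = 1.0 / ratio
    efficiency = acceleration / max(num_proc, 1)
    return f"{acceleration:.2f}", f"{efficiency:.2%}"


print(calculate_performance_metrics_sketch("0.25", 4, "mpi"))  # ('4.00', '100.00%')
```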
@@ -786,7 +899,7 @@ def _build_cell(dir_name: str, ttype: str):
proc_group_headers.append({"type": "seq"})
group_cells = []
for ttype in ["mpi", "seq"]:
cell, _ = _build_cell(d, ttype)
cell, _ = _build_cell(d, ttype, perf_stats)
group_cells.append(cell)
# Override displayed points for processes: S under MPI/SEQ from points-info; A points under MPI only
s_mpi, s_seq, a_mpi, r_max = _find_process_points(cfg, n)
@@ -902,6 +1015,16 @@ def _build_cell(dir_name: str, ttype: str):
}
]

# Rebuild threads rows with resolved perf stats
threads_rows = _build_rows_for_task_types(
task_types_threads,
threads_task_dirs,
perf_stats,
cfg,
eff_num_proc,
deadlines_cfg,
)

parser = argparse.ArgumentParser(description="Generate HTML scoreboard.")
parser.add_argument(
"-o", "--output", type=str, required=True, help="Output directory path"