sync #9256 (Merged)

Changes from all commits (19 commits)
9b2cb87
Fix macos test-model jobs (#9235)
mergennachin Mar 13, 2025
08f0f7a
Another fix for CI job refactoring (#9237)
mergennachin Mar 13, 2025
570e06c
Build flatc for the host instead of the target platform (#9077)
jathu Mar 13, 2025
1c2a69e
[build Folder Migration] Move build/Codegen.cmake (#9185)
jathu Mar 13, 2025
1a918c7
Move ModelDataKit to ExecuTorch directory
bsoyluoglu Mar 13, 2025
54c3f78
Add thread_parallel_interface to all_deps for portable util (#9242)
swolchok Mar 13, 2025
2407647
Put extension/parallel buck files back (#9232)
swolchok Mar 13, 2025
4f95fd0
Add back linux pull jobs (#9239)
mergennachin Mar 13, 2025
630d0cc
Don't use designated initializers in QueryPool.cpp
SamGondelman Mar 13, 2025
a131826
[Benchmark] Deprecate v2 (#9238)
yangw-dev Mar 13, 2025
e91c085
Adding dummy coreml backend to silence uquery failures
ChristianWLang Mar 13, 2025
ce612b8
Pin a newer pytorch nightly to include recent export updates
iseeyuan Mar 13, 2025
e9cf64a
fix building with CMake + Ninja after #9077 (#9246)
swolchok Mar 13, 2025
b5d8e3b
Move ExecutorchRuntimeValueSupport and ExecutorchRuntimeBridge to xplat
bsoyluoglu Mar 13, 2025
27bacff
Export Mimi model to ExecuTorch
iseeyuan Mar 13, 2025
718aa6f
Reduce macOS CI jobs and add more Arm64 jobs. (#9228)
mergennachin Mar 13, 2025
699ee7d
Add llama jobs on Arm64 and reduce llama jobs on MacOS (#9251)
mergennachin Mar 14, 2025
9a0c2db
Split android instrumentation from build script
kirklandsign Mar 14, 2025
7901539
Move MacOS jobs (phi-4-mini, qwen2_5) etc to Arm64 (#9254)
mergennachin Mar 14, 2025
183 changes: 48 additions & 135 deletions .github/scripts/extract_benchmark_results.py
@@ -86,36 +86,6 @@ def parse_args() -> Any:
action=ValidateDir,
help="the directory to keep the benchmark results",
)
parser.add_argument(
"--repo",
type=str,
required=True,
help="which GitHub repo this workflow run belongs to",
)
parser.add_argument(
"--head-branch",
type=str,
required=True,
help="the head branch that runs",
)
parser.add_argument(
"--workflow-name",
type=str,
required=True,
help="the name of the benchmark workflow",
)
parser.add_argument(
"--workflow-run-id",
type=int,
required=True,
help="the id of the benchmark workflow",
)
parser.add_argument(
"--workflow-run-attempt",
type=int,
required=True,
help="which retry of the workflow this is",
)
parser.add_argument(
"--benchmark-configs",
type=str,
@@ -153,9 +123,10 @@ def extract_android_benchmark_results(
# This is to handle the case where there is no benchmark results
warning(f"Fail to load the benchmark results from {artifact_s3_url}")
return []
return []


def initialize_ios_metadata(test_name: str) -> Dict[str, any]:
def initialize_ios_metadata(test_name: str) -> Dict[str, Any]:
"""
Extract the benchmark metadata from the test name, for example:
test_forward_llama2_pte_iOS_17_2_1_iPhone15_4
Expand Down Expand Up @@ -364,14 +335,7 @@ def transform(
app_type: str,
benchmark_results: List,
benchmark_config: Dict[str, str],
repo: str,
head_branch: str,
workflow_name: str,
workflow_run_id: int,
workflow_run_attempt: int,
job_name: str,
job_id: int,
schema_version: str,
) -> List:
"""
Transform the benchmark results into the format writable into the benchmark database
@@ -381,87 +345,51 @@
for r in benchmark_results:
r["deviceInfo"]["device"] = job_name

if schema_version == "v2":
# TODO (huydhn): Clean up this branch after ExecuTorch dashboard migrates to v3
return [
{
# GH-info to identify where the benchmark is run
"repo": repo,
"head_branch": head_branch,
"workflow_id": workflow_run_id,
"run_attempt": workflow_run_attempt,
"job_id": job_id,
# The model
"name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
"dtype": (
r["benchmarkModel"]["quantization"]
if r["benchmarkModel"]["quantization"]
else "unknown"
),
# The metric value
"metric": r["metric"],
"actual": r["actualValue"],
"target": r["targetValue"],
# The device
"device": r["deviceInfo"]["device"],
"arch": r["deviceInfo"].get("os", ""),
# Not used here, just set it to something unique here
"filename": workflow_name,
"test_name": app_type,
"runner": job_name,
}
for r in benchmark_results
]
elif schema_version == "v3":
v3_benchmark_results = []
# From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
return [
{
"benchmark": {
"name": "ExecuTorch",
"mode": "inference",
"extra_info": {
"app_type": app_type,
# Just keep a copy of the benchmark config here
"benchmark_config": json.dumps(benchmark_config),
},
},
"model": {
"name": benchmark_config.get("model", r["benchmarkModel"]["name"]),
"type": "OSS model",
"backend": benchmark_config.get(
"config", r["benchmarkModel"].get("backend", "")
),
# From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
return [
{
"benchmark": {
"name": "ExecuTorch",
"mode": "inference",
"extra_info": {
"app_type": app_type,
# Just keep a copy of the benchmark config here
"benchmark_config": json.dumps(benchmark_config),
},
"metric": {
"name": r["metric"],
"benchmark_values": [r["actualValue"]],
"target_value": r["targetValue"],
"extra_info": {
"method": r.get("method", ""),
},
},
"model": {
"name": benchmark_config.get("model", r["benchmarkModel"]["name"]),
"type": "OSS model",
"backend": benchmark_config.get(
"config", r["benchmarkModel"].get("backend", "")
),
},
"metric": {
"name": r["metric"],
"benchmark_values": [r["actualValue"]],
"target_value": r["targetValue"],
"extra_info": {
"method": r.get("method", ""),
},
"runners": [
{
"name": r["deviceInfo"]["device"],
"type": r["deviceInfo"]["os"],
"avail_mem_in_gb": r["deviceInfo"].get("availMem", ""),
"total_mem_in_gb": r["deviceInfo"].get("totalMem", ""),
}
],
}
for r in benchmark_results
]
},
"runners": [
{
"name": r["deviceInfo"]["device"],
"type": r["deviceInfo"]["os"],
"avail_mem_in_gb": r["deviceInfo"].get("availMem", ""),
"total_mem_in_gb": r["deviceInfo"].get("totalMem", ""),
}
],
}
for r in benchmark_results
]


def main() -> None:
args = parse_args()

# Across all devices, keeping both schemas for now until ExecuTorch dashboard migrates to v3
all_benchmark_results = {
"v2": [],
"v3": [],
}
all_benchmark_results = []
benchmark_config = {}

with open(args.artifacts) as f:
@@ -482,7 +410,7 @@ def main() -> None:
benchmark_config = read_benchmark_config(
artifact_s3_url, args.benchmark_configs
)

benchmark_results = []
if app_type == "ANDROID_APP":
benchmark_results = extract_android_benchmark_results(
job_name, artifact_type, artifact_s3_url
@@ -494,32 +422,17 @@
)

if benchmark_results:
for schema in all_benchmark_results.keys():
results = transform(
app_type,
benchmark_results,
benchmark_config,
args.repo,
args.head_branch,
args.workflow_name,
args.workflow_run_id,
args.workflow_run_attempt,
job_name,
extract_job_id(args.artifacts),
schema,
)
all_benchmark_results[schema].extend(results)

for schema in all_benchmark_results.keys():
if not all_benchmark_results.get(schema):
continue

output_dir = os.path.join(args.output_dir, schema)
os.makedirs(output_dir, exist_ok=True)
results = transform(
app_type, benchmark_results, benchmark_config, job_name
)
all_benchmark_results.extend(results)

# add v3 in case we have higher version of schema
output_dir = os.path.join(args.output_dir, "v3")
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.basename(args.artifacts)
with open(f"{output_dir}/{output_file}", "w") as f:
json.dump(all_benchmark_results[schema], f)
json.dump(all_benchmark_results, f)


if __name__ == "__main__":
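For context, a minimal sketch of the single (v3) record shape that transform() now emits per benchmark result, after the v2 branch and the GitHub-metadata arguments were dropped. All values below are hypothetical placeholders; the field layout follows the PyTorch OSS benchmark database format referenced in the script.

# Sketch of one v3 record as assembled by transform() (hypothetical values).
example_v3_record = {
    "benchmark": {
        "name": "ExecuTorch",
        "mode": "inference",
        "extra_info": {
            "app_type": "ANDROID_APP",
            # transform() keeps a JSON copy of the whole benchmark config here.
            "benchmark_config": '{"model": "llama2", "config": "xnnpack"}',
        },
    },
    "model": {
        "name": "llama2",        # benchmark_config["model"] or benchmarkModel["name"]
        "type": "OSS model",
        "backend": "xnnpack",    # benchmark_config["config"] or benchmarkModel["backend"]
    },
    "metric": {
        "name": "token_per_sec",            # hypothetical metric name
        "benchmark_values": [7.8],          # actualValue from the raw result
        "target_value": 10.0,               # targetValue from the raw result
        "extra_info": {"method": ""},
    },
    "runners": [
        {
            "name": "samsung_galaxy_s22",   # deviceInfo["device"], overwritten with job_name
            "type": "Android 14",           # deviceInfo["os"]
            "avail_mem_in_gb": "",
            "total_mem_in_gb": "",
        }
    ],
}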
1 change: 1 addition & 0 deletions .github/workflows/_android.yml
@@ -30,6 +30,7 @@ jobs:

# Build LLM Demo for Android
bash build/build_android_library.sh ${ARTIFACTS_DIR_NAME}
bash build/build_android_instrumentation.sh

# Running Android emulator directly on the runner and not using Docker
run-emulator:
21 changes: 3 additions & 18 deletions .github/workflows/android-perf.yml
@@ -462,29 +462,14 @@ jobs:
${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
--artifacts "${ARTIFACTS_BY_JOB}" \
--output-dir benchmark-results \
--repo ${{ github.repository }} \
--head-branch ${{ github.head_ref || github.ref_name }} \
--workflow-name "${{ github.workflow }}" \
--workflow-run-id ${{ github.run_id }} \
--workflow-run-attempt ${{ github.run_attempt }} \
--benchmark-configs benchmark-configs
done

for SCHEMA in v2 v3; do
for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
cat "${BENCHMARK_RESULTS}"
echo
done
for BENCHMARK_RESULTS in benchmark-results/v3/*.json; do
cat "${BENCHMARK_RESULTS}"
echo
done

# TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
- name: Upload the benchmark results (v2)
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
benchmark-results-dir: benchmark-results/v2
dry-run: false
schema-version: v2

- name: Upload the benchmark results (v3)
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
21 changes: 3 additions & 18 deletions .github/workflows/apple-perf.yml
@@ -521,29 +521,14 @@ jobs:
${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
--artifacts "${ARTIFACTS_BY_JOB}" \
--output-dir benchmark-results \
--repo ${{ github.repository }} \
--head-branch ${{ github.head_ref || github.ref_name }} \
--workflow-name "${{ github.workflow }}" \
--workflow-run-id ${{ github.run_id }} \
--workflow-run-attempt ${{ github.run_attempt }} \
--benchmark-configs benchmark-configs
done

for SCHEMA in v2 v3; do
for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
cat "${BENCHMARK_RESULTS}"
echo
done
for BENCHMARK_RESULTS in benchmark-results/v3/*.json; do
cat "${BENCHMARK_RESULTS}"
echo
done

# TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
- name: Upload the benchmark results (v2)
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
benchmark-results-dir: benchmark-results/v2
dry-run: false
schema-version: v2

- name: Upload the benchmark results (v3)
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
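Both perf workflows now pass only the artifacts, output directory, and benchmark-configs arguments to the extraction script and read results exclusively from benchmark-results/v3. A rough local sketch of the equivalent invocation and a quick check of the output, with placeholder file and directory names:

import glob
import json
import subprocess

# Rough local equivalent of the simplified workflow step (placeholder paths).
subprocess.run(
    [
        "python",
        ".github/scripts/extract_benchmark_results.py",
        "--artifacts", "artifacts-by-job.json",      # placeholder for ${ARTIFACTS_BY_JOB}
        "--output-dir", "benchmark-results",
        "--benchmark-configs", "benchmark-configs",
    ],
    check=True,
)

# The workflows now only print and upload the v3 output.
for path in glob.glob("benchmark-results/v3/*.json"):
    with open(path) as f:
        records = json.load(f)
    print(path, len(records), "records")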