
Commit 67f0807

nWEIdia authored and pytorchmergebot committed
[Inductor] [CI] [CUDA] Skip the failed models and tests the better way (#127150)
Address subtasks in #126692.

After enabling the disabled shards, the following two models regressed (for the cu124 configuration) in dynamic_inductor_timm_training.csv:

- cspdarknet53,pass,7 (expected) vs. cspdarknet53,fail_accuracy,7 (actual)
- eca_botnext26ts_256,pass,7 (expected) vs. eca_botnext26ts_256,fail_accuracy,7 (actual)

Pull Request resolved: #127150
Approved by: https://github.com/huydhn, https://github.com/eqy, https://github.com/atalman
1 parent 64c581a commit 67f0807

File tree

42 files changed: +10131 additions, -6 deletions


.ci/pytorch/test.sh

Lines changed: 24 additions & 6 deletions

@@ -264,6 +264,18 @@ elif [[ $TEST_CONFIG == 'nogpu_AVX512' ]]; then
   export ATEN_CPU_CAPABILITY=avx2
 fi
 
+# temp workarounds for https://github.com/pytorch/pytorch/issues/126692, remove when fixed
+if [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then
+  pushd test
+  CUDA_VERSION=$(python -c "import torch; print(torch.version.cuda)")
+  if [ "$CUDA_VERSION" == "12.4" ]; then
+    ISCUDA124="cu124"
+  else
+    ISCUDA124=""
+  fi
+  popd
+fi
+
 test_python_legacy_jit() {
   time python test/run_test.py --include test_jit_legacy test_jit_fuser_legacy --verbose
   assert_git_not_dirty

@@ -364,7 +376,7 @@ test_inductor_cpp_wrapper_abi_compatible() {
     --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv"
   python benchmarks/dynamo/check_accuracy.py \
     --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \
-    --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv"
+    --expected "benchmarks/dynamo/ci_expected_accuracy/${ISCUDA124}/inductor_timm_training.csv"
 }
 
 # "Global" flags for inductor benchmarking controlled by TEST_CONFIG

@@ -526,10 +538,10 @@ test_single_dynamo_benchmark() {
       --output "$TEST_REPORTS_DIR/${name}_${suite}.csv"
     python benchmarks/dynamo/check_accuracy.py \
       --actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
-      --expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
+      --expected "benchmarks/dynamo/ci_expected_accuracy/${ISCUDA124}/${TEST_CONFIG}_${name}.csv"
     python benchmarks/dynamo/check_graph_breaks.py \
       --actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
-      --expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
+      --expected "benchmarks/dynamo/ci_expected_accuracy/${ISCUDA124}/${TEST_CONFIG}_${name}.csv"
   fi
 }

@@ -576,7 +588,7 @@ test_inductor_torchbench_smoketest_perf() {
     --bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
   python benchmarks/dynamo/check_accuracy.py \
     --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \
-    --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv"
+    --expected "benchmarks/dynamo/ci_expected_accuracy/${ISCUDA124}/inductor_torchbench_inference.csv"
 
   python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
     --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \

@@ -591,7 +603,13 @@ test_inductor_torchbench_smoketest_perf() {
   # https://github.com/pytorch/pytorch/actions/runs/7158691360/job/19491437314,
   # and thus we lower its threshold to reduce flakiness. If this continues to be a problem,
   # we switch to use some other model.
-  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.9
+  # Use 4.7 for cuda 12.4, change back to 4.9 after fixing https://github.com/pytorch/pytorch/issues/126692
+  if [ "$CUDA_VERSION" == "12.4" ]; then
+    THRESHOLD=4.7
+  else
+    THRESHOLD=4.9
+  fi
+  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t $THRESHOLD
 
   # Check memory compression ratio for a few models
   for test in hf_Albert timm_vision_transformer; do

@@ -610,7 +628,7 @@ test_inductor_torchbench_smoketest_perf() {
       --only $test --output "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv"
     python benchmarks/dynamo/check_accuracy.py \
       --actual "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv" \
-      --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv"
+      --expected "benchmarks/dynamo/ci_expected_accuracy/${ISCUDA124}/inductor_huggingface_training.csv"
   done
 }
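The shell above selects a CUDA-12.4-specific baseline directory by substituting ${ISCUDA124} into each expected-CSV path. A minimal Python rendering of that selection, for illustration only (resolve_expected_csv is a hypothetical helper, not part of the repo):

import os

import torch


def resolve_expected_csv(filename: str) -> str:
    # Mirrors test.sh: ISCUDA124 is "cu124" when torch reports CUDA 12.4,
    # otherwise empty, so the path falls back to the default baselines.
    iscuda124 = "cu124" if torch.version.cuda == "12.4" else ""
    return os.path.join("benchmarks/dynamo/ci_expected_accuracy", iscuda124, filename)


# On a cu124 build this resolves to
# benchmarks/dynamo/ci_expected_accuracy/cu124/inductor_timm_training.csv
print(resolve_expected_csv("inductor_timm_training.csv"))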

.github/workflows/inductor.yml

Lines changed: 16 additions & 0 deletions

@@ -140,11 +140,15 @@ jobs:
           { config: "inductor", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "inductor_distributed", shard: 1, num_shards: 1, runner: "linux.g5.12xlarge.nvidia.gpu" },
           { config: "inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "dynamic_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "dynamic_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "dynamic_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "dynamic_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "dynamic_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "aot_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "aot_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "aot_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },

@@ -192,6 +196,18 @@ jobs:
           { config: "inductor", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
         ]}
 
+  linux-focal-cuda12_4-py3_10-gcc9-inductor-test-gcp:
+    name: cuda12.4-py3.10-gcc9-sm80
+    uses: ./.github/workflows/_linux-test.yml
+    needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp
+    with:
+      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
+      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp.outputs.test-matrix }}
+      use-gha: anything-non-empty-to-use-gha
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+
   linux-focal-cuda12_4-py3_12-gcc9-inductor-test:
     name: cuda12.4-py3.12-gcc9-sm86
     uses: ./.github/workflows/_linux-test.yml
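The matrix entries above re-enable the previously disabled shards (shard 1 of inductor_timm, shard 2 of inductor_torchbench, and their dynamic counterparts) and add a cuda12.4 test job for the sm80 GCP build. As a rough illustration of what a shard/num_shards pair means, here is a minimal round-robin sharding sketch; this scheme is an assumption, PyTorch's actual shard assignment lives in run_test.py and may differ:

def select_shard(tests, shard, num_shards):
    # The 1-indexed shard takes every num_shards-th test, round-robin,
    # so shard 1 and shard 2 of 2 split the suite roughly in half.
    return [t for i, t in enumerate(sorted(tests)) if i % num_shards == shard - 1]


models = ["beit", "convit", "cspdarknet53", "eca_botnext26ts_256"]
print(select_shard(models, 1, 2))  # ['beit', 'cspdarknet53']
print(select_shard(models, 2, 2))  # ['convit', 'eca_botnext26ts_256']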
Lines changed: 185 additions & 0 deletions

@@ -0,0 +1,185 @@
+name,accuracy,graph_breaks
+AlbertForMaskedLM,pass,0
+AlbertForQuestionAnswering,pass,0
+AllenaiLongformerBase,pass,4
+BartForCausalLM,pass,0
+BartForConditionalGeneration,pass,0
+BertForMaskedLM,pass,0
+BertForQuestionAnswering,pass,0
+BlenderbotForCausalLM,pass_due_to_skip,0
+BlenderbotSmallForCausalLM,pass,0
+BlenderbotSmallForConditionalGeneration,pass,0
+CamemBert,pass,0
+DebertaForMaskedLM,pass,0
+DebertaForQuestionAnswering,pass,0
+DebertaV2ForMaskedLM,pass_due_to_skip,0
+DebertaV2ForQuestionAnswering,pass,0
+DistilBertForMaskedLM,pass,0
+DistilBertForQuestionAnswering,pass,0
+DistillGPT2,pass,0
+ElectraForCausalLM,pass,0
+ElectraForQuestionAnswering,pass,0
+GPT2ForSequenceClassification,pass,2
+GoogleFnet,pass,0
+LayoutLMForMaskedLM,pass,0
+LayoutLMForSequenceClassification,pass,2
+M2M100ForConditionalGeneration,pass,0
+MBartForCausalLM,pass,0
+MBartForConditionalGeneration,pass,0
+MT5ForConditionalGeneration,pass,0
+MegatronBertForCausalLM,pass,0
+MegatronBertForQuestionAnswering,pass,0
+MobileBertForMaskedLM,pass,0
+MobileBertForQuestionAnswering,pass,0
+OPTForCausalLM,pass,0
+PLBartForCausalLM,pass,0
+PLBartForConditionalGeneration,pass,0
+PegasusForCausalLM,pass,0
+PegasusForConditionalGeneration,pass,0
+RobertaForCausalLM,pass,0
+RobertaForQuestionAnswering,pass,0
+Speech2Text2ForCausalLM,pass,0
+T5ForConditionalGeneration,pass,0
+T5Small,pass,0
+TrOCRForCausalLM,pass,0
+XGLMForCausalLM,pass,0
+XLNetLMHeadModel,pass,0
+YituTechConvBert,pass,0
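Each added expected-accuracy CSV records, per model, the accuracy status and graph-break count a job must reproduce. As a rough illustration of the comparison that benchmarks/dynamo/check_accuracy.py is invoked for in test.sh above, here is a minimal sketch assuming this three-column schema; it is not the real script, whose logic may differ:

import argparse
import csv
import sys


def load_statuses(path):
    # Map model name -> accuracy status, e.g. "pass" or "fail_accuracy".
    with open(path, newline="") as f:
        return {row["name"]: row["accuracy"] for row in csv.DictReader(f)}


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--actual", required=True)
    parser.add_argument("--expected", required=True)
    args = parser.parse_args()

    expected = load_statuses(args.expected)
    actual = load_statuses(args.actual)

    # Flag any model whose status diverges from the baseline, e.g.
    # cspdarknet53 recorded as "pass" but observed as "fail_accuracy".
    failures = [
        (name, expected[name], status)
        for name, status in sorted(actual.items())
        if name in expected and status != expected[name]
    ]
    for name, want, got in failures:
        print(f"{name}: expected {want}, got {got}")
    sys.exit(1 if failures else 0)


if __name__ == "__main__":
    main()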
