From aa49b2bcda4b664c4cc6a65731f53c39cd39c7f0 Mon Sep 17 00:00:00 2001 From: Antoni Zwolski Date: Mon, 3 Nov 2025 11:53:52 +0100 Subject: [PATCH 1/7] [InstCombine] Limit canonicalization of extractelement(cast) to constant index --- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 18a45c6799bac..87b5f3a58e72b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -588,7 +588,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { // Canonicalize extractelement(cast) -> cast(extractelement). // Bitcasts can change the number of vector elements, and they cost // nothing. - if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { + if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast) && isa(Index)) { Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index); return CastInst::Create(CI->getOpcode(), EE, EI.getType()); } From 298613cffab3be828db15b6025ba310bb7e5a716 Mon Sep 17 00:00:00 2001 From: Antoni Zwolski Date: Mon, 3 Nov 2025 11:55:30 +0100 Subject: [PATCH 2/7] [InstCombine] Update vec_extract_var_elt.ll test checks --- llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll index 205b4b88c473a..c01cdae81b81c 100644 --- a/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll +++ b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll @@ -5,10 +5,10 @@ define void @test_poison(float %b, ptr %p) { ; CHECK-LABEL: define void @test_poison( ; CHECK-SAME: float [[B:%.*]], ptr [[P:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[P]], align 32 +; CHECK-NEXT: [[A:%.*]] = fptosi <8 x float> [[TMP1]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[B]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -2 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fptosi float [[TMP4]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[A]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 7 ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <8 x i32> [[TMP6]] to <8 x float> ; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[P]], align 32 @@ -45,14 +45,14 @@ define void @test_loop(<4 x float> %in) { ; CHECK-SAME: <4 x float> [[IN:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[R:%.*]] = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> [[IN]], i32 9) +; CHECK-NEXT: [[VI:%.*]] = fptosi <4 x float> [[R]] to <4 x i32> ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LATCH:.*]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp samesign ult i32 [[I]], 4 ; CHECK-NEXT: br i1 [[COND]], label %[[BODY:.*]], label %[[DONE:.*]] ; CHECK: [[BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[R]], i32 [[I]] -; CHECK-NEXT: [[ELEM:%.*]] = fptosi float [[TMP0]] to i32 +; CHECK-NEXT: [[ELEM:%.*]] = extractelement <4 x i32> [[VI]], i32 [[I]] ; CHECK-NEXT: call void @use(i32 [[ELEM]]) ; CHECK-NEXT: br label %[[LATCH]] ; CHECK: [[LATCH]]: From 91e0c1b2cdd8564470cacfbf9c45ab6ca6713ee7 Mon Sep 17 00:00:00 2001 From: Antoni Zwolski Date: Mon, 3 Nov 2025 14:52:47 +0100 Subject: [PATCH 3/7] [InstCombine] Refactor canonicalization of extractelement(cast) to constant index or same basic block. --- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 87b5f3a58e72b..5af7c4caab074 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -588,9 +588,12 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { // Canonicalize extractelement(cast) -> cast(extractelement). // Bitcasts can change the number of vector elements, and they cost // nothing. - if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast) && isa(Index)) { - Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index); - return CastInst::Create(CI->getOpcode(), EE, EI.getType()); + if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)){ + Instruction *U = cast(*CI->user_begin()); + if (U->getParent() == CI->getParent() || isa(Index)){ + Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index); + return CastInst::Create(CI->getOpcode(), EE, EI.getType()); + } } } } From 742d97e9a1168669b22bbb03680489416b48595b Mon Sep 17 00:00:00 2001 From: Antoni Zwolski Date: Mon, 3 Nov 2025 21:11:27 +0100 Subject: [PATCH 4/7] [InstCombine] Add test_poison_branch test and update vec_extract_var_elt.ll test checks --- .../InstCombine/vec_extract_var_elt.ll | 43 ++++++++++++++++--- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll index c01cdae81b81c..35d11b0cdf43f 100644 --- a/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll +++ b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll @@ -5,10 +5,10 @@ define void @test_poison(float %b, ptr %p) { ; CHECK-LABEL: define void @test_poison( ; CHECK-SAME: float [[B:%.*]], ptr [[P:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[P]], align 32 -; CHECK-NEXT: [[A:%.*]] = fptosi <8 x float> [[TMP1]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[B]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[A]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fptosi float [[TMP4]] to i32 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 7 ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <8 x i32> [[TMP6]] to <8 x float> ; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[P]], align 32 @@ -39,13 +39,45 @@ define i32 @test_bitcast(i32 %i) { } declare void @use(i32) +declare void @use_vi(<4 x i32>) + +define void @test_poison_branch(<4 x float> %in, i32 %a, i1 %cond) { +; CHECK-LABEL: define void @test_poison_branch( +; CHECK-SAME: <4 x float> [[IN:%.*]], i32 [[A:%.*]], i1 [[COND:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[I:%.*]] = add i32 [[A]], -2 +; CHECK-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] +; CHECK: [[TRUE]]: +; CHECK-NEXT: call void @use(i32 [[I]]) +; CHECK-NEXT: br label %[[DONE:.*]] +; CHECK: [[FALSE]]: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[IN]], i32 [[I]] +; CHECK-NEXT: [[ELEM:%.*]] = fptosi float [[TMP0]] to i32 +; CHECK-NEXT: call void @use(i32 [[ELEM]]) +; CHECK-NEXT: br label %[[DONE]] +; CHECK: [[DONE]]: +; CHECK-NEXT: ret void +; +entry: + %vi = fptosi <4 x float> %in to <4 x i32> + %i = add i32 %a, -2 + br i1 %cond, label %true, label %false +true: + call void @use(i32 %i) + br label %done +false: + %elem = extractelement <4 x i32> %vi, i32 %i + call void @use(i32 %elem) + br label %done +done: + ret void +} define void @test_loop(<4 x float> %in) { ; CHECK-LABEL: define void @test_loop( ; CHECK-SAME: <4 x float> [[IN:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[R:%.*]] = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> [[IN]], i32 9) -; CHECK-NEXT: [[VI:%.*]] = fptosi <4 x float> [[R]] to <4 x i32> +; CHECK-NEXT: [[VI:%.*]] = fptosi <4 x float> [[IN]] to <4 x i32> ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LATCH:.*]] ] @@ -62,8 +94,7 @@ define void @test_loop(<4 x float> %in) { ; CHECK-NEXT: ret void ; entry: - %r = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %in, i32 9) - %vi = fptosi <4 x float> %r to <4 x i32> + %vi = fptosi <4 x float> %in to <4 x i32> br label %loop loop: %i = phi i32 [ 0, %entry ], [ %next, %latch ] From 4d8d8fdd9099e4384a215ed5d2e90e9aa690975e Mon Sep 17 00:00:00 2001 From: Antoni Zwolski Date: Mon, 3 Nov 2025 21:13:40 +0100 Subject: [PATCH 5/7] [InstCombine] Remove unused declaration of @use_vi in vec_extract_var_elt.ll --- llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll index 35d11b0cdf43f..f96b7070f9f2a 100644 --- a/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll +++ b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll @@ -39,7 +39,6 @@ define i32 @test_bitcast(i32 %i) { } declare void @use(i32) -declare void @use_vi(<4 x i32>) define void @test_poison_branch(<4 x float> %in, i32 %a, i1 %cond) { ; CHECK-LABEL: define void @test_poison_branch( From 4a1267a38008d8472c80114b32fafc62d800e9c8 Mon Sep 17 00:00:00 2001 From: Antoni Zwolski Date: Mon, 3 Nov 2025 21:31:20 +0100 Subject: [PATCH 6/7] [InstCombine] Fix formatting in visitExtractElementInst --- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 5af7c4caab074..44c3863dd97b5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -588,9 +588,9 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { // Canonicalize extractelement(cast) -> cast(extractelement). // Bitcasts can change the number of vector elements, and they cost // nothing. - if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)){ + if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { Instruction *U = cast(*CI->user_begin()); - if (U->getParent() == CI->getParent() || isa(Index)){ + if (U->getParent() == CI->getParent() || isa(Index)) { Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index); return CastInst::Create(CI->getOpcode(), EE, EI.getType()); } From 99da1283f75dad503a96408d19d1cfc9d7b7e872 Mon Sep 17 00:00:00 2001 From: Antoni Zwolski Date: Wed, 19 Nov 2025 14:47:22 +0100 Subject: [PATCH 7/7] [InstCombine] Simplify conditional checks for extractelement(cast) canonicalization --- .../Transforms/InstCombine/InstCombineVectorOps.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 44c3863dd97b5..5d992f48d0544 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -588,12 +588,10 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { // Canonicalize extractelement(cast) -> cast(extractelement). // Bitcasts can change the number of vector elements, and they cost // nothing. - if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { - Instruction *U = cast(*CI->user_begin()); - if (U->getParent() == CI->getParent() || isa(Index)) { - Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index); - return CastInst::Create(CI->getOpcode(), EE, EI.getType()); - } + if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast) && + (EI.getParent() == CI->getParent() || isa(Index))) { + Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index); + return CastInst::Create(CI->getOpcode(), EE, EI.getType()); } } }