Skip to content

Commit c90a194

Browse files
committed
[ARM] shouldFoldMaskToVariableShiftPair should be true for scalars up to the biggest legal type
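On ARM, replacing a variable-shift mask (e.g. `x & (-1 << y)`) with a pair of variable shifts (e.g. `(x >> y) << y`) is profitable for scalar types up to the biggest legal type (at most 32 bits in this change); vectors are excluded.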
1 parent a4faee1 commit c90a194

File tree

4 files changed

+112
-146
lines changed

4 files changed

+112
-146
lines changed

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -775,6 +775,16 @@ class VectorType;
775775
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
776776
CombineLevel Level) const override;
777777

778+
/// Return true if it is profitable to replace a mask built from a variable shift (e.g. x & (-1 << y)) with a pair of variable shifts (e.g. (x >> y) << y).
779+
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
780+
EVT VT = Y.getValueType();
781+
782+
if (VT.isVector())
783+
return false;
784+
785+
return VT.getScalarSizeInBits() <= 32;
786+
}
787+
778788
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
779789
unsigned SelectOpcode, SDValue X,
780790
SDValue Y) const override;

llvm/test/CodeGen/ARM/and-mask-variable.ll

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -7,30 +7,26 @@
77
define i32 @mask_pair(i32 %x, i32 %y) {
88
; V7M-LABEL: mask_pair:
99
; V7M: @ %bb.0:
10-
; V7M-NEXT: mov.w r2, #-1
11-
; V7M-NEXT: lsl.w r1, r2, r1
12-
; V7M-NEXT: ands r0, r1
10+
; V7M-NEXT: lsrs r0, r1
11+
; V7M-NEXT: lsls r0, r1
1312
; V7M-NEXT: bx lr
1413
;
1514
; V7A-LABEL: mask_pair:
1615
; V7A: @ %bb.0:
17-
; V7A-NEXT: mvn r2, #0
18-
; V7A-NEXT: and r0, r0, r2, lsl r1
16+
; V7A-NEXT: lsr r0, r0, r1
17+
; V7A-NEXT: lsl r0, r0, r1
1918
; V7A-NEXT: bx lr
2019
;
2120
; V7A-T-LABEL: mask_pair:
2221
; V7A-T: @ %bb.0:
23-
; V7A-T-NEXT: mov.w r2, #-1
24-
; V7A-T-NEXT: lsl.w r1, r2, r1
25-
; V7A-T-NEXT: ands r0, r1
22+
; V7A-T-NEXT: lsrs r0, r1
23+
; V7A-T-NEXT: lsls r0, r1
2624
; V7A-T-NEXT: bx lr
2725
;
2826
; V6M-LABEL: mask_pair:
2927
; V6M: @ %bb.0:
30-
; V6M-NEXT: movs r2, #0
31-
; V6M-NEXT: mvns r2, r2
32-
; V6M-NEXT: lsls r2, r1
33-
; V6M-NEXT: ands r0, r2
28+
; V6M-NEXT: lsrs r0, r1
29+
; V6M-NEXT: lsls r0, r1
3430
; V6M-NEXT: bx lr
3531
%shl = shl nsw i32 -1, %y
3632
%and = and i32 %shl, %x

llvm/test/CodeGen/ARM/extract-bits.ll

Lines changed: 64 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -2151,30 +2151,27 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
21512151
;
21522152
; V7A-LABEL: bextr32_c0:
21532153
; V7A: @ %bb.0:
2154-
; V7A-NEXT: rsb r2, r2, #32
2155-
; V7A-NEXT: mvn r3, #0
2156-
; V7A-NEXT: lsr r2, r3, r2
2157-
; V7A-NEXT: and r0, r2, r0, lsr r1
2154+
; V7A-NEXT: lsr r0, r0, r1
2155+
; V7A-NEXT: rsb r1, r2, #32
2156+
; V7A-NEXT: lsl r0, r0, r1
2157+
; V7A-NEXT: lsr r0, r0, r1
21582158
; V7A-NEXT: bx lr
21592159
;
21602160
; V7A-T-LABEL: bextr32_c0:
21612161
; V7A-T: @ %bb.0:
21622162
; V7A-T-NEXT: lsrs r0, r1
21632163
; V7A-T-NEXT: rsb.w r1, r2, #32
2164-
; V7A-T-NEXT: mov.w r2, #-1
2165-
; V7A-T-NEXT: lsr.w r1, r2, r1
2166-
; V7A-T-NEXT: ands r0, r1
2164+
; V7A-T-NEXT: lsls r0, r1
2165+
; V7A-T-NEXT: lsrs r0, r1
21672166
; V7A-T-NEXT: bx lr
21682167
;
21692168
; V6M-LABEL: bextr32_c0:
21702169
; V6M: @ %bb.0:
2170+
; V6M-NEXT: movs r3, #32
2171+
; V6M-NEXT: subs r2, r3, r2
21712172
; V6M-NEXT: lsrs r0, r1
2172-
; V6M-NEXT: movs r1, #32
2173-
; V6M-NEXT: subs r1, r1, r2
2174-
; V6M-NEXT: movs r2, #0
2175-
; V6M-NEXT: mvns r2, r2
2176-
; V6M-NEXT: lsrs r2, r1
2177-
; V6M-NEXT: ands r0, r2
2173+
; V6M-NEXT: lsls r0, r2
2174+
; V6M-NEXT: lsrs r0, r2
21782175
; V6M-NEXT: bx lr
21792176
%shifted = lshr i32 %val, %numskipbits
21802177
%numhighbits = sub i32 32, %numlowbits
@@ -2196,23 +2193,22 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
21962193
;
21972194
; V7A-LABEL: bextr32_c1_indexzext:
21982195
; V7A: @ %bb.0:
2199-
; V7A-NEXT: rsb r2, r2, #32
2200-
; V7A-NEXT: mvn r3, #0
22012196
; V7A-NEXT: uxtb r1, r1
2202-
; V7A-NEXT: uxtb r2, r2
2203-
; V7A-NEXT: lsr r2, r3, r2
2204-
; V7A-NEXT: and r0, r2, r0, lsr r1
2197+
; V7A-NEXT: lsr r0, r0, r1
2198+
; V7A-NEXT: rsb r1, r2, #32
2199+
; V7A-NEXT: uxtb r1, r1
2200+
; V7A-NEXT: lsl r0, r0, r1
2201+
; V7A-NEXT: lsr r0, r0, r1
22052202
; V7A-NEXT: bx lr
22062203
;
22072204
; V7A-T-LABEL: bextr32_c1_indexzext:
22082205
; V7A-T: @ %bb.0:
22092206
; V7A-T-NEXT: uxtb r1, r1
22102207
; V7A-T-NEXT: lsrs r0, r1
22112208
; V7A-T-NEXT: rsb.w r1, r2, #32
2212-
; V7A-T-NEXT: mov.w r2, #-1
22132209
; V7A-T-NEXT: uxtb r1, r1
2214-
; V7A-T-NEXT: lsr.w r1, r2, r1
2215-
; V7A-T-NEXT: ands r0, r1
2210+
; V7A-T-NEXT: lsls r0, r1
2211+
; V7A-T-NEXT: lsrs r0, r1
22162212
; V7A-T-NEXT: bx lr
22172213
;
22182214
; V6M-LABEL: bextr32_c1_indexzext:
@@ -2222,10 +2218,8 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
22222218
; V6M-NEXT: movs r1, #32
22232219
; V6M-NEXT: subs r1, r1, r2
22242220
; V6M-NEXT: uxtb r1, r1
2225-
; V6M-NEXT: movs r2, #0
2226-
; V6M-NEXT: mvns r2, r2
2227-
; V6M-NEXT: lsrs r2, r1
2228-
; V6M-NEXT: ands r0, r2
2221+
; V6M-NEXT: lsls r0, r1
2222+
; V6M-NEXT: lsrs r0, r1
22292223
; V6M-NEXT: bx lr
22302224
%skip = zext i8 %numskipbits to i32
22312225
%shifted = lshr i32 %val, %skip
@@ -2249,32 +2243,29 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
22492243
; V7A-LABEL: bextr32_c2_load:
22502244
; V7A: @ %bb.0:
22512245
; V7A-NEXT: ldr r0, [r0]
2252-
; V7A-NEXT: rsb r2, r2, #32
2253-
; V7A-NEXT: mvn r3, #0
2254-
; V7A-NEXT: lsr r2, r3, r2
2255-
; V7A-NEXT: and r0, r2, r0, lsr r1
2246+
; V7A-NEXT: lsr r0, r0, r1
2247+
; V7A-NEXT: rsb r1, r2, #32
2248+
; V7A-NEXT: lsl r0, r0, r1
2249+
; V7A-NEXT: lsr r0, r0, r1
22562250
; V7A-NEXT: bx lr
22572251
;
22582252
; V7A-T-LABEL: bextr32_c2_load:
22592253
; V7A-T: @ %bb.0:
22602254
; V7A-T-NEXT: ldr r0, [r0]
2261-
; V7A-T-NEXT: rsb.w r2, r2, #32
2262-
; V7A-T-NEXT: mov.w r3, #-1
2263-
; V7A-T-NEXT: lsr.w r2, r3, r2
22642255
; V7A-T-NEXT: lsrs r0, r1
2265-
; V7A-T-NEXT: ands r0, r2
2256+
; V7A-T-NEXT: rsb.w r1, r2, #32
2257+
; V7A-T-NEXT: lsls r0, r1
2258+
; V7A-T-NEXT: lsrs r0, r1
22662259
; V7A-T-NEXT: bx lr
22672260
;
22682261
; V6M-LABEL: bextr32_c2_load:
22692262
; V6M: @ %bb.0:
2270-
; V6M-NEXT: ldr r3, [r0]
2271-
; V6M-NEXT: lsrs r3, r1
2272-
; V6M-NEXT: movs r0, #32
2273-
; V6M-NEXT: subs r1, r0, r2
2274-
; V6M-NEXT: movs r0, #0
2275-
; V6M-NEXT: mvns r0, r0
2263+
; V6M-NEXT: movs r3, #32
2264+
; V6M-NEXT: subs r2, r3, r2
2265+
; V6M-NEXT: ldr r0, [r0]
22762266
; V6M-NEXT: lsrs r0, r1
2277-
; V6M-NEXT: ands r0, r3
2267+
; V6M-NEXT: lsls r0, r2
2268+
; V6M-NEXT: lsrs r0, r2
22782269
; V6M-NEXT: bx lr
22792270
%val = load i32, ptr %w
22802271
%shifted = lshr i32 %val, %numskipbits
@@ -2298,13 +2289,13 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
22982289
;
22992290
; V7A-LABEL: bextr32_c3_load_indexzext:
23002291
; V7A: @ %bb.0:
2301-
; V7A-NEXT: rsb r2, r2, #32
23022292
; V7A-NEXT: ldr r0, [r0]
2303-
; V7A-NEXT: mvn r3, #0
23042293
; V7A-NEXT: uxtb r1, r1
2305-
; V7A-NEXT: uxtb r2, r2
2306-
; V7A-NEXT: lsr r2, r3, r2
2307-
; V7A-NEXT: and r0, r2, r0, lsr r1
2294+
; V7A-NEXT: lsr r0, r0, r1
2295+
; V7A-NEXT: rsb r1, r2, #32
2296+
; V7A-NEXT: uxtb r1, r1
2297+
; V7A-NEXT: lsl r0, r0, r1
2298+
; V7A-NEXT: lsr r0, r0, r1
23082299
; V7A-NEXT: bx lr
23092300
;
23102301
; V7A-T-LABEL: bextr32_c3_load_indexzext:
@@ -2313,24 +2304,21 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
23132304
; V7A-T-NEXT: uxtb r1, r1
23142305
; V7A-T-NEXT: lsrs r0, r1
23152306
; V7A-T-NEXT: rsb.w r1, r2, #32
2316-
; V7A-T-NEXT: mov.w r2, #-1
23172307
; V7A-T-NEXT: uxtb r1, r1
2318-
; V7A-T-NEXT: lsr.w r1, r2, r1
2319-
; V7A-T-NEXT: ands r0, r1
2308+
; V7A-T-NEXT: lsls r0, r1
2309+
; V7A-T-NEXT: lsrs r0, r1
23202310
; V7A-T-NEXT: bx lr
23212311
;
23222312
; V6M-LABEL: bextr32_c3_load_indexzext:
23232313
; V6M: @ %bb.0:
23242314
; V6M-NEXT: uxtb r1, r1
2325-
; V6M-NEXT: ldr r3, [r0]
2326-
; V6M-NEXT: lsrs r3, r1
2327-
; V6M-NEXT: movs r0, #32
2328-
; V6M-NEXT: subs r0, r0, r2
2329-
; V6M-NEXT: uxtb r1, r0
2330-
; V6M-NEXT: movs r0, #0
2331-
; V6M-NEXT: mvns r0, r0
2315+
; V6M-NEXT: ldr r0, [r0]
2316+
; V6M-NEXT: lsrs r0, r1
2317+
; V6M-NEXT: movs r1, #32
2318+
; V6M-NEXT: subs r1, r1, r2
2319+
; V6M-NEXT: uxtb r1, r1
2320+
; V6M-NEXT: lsls r0, r1
23322321
; V6M-NEXT: lsrs r0, r1
2333-
; V6M-NEXT: ands r0, r3
23342322
; V6M-NEXT: bx lr
23352323
%val = load i32, ptr %w
23362324
%skip = zext i8 %numskipbits to i32
@@ -2355,28 +2343,25 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
23552343
; V7A: @ %bb.0:
23562344
; V7A-NEXT: lsr r0, r0, r1
23572345
; V7A-NEXT: rsb r1, r2, #32
2358-
; V7A-NEXT: mvn r2, #0
2359-
; V7A-NEXT: and r0, r0, r2, lsr r1
2346+
; V7A-NEXT: lsl r0, r0, r1
2347+
; V7A-NEXT: lsr r0, r0, r1
23602348
; V7A-NEXT: bx lr
23612349
;
23622350
; V7A-T-LABEL: bextr32_c4_commutative:
23632351
; V7A-T: @ %bb.0:
23642352
; V7A-T-NEXT: lsrs r0, r1
23652353
; V7A-T-NEXT: rsb.w r1, r2, #32
2366-
; V7A-T-NEXT: mov.w r2, #-1
2367-
; V7A-T-NEXT: lsr.w r1, r2, r1
2368-
; V7A-T-NEXT: ands r0, r1
2354+
; V7A-T-NEXT: lsls r0, r1
2355+
; V7A-T-NEXT: lsrs r0, r1
23692356
; V7A-T-NEXT: bx lr
23702357
;
23712358
; V6M-LABEL: bextr32_c4_commutative:
23722359
; V6M: @ %bb.0:
2360+
; V6M-NEXT: movs r3, #32
2361+
; V6M-NEXT: subs r2, r3, r2
23732362
; V6M-NEXT: lsrs r0, r1
2374-
; V6M-NEXT: movs r1, #32
2375-
; V6M-NEXT: subs r1, r1, r2
2376-
; V6M-NEXT: movs r2, #0
2377-
; V6M-NEXT: mvns r2, r2
2378-
; V6M-NEXT: lsrs r2, r1
2379-
; V6M-NEXT: ands r0, r2
2363+
; V6M-NEXT: lsls r0, r2
2364+
; V6M-NEXT: lsrs r0, r2
23802365
; V6M-NEXT: bx lr
23812366
%shifted = lshr i32 %val, %numskipbits
23822367
%numhighbits = sub i32 32, %numlowbits
@@ -3032,8 +3017,8 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
30323017
; V7A-NEXT: orr r0, r0, r1, lsl r3
30333018
; V7A-NEXT: lsrpl r0, r1, r2
30343019
; V7A-NEXT: rsb r1, r12, #32
3035-
; V7A-NEXT: mvn r2, #0
3036-
; V7A-NEXT: and r0, r0, r2, lsr r1
3020+
; V7A-NEXT: lsl r0, r0, r1
3021+
; V7A-NEXT: lsr r0, r0, r1
30373022
; V7A-NEXT: bx lr
30383023
;
30393024
; V7A-T-LABEL: bextr64_32_c1:
@@ -3047,9 +3032,8 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
30473032
; V7A-T-NEXT: it pl
30483033
; V7A-T-NEXT: lsrpl.w r0, r1, r2
30493034
; V7A-T-NEXT: rsb.w r1, r12, #32
3050-
; V7A-T-NEXT: mov.w r2, #-1
3051-
; V7A-T-NEXT: lsr.w r1, r2, r1
3052-
; V7A-T-NEXT: ands r0, r1
3035+
; V7A-T-NEXT: lsls r0, r1
3036+
; V7A-T-NEXT: lsrs r0, r1
30533037
; V7A-T-NEXT: bx lr
30543038
;
30553039
; V6M-LABEL: bextr64_32_c1:
@@ -3060,10 +3044,8 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
30603044
; V6M-NEXT: ldr r1, [sp, #8]
30613045
; V6M-NEXT: movs r2, #32
30623046
; V6M-NEXT: subs r1, r2, r1
3063-
; V6M-NEXT: movs r2, #0
3064-
; V6M-NEXT: mvns r2, r2
3065-
; V6M-NEXT: lsrs r2, r1
3066-
; V6M-NEXT: ands r0, r2
3047+
; V6M-NEXT: lsls r0, r1
3048+
; V6M-NEXT: lsrs r0, r1
30673049
; V6M-NEXT: pop {r7, pc}
30683050
%shifted = lshr i64 %val, %numskipbits
30693051
%truncshifted = trunc i64 %shifted to i32
@@ -3100,8 +3082,8 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
31003082
; V7A-NEXT: orr r0, r0, r1, lsl r3
31013083
; V7A-NEXT: lsrpl r0, r1, r2
31023084
; V7A-NEXT: rsb r1, r12, #32
3103-
; V7A-NEXT: mvn r2, #0
3104-
; V7A-NEXT: and r0, r0, r2, lsr r1
3085+
; V7A-NEXT: lsl r0, r0, r1
3086+
; V7A-NEXT: lsr r0, r0, r1
31053087
; V7A-NEXT: bx lr
31063088
;
31073089
; V7A-T-LABEL: bextr64_32_c2:
@@ -3115,9 +3097,8 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
31153097
; V7A-T-NEXT: it pl
31163098
; V7A-T-NEXT: lsrpl.w r0, r1, r2
31173099
; V7A-T-NEXT: rsb.w r1, r12, #32
3118-
; V7A-T-NEXT: mov.w r2, #-1
3119-
; V7A-T-NEXT: lsr.w r1, r2, r1
3120-
; V7A-T-NEXT: ands r0, r1
3100+
; V7A-T-NEXT: lsls r0, r1
3101+
; V7A-T-NEXT: lsrs r0, r1
31213102
; V7A-T-NEXT: bx lr
31223103
;
31233104
; V6M-LABEL: bextr64_32_c2:
@@ -3128,10 +3109,8 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
31283109
; V6M-NEXT: ldr r1, [sp, #8]
31293110
; V6M-NEXT: movs r2, #32
31303111
; V6M-NEXT: subs r1, r2, r1
3131-
; V6M-NEXT: movs r2, #0
3132-
; V6M-NEXT: mvns r2, r2
3133-
; V6M-NEXT: lsrs r2, r1
3134-
; V6M-NEXT: ands r0, r2
3112+
; V6M-NEXT: lsls r0, r1
3113+
; V6M-NEXT: lsrs r0, r1
31353114
; V6M-NEXT: pop {r7, pc}
31363115
%shifted = lshr i64 %val, %numskipbits
31373116
%numhighbits = sub i32 32, %numlowbits
@@ -4610,5 +4589,3 @@ define void @c7_i64(i64 %arg, ptr %ptr) nounwind {
46104589
store i64 %tmp2, ptr %ptr
46114590
ret void
46124591
}
4613-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
4614-
; V7M: {{.*}}

0 commit comments

Comments
 (0)