Skip to content

Commit 3b19717

Browse files
authored
[AArch64] Removed redundant FMOV instruction for truncstores of f64/f32 via bitcast to i64/i32/i8. (#149997)
Previously, storing the low bits of a double or float that had been bitcast to i64/i32 and then truncated to i32, i16, or i8 would emit a redundant FMOV. This patch introduces new TableGen patterns that store directly from the FP register, avoiding the unnecessary FMOV. Tests added: bitcast_truncstore.ll
1 parent dd0161f commit 3b19717

File tree

2 files changed

+103
-0
lines changed

2 files changed

+103
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4751,6 +4751,26 @@ let Predicates = [IsLE] in {
47514751
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
47524752
}
47534753

4754+
// truncstorei32 of an f64 bitcast to i64: select STRSui on the ssub
// sub-register of the FPR64 source, so the store is issued from the FP
// register (per the commit description, this avoids a redundant FMOV).
4755+
def : Pat<(truncstorei32 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
4756+
(STRSui (EXTRACT_SUBREG FPR64:$Rt, ssub), GPR64sp:$Rn, uimm12s4:$offset)>;
4757+
4758+
// truncstorei16 of an f64 bitcast to i64: select STRHui on the hsub
// sub-register of the FPR64 source. The extracted sub-register is given an
// explicit f16 type in the output pattern.
4759+
def : Pat<(truncstorei16 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
4760+
(STRHui (f16 (EXTRACT_SUBREG FPR64:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$offset)>;
4761+
4762+
// truncstorei16 of an f32 bitcast to i32: select STRHui on the hsub
// sub-register of the FPR32 source. The extracted sub-register is given an
// explicit f16 type in the output pattern. The offset operand is named
// $offset, matching the neighbouring truncstore patterns.
4763+
def : Pat<(truncstorei16 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
4764+
(STRHui (f16 (EXTRACT_SUBREG FPR32:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$offset)>;
4765+
4766+
// truncstorei8 of an f64 bitcast to i64: select STRBui on the bsub
// sub-register of the FPR64 source. The extracted sub-register is given an
// explicit aarch64mfp8 type in the output pattern. The offset operand is
// named $offset, matching the neighbouring truncstore patterns.
4767+
def : Pat<(truncstorei8 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
4768+
(STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR64:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$offset)>;
4769+
4770+
// truncstorei8 of an f32 bitcast to i32: select STRBui on the bsub
// sub-register of the FPR32 source. The extracted sub-register is given an
// explicit aarch64mfp8 type in the output pattern. The offset operand is
// named $offset, matching the neighbouring truncstore patterns.
4771+
def : Pat<(truncstorei8 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
4772+
(STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR32:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$offset)>;
4773+
47544774
// truncstore i64
47554775
def : Pat<(truncstorei32 GPR64:$Rt,
47564776
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
; double -> bitcast to i64 -> trunc to i32 -> store.
; The CHECK lines expect a single `str s0, [x0]` directly after the block
; label, i.e. no FMOV before the store.
define void @_Z10test_truncstore_f64toi32Pjd(ptr %n, double %x) {
5+
; CHECK-LABEL: _Z10test_truncstore_f64toi32Pjd:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: str s0, [x0]
8+
; CHECK-NEXT: ret
9+
%i64 = bitcast double %x to i64
10+
%conv = trunc i64 %i64 to i32
11+
store i32 %conv, ptr %n, align 4
12+
ret void
13+
}
14+
15+
; double -> bitcast to i64 -> trunc to i16 -> store.
; The CHECK lines expect a single `str h0, [x0]` directly after the block
; label, i.e. no FMOV before the store.
define void @_Z9test_truncstore_f64toi16Ptd(ptr %n, double %x) {
16+
; CHECK-LABEL: _Z9test_truncstore_f64toi16Ptd:
17+
; CHECK: // %bb.0:
18+
; CHECK-NEXT: str h0, [x0]
19+
; CHECK-NEXT: ret
20+
%i64 = bitcast double %x to i64
21+
%conv = trunc i64 %i64 to i16
22+
store i16 %conv, ptr %n, align 2
23+
ret void
24+
}
25+
26+
; double -> bitcast to i64 -> trunc to i8 -> store.
; The CHECK lines expect a single `str b0, [x0]` directly after the block
; label, i.e. no FMOV before the store.
define void @_Z13test_truncstore_f64toi8Phd(ptr %n, double %x) {
27+
; CHECK-LABEL: _Z13test_truncstore_f64toi8Phd:
28+
; CHECK: // %bb.0:
29+
; CHECK-NEXT: str b0, [x0]
30+
; CHECK-NEXT: ret
31+
%i64 = bitcast double %x to i64
32+
%conv = trunc i64 %i64 to i8
33+
store i8 %conv, ptr %n, align 1
34+
ret void
35+
}
36+
37+
; float -> bitcast to i32 -> trunc to i16 -> store.
; The CHECK lines expect a single `str h0, [x0]` directly after the block
; label, i.e. no FMOV before the store.
define void @_Z17test_truncstore_f32toi16Ptf(ptr %n, float %x) {
38+
; CHECK-LABEL: _Z17test_truncstore_f32toi16Ptf:
39+
; CHECK: // %bb.0:
40+
; CHECK-NEXT: str h0, [x0]
41+
; CHECK-NEXT: ret
42+
%i32 = bitcast float %x to i32
43+
%conv = trunc i32 %i32 to i16
44+
store i16 %conv, ptr %n, align 2
45+
ret void
46+
}
47+
48+
; float -> bitcast to i32 -> trunc to i8 -> store.
; The CHECK lines expect a single `str b0, [x0]` directly after the block
; label, i.e. no FMOV before the store.
define void @_Z16test_truncstore_f32toi8Phf(ptr %n, float %x) {
49+
; CHECK-LABEL: _Z16test_truncstore_f32toi8Phf:
50+
; CHECK: // %bb.0:
51+
; CHECK-NEXT: str b0, [x0]
52+
; CHECK-NEXT: ret
53+
%i32 = bitcast float %x to i32
54+
%conv = trunc i32 %i32 to i8
55+
store i8 %conv, ptr %n, align 1
56+
ret void
57+
}
58+
59+
; Contrast case: the bitcast goes from i64 to double and the narrowing is an
; fptrunc to float, not an integer trunc of a bitcast FP value. The CHECK
; lines still expect `fmov d0, x1` and `fcvt s0, d0` before the store.
define void @test_truncstore_i64tof32(ptr %n, i64 %x) {
60+
; CHECK-LABEL: test_truncstore_i64tof32:
61+
; CHECK: // %bb.0:
62+
; CHECK-NEXT: fmov d0, x1
63+
; CHECK-NEXT: fcvt s0, d0
64+
; CHECK-NEXT: str s0, [x0]
65+
; CHECK-NEXT: ret
66+
%d = bitcast i64 %x to double
67+
%f = fptrunc double %d to float
68+
store float %f, ptr %n, align 4
69+
ret void
70+
}
71+
72+
; Contrast case: the bitcast goes from i32 to float and the narrowing is an
; fptrunc to half, not an integer trunc of a bitcast FP value. The CHECK
; lines still expect `fmov s0, w1` and `fcvt h0, s0` before the store.
define void @test_truncstore_i32tof16(ptr %n, i32 %x) {
73+
; CHECK-LABEL: test_truncstore_i32tof16:
74+
; CHECK: // %bb.0:
75+
; CHECK-NEXT: fmov s0, w1
76+
; CHECK-NEXT: fcvt h0, s0
77+
; CHECK-NEXT: str h0, [x0]
78+
; CHECK-NEXT: ret
79+
%f = bitcast i32 %x to float
80+
%h = fptrunc float %f to half
81+
store half %h, ptr %n, align 2
82+
ret void
83+
}

0 commit comments

Comments
 (0)