/
pei-scavenge-sgpr-carry-out.mir
185 lines (159 loc) · 15.7 KB
/
pei-scavenge-sgpr-carry-out.mir
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
# Test what happens when an SGPR is unavailable for the unused add
# carry out when materializing the frame index.
# There are truly no free SGPRs, so the entire frame index expansion
# needs to be inverted to restore the original frame register.
---
name: scavenge_sgpr_pei_no_sgprs
tracksRegLiveness: true
stack:
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
machineFunctionInfo:
isEntryFunction: false
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
frameOffsetReg: $sgpr33
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr1
; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs
; CHECK: liveins: $vgpr1, $vgpr2
; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; CHECK: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
; CHECK: $sgpr33 = S_ADD_U32 killed $sgpr33, 8192, implicit-def $scc
; CHECK: $vgpr3 = COPY killed $sgpr33
; CHECK: $sgpr33 = S_SUB_U32 killed $sgpr33, 8192, implicit-def $scc
; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; CHECK: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
S_ENDPGM 0, implicit $vcc
...
# One 32-bit SGPR is available for the intermediate scale computation,
# so only an extra copy to VALU is necessary.
---
name: scavenge_sgpr_pei_one_sgpr
tracksRegLiveness: true
stack:
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
machineFunctionInfo:
isEntryFunction: false
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
frameOffsetReg: $sgpr33
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr1
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr
; CHECK: liveins: $sgpr29, $vgpr1
; CHECK: $sgpr29 = frame-setup COPY $sgpr33
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
; CHECK: $sgpr33 = S_ADD_U32 killed $sgpr33, 8192, implicit-def $scc
; CHECK: $vgpr2 = COPY killed $sgpr33
; CHECK: $sgpr33 = S_SUB_U32 killed $sgpr33, 8192, implicit-def $scc
; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: $sgpr33 = frame-destroy COPY $sgpr29
; CHECK: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31
S_ENDPGM 0, implicit $vcc
...
# When only one 64-bit SGPR is available for the unused carry out pre gfx9,
# we must reuse one of the 32-bit SGPR sub-regs to materialize the offset.
---
name: scavenge_sgpr_pei_one_sgpr_64
tracksRegLiveness: true
stack:
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
machineFunctionInfo:
isEntryFunction: false
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
frameOffsetReg: $sgpr33
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr1
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64
; CHECK: liveins: $sgpr28, $vgpr1
; CHECK: $sgpr28 = frame-setup COPY $sgpr33
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
; CHECK: $sgpr29 = S_ADD_U32 killed $sgpr29, 8192, implicit-def $scc
; CHECK: $vgpr2 = COPY killed $sgpr29
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: $sgpr33 = frame-destroy COPY $sgpr28
; CHECK: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
S_ENDPGM 0, implicit $vcc
...
# Prefer to use vcc as unused carry out.
---
name: scavenge_sgpr_pei_prefer_vcc
tracksRegLiveness: true
stack:
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
machineFunctionInfo:
isEntryFunction: false
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
frameOffsetReg: $sgpr33
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr1
; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc
; CHECK: liveins: $sgpr28, $vgpr1
; CHECK: $sgpr28 = frame-setup COPY $sgpr33
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK: $vcc_lo = S_MOV_B32 8192
; CHECK: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: $sgpr33 = frame-destroy COPY $sgpr28
; CHECK: S_ENDPGM 0
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
S_ENDPGM 0
...