|
230 | 230 | and @t[7], @acc[7], @t[0] |
231 | 231 | adcs @acc[2], @acc[2], @t[6] |
232 | 232 | adcs @acc[3], @t[3], @t[7] |
233 | | - adc @t[1], @t[1], xzr // @t[1] is 1 or 0 |
| 233 | + adc @t[1], @t[1], xzr // @t[1] is 1, 0 or -1 |
234 | 234 |
|
235 | 235 | neg @t[0], @t[1] |
236 | | -
|
237 | | - and @acc[4], @acc[4], @t[0] // subtract mod<<256 conditionally |
238 | | - and @acc[5], @acc[5], @t[0] |
239 | | - subs @acc[0], @acc[0], @acc[4] |
240 | | - and @acc[6], @acc[6], @t[0] |
241 | | - sbcs @acc[1], @acc[1], @acc[5] |
242 | | - and @acc[7], @acc[7], @t[0] |
243 | | - sbcs @acc[2], @acc[2], @acc[6] |
| 236 | + orr @t[1], @t[1], @t[0] // excess bit or sign as mask |
| 237 | + asr @t[0], @t[0], #63 // excess bit as mask |
| 238 | +
|
| 239 | + and @acc[4], @acc[4], @t[1] // mask |mod| |
| 240 | + and @acc[5], @acc[5], @t[1] |
| 241 | + and @acc[6], @acc[6], @t[1] |
| 242 | + and @acc[7], @acc[7], @t[1] |
| 243 | +
|
| 244 | + eor @acc[4], @acc[4], @t[0] // conditionally negate |mod| |
| 245 | + eor @acc[5], @acc[5], @t[0] |
| 246 | + adds @acc[4], @acc[4], @t[0], lsr#63 |
| 247 | + eor @acc[6], @acc[6], @t[0] |
| 248 | + adcs @acc[5], @acc[5], xzr |
| 249 | + eor @acc[7], @acc[7], @t[0] |
| 250 | + adcs @acc[6], @acc[6], xzr |
| 251 | + adc @acc[7], @acc[7], xzr |
| 252 | +
|
| 253 | + adds @acc[0], @acc[0], @acc[4] // final adjustment for |mod|<<256 |
| 254 | + adcs @acc[1], @acc[1], @acc[5] |
| 255 | + adcs @acc[2], @acc[2], @acc[6] |
244 | 256 | stp @acc[0], @acc[1], [$out_ptr,#8*4] |
245 | | - sbcs @acc[3], @acc[3], @acc[7] |
| 257 | + adc @acc[3], @acc[3], @acc[7] |
246 | 258 | stp @acc[2], @acc[3], [$out_ptr,#8*6] |
247 | 259 |
|
248 | 260 | add sp, sp, #$frame |
|
0 commit comments