Skip to content

Commit

Permalink
asm/ct_inverse_mod_256-*.pl: fix another corner case.
Browse files Browse the repository at this point in the history
Thanks to Guido Vranken for report.
  • Loading branch information
dot-asm committed Jan 25, 2022
1 parent 482ce95 commit fd45352
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 19 deletions.
32 changes: 22 additions & 10 deletions src/asm/ct_inverse_mod_256-armv8.pl
Original file line number Diff line number Diff line change
Expand Up @@ -230,19 +230,31 @@
and @t[7], @acc[7], @t[0]
adcs @acc[2], @acc[2], @t[6]
adcs @acc[3], @t[3], @t[7]
adc @t[1], @t[1], xzr // @t[1] is 1 or 0
adc @t[1], @t[1], xzr // @t[1] is 1, 0 or -1

neg @t[0], @t[1]

and @acc[4], @acc[4], @t[0] // subtract mod<<256 conditionally
and @acc[5], @acc[5], @t[0]
subs @acc[0], @acc[0], @acc[4]
and @acc[6], @acc[6], @t[0]
sbcs @acc[1], @acc[1], @acc[5]
and @acc[7], @acc[7], @t[0]
sbcs @acc[2], @acc[2], @acc[6]
orr @t[1], @t[1], @t[0] // excess bit or sign as mask
asr @t[0], @t[0], #63 // excess bit as mask

and @acc[4], @acc[4], @t[1] // mask |mod|
and @acc[5], @acc[5], @t[1]
and @acc[6], @acc[6], @t[1]
and @acc[7], @acc[7], @t[1]

eor @acc[4], @acc[4], @t[0] // conditionally negate |mod|
eor @acc[5], @acc[5], @t[0]
adds @acc[4], @acc[4], @t[0], lsr#63
eor @acc[6], @acc[6], @t[0]
adcs @acc[5], @acc[5], xzr
eor @acc[7], @acc[7], @t[0]
adcs @acc[6], @acc[6], xzr
adc @acc[7], @acc[7], xzr

adds @acc[0], @acc[0], @acc[4] // final adjustment for |mod|<<256
adcs @acc[1], @acc[1], @acc[5]
adcs @acc[2], @acc[2], @acc[6]
stp @acc[0], @acc[1], [$out_ptr,#8*4]
sbcs @acc[3], @acc[3], @acc[7]
adc @acc[3], @acc[3], @acc[7]
stp @acc[2], @acc[3], [$out_ptr,#8*6]

add sp, sp, #$frame
Expand Down
32 changes: 23 additions & 9 deletions src/asm/ct_inverse_mod_256-x86_64.pl
Original file line number Diff line number Diff line change
Expand Up @@ -297,20 +297,34 @@
adc %rdx, @acc[7]
adc \$0, %rax

neg %rax # excess bit as mask
mov %rax, %rdx
neg %rax
or %rax, %rdx # excess bit or sign as mask
sar \$63, %rax # excess bit as mask

mov %rax, @acc[0] # mask |modulus|
mov %rax, @acc[1]
mov %rdx, @acc[0] # mask |modulus|
mov %rdx, @acc[1]
and 8*0($in_ptr), @acc[0]
mov %rax, @acc[2]
mov %rdx, @acc[2]
and 8*1($in_ptr), @acc[1]
and 8*2($in_ptr), @acc[2]
and 8*3($in_ptr), %rax
and 8*3($in_ptr), %rdx

sub @acc[0], @acc[4] # conditionally subtract |modulus|<<256
sbb @acc[1], @acc[5]
sbb @acc[2], @acc[6]
sbb %rax, @acc[7]
xor %rax, @acc[0] # conditionally negate |modulus|
xor %rcx, %rcx
xor %rax, @acc[1]
sub %rax, %rcx
xor %rax, @acc[2]
xor %rax, %rdx
add %rcx, @acc[0]
adc \$0, @acc[1]
adc \$0, @acc[2]
adc \$0, %rdx

add @acc[0], @acc[4] # final adjustment for |modulus|<<256
adc @acc[1], @acc[5]
adc @acc[2], @acc[6]
adc %rdx, @acc[7]

mov @acc[4], 8*4($out_ptr) # store absolute value
mov @acc[5], 8*5($out_ptr)
Expand Down

3 comments on commit fd45352

@Tabaie
Copy link

@Tabaie Tabaie commented on fd45352 Feb 4, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this only an implementation bug or did it correspond to an error in the Pornin20 paper?

@dot-asm
Copy link
Collaborator Author

@dot-asm dot-asm commented on fd45352 Feb 5, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's kind of a trick question. The thing is that the implementation doesn't follow the paper literally. Most notably, line 22 of Algorithm 2 is different here: there is no mod m at each iteration. Instead, the reduction is done once elsewhere. And the problem was at the interface to this common reduction, something that is not part of the paper. This is why the question is a "trick": either of the possible answers is formally incorrect.

@dot-asm
Copy link
Collaborator Author

@dot-asm dot-asm commented on fd45352 Feb 5, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Though one can say the following. As far as I understand, the error referred to was about how the approximations of 'a' and 'b' were described. The problem in question is not related to the way the approximations are constructed. So at least it doesn't correspond to that error.

Please sign in to comment.