diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl index 86ee3825f2f28..98b0a22cc1f3f 100755 --- a/crypto/ec/asm/ecp_nistz256-x86_64.pl +++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl @@ -2,7 +2,14 @@ ############################################################################## # # -# Copyright 2014 Intel Corporation # +# Copyright (c) 2015 Intel Corporation # +# Copyright (c) 2015 CloudFlare, Inc. # +# All rights reserved. # +# # +# This software is made available to you under your choice of the # +# Apache V.2.0 and/or BSD license below: # +# # +############################################################################## # # # Licensed under the Apache License, Version 2.0 (the "License"); # # you may not use this file except in compliance with the License. # @@ -18,10 +25,41 @@ # # ############################################################################## # # +# Redistribution and use in source and binary forms, with or without # +# modification, are permitted provided that the following conditions are # +# met: # +# # +# # Redistributions of source code must retain the above copyright # +# notice, this list of conditions and the following disclaimer. # +# # +# # Redistributions in binary form must reproduce the above copyright # +# notice, this list of conditions and the following disclaimer in the # +# documentation and/or other materials provided with the # +# distribution. # +# # +# # Neither the name of the copyright holders nor the names of its # +# contributors may be used to endorse or promote products derived from # +# this software without specific prior written permission. # +# # +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR# +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR # +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # +# # +############################################################################## +# # # Developers and authors: # -# Shay Gueron (1, 2), and Vlad Krasnov (1) # +# Shay Gueron (1, 2), and Vlad Krasnov (1, 3) # # (1) Intel Corporation, Israel Development Center # # (2) University of Haifa # +# (3) CloudFlare, Inc. 
# # Reference: # # S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with# # 256 Bit Primes" # @@ -108,6 +146,13 @@ .long 3,3,3,3,3,3,3,3 .LONE_mont: .quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe + +# Constants for computations modulo ord(p256) +.align 64 +.Lord: +.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000 +.LordK: +.quad 0xccd1c8aaee00bc4f ___ { @@ -433,6 +478,981 @@ my ($t0,$t1,$t2,$t3,$t4)=("%rcx","%rbp","%rbx","%rdx","%rax"); my ($poly1,$poly3)=($acc6,$acc7); +$code.=<<___; +################################################################################ +# void ecp_nistz256_ord_mul_mont( +# uint64_t res[4], +# uint64_t a[4], +# uint64_t b[4]); + +.globl ecp_nistz256_ord_mul_mont +.type ecp_nistz256_ord_mul_mont,\@function,3 +.align 32 +ecp_nistz256_ord_mul_mont: +___ +$code.=<<___ if ($addx); + mov \$0x80100, %ecx + and OPENSSL_ia32cap_P+8(%rip), %ecx + cmp \$0x80100, %ecx + je ecp_nistz256_ord_mul_montx +___ +$code.=<<___; + push %rbp + push %rbx + push %r12 + push %r13 + + mov $b_org, $b_ptr + # * b[0] + mov 8*0($b_ptr), $t0 + mov 8*0($a_ptr), $t4 + mul $t0 + mov $t4, $acc0 + mov $t3, $acc1 + + mov 8*1($a_ptr), $t4 + mul $t0 + add $t4, $acc1 + adc \$0, $t3 + mov $t3, $acc2 + + mov 8*2($a_ptr), $t4 + mul $t0 + add $t4, $acc2 + adc \$0, $t3 + mov $t3, $acc3 + + mov 8*3($a_ptr), $t4 + mul $t0 + add $t4, $acc3 + adc \$0, $t3 + mov $t3, $acc4 + xor $acc5, $acc5 + + # First reduction step + mov $acc0, $t4 + mulq .LordK(%rip) + mov $t4, $t0 + + mov 8*0+.Lord(%rip), $t4 + mul $t0 + add $t4, $acc0 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*1+.Lord(%rip), $t4 + mul $t0 + add $t1, $acc1 + adc \$0, $t3 + add $t4, $acc1 + + mov $t0, $t1 + adc $t3, $acc2 + adc \$0, $t1 + sub $t0, $acc2 + sbb \$0, $t1 + + mov 8*3+.Lord(%rip), $t4 + mul $t0 + add $t1, $acc3 + adc \$0, $t3 + add $t4, $acc3 + adc $t3, $acc4 + adc \$0, $acc5 + + # * b[1] + mov 8*1($b_ptr), $t0 + + mov 8*0($a_ptr), $t4 + mul $t0 + add $t4, $acc1 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*1($a_ptr), $t4 + mul $t0 + add $t1, $acc2 + adc \$0, $t3 + add $t4, $acc2 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*2($a_ptr), $t4 + mul $t0 + add $t1, $acc3 + adc \$0, $t3 + add $t4, $acc3 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*3($a_ptr), $t4 + mul $t0 + add $t1, $acc4 + adc \$0, $t3 + add $t4, $acc4 + adc $t3, $acc5 + adc \$0, $acc0 + # Second reduction step + mov $acc1, $t4 + mulq .LordK(%rip) + mov $t4, $t0 + + mov 8*0+.Lord(%rip), $t4 + mul $t0 + add $t4, $acc1 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*1+.Lord(%rip), $t4 + mul $t0 + add $t1, $acc2 + adc \$0, $t3 + add $t4, $acc2 + + mov $t0, $t1 + adc $t3, $acc3 + adc \$0, $t1 + sub $t0, $acc3 + sbb \$0, $t1 + + mov 8*3+.Lord(%rip), $t4 + mul $t0 + add $t1, $acc4 + adc \$0, $t3 + add $t4, $acc4 + adc $t3, $acc5 + adc \$0, $acc0 + # * b[2] + mov 8*2($b_ptr), $t0 + + mov 8*0($a_ptr), $t4 + mul $t0 + add $t4, $acc2 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*1($a_ptr), $t4 + mul $t0 + add $t1, $acc3 + adc \$0, $t3 + add $t4, $acc3 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*2($a_ptr), $t4 + mul $t0 + add $t1, $acc4 + adc \$0, $t3 + add $t4, $acc4 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*3($a_ptr), $t4 + mul $t0 + add $t1, $acc5 + adc \$0, $t3 + add $t4, $acc5 + adc $t3, $acc0 + adc \$0, $acc1 + # Third reduction step + mov $acc2, $t4 + mulq .LordK(%rip) + mov $t4, $t0 + + mov 8*0+.Lord(%rip), $t4 + mul $t0 + add $t4, $acc2 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*1+.Lord(%rip), $t4 + mul $t0 + add $t1, $acc3 + adc \$0, 
$t3 + add $t4, $acc3 + + mov $t0, $t1 + adc $t3, $acc4 + adc \$0, $t1 + sub $t0, $acc4 + sbb \$0, $t1 + + mov 8*3+.Lord(%rip), $t4 + mul $t0 + add $t1, $acc5 + adc \$0, $t3 + add $t4, $acc5 + adc $t3, $acc0 + adc \$0, $acc1 + # * b[3] + mov 8*3($b_ptr), $t0 + + mov 8*0($a_ptr), $t4 + mul $t0 + add $t4, $acc3 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*1($a_ptr), $t4 + mul $t0 + add $t1, $acc4 + adc \$0, $t3 + add $t4, $acc4 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*2($a_ptr), $t4 + mul $t0 + add $t1, $acc5 + adc \$0, $t3 + add $t4, $acc5 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*3($a_ptr), $t4 + mul $t0 + add $t1, $acc0 + adc \$0, $t3 + add $t4, $acc0 + adc $t3, $acc1 + adc \$0, $acc2 + # Last reduction step + mov $acc3, $t4 + mulq .LordK(%rip) + mov $t4, $t0 + + mov 8*0+.Lord(%rip), $t4 + mul $t0 + add $t4, $acc3 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*1+.Lord(%rip), $t4 + mul $t0 + add $t1, $acc4 + adc \$0, $t3 + add $t4, $acc4 + + mov $t0, $t1 + adc $t3, $acc5 + adc \$0, $t1 + sub $t0, $acc5 + sbb \$0, $t1 + + mov 8*3+.Lord(%rip), $t4 + mul $t0 + add $t1, $acc0 + adc \$0, $t3 + add $t4, $acc0 + adc $t3, $acc1 + adc \$0, $acc2 + + # Copy result [255:0] + mov $acc4, $a_ptr + mov $acc5, $acc3 + mov $acc0, $t0 + mov $acc1, $t1 + # Subtract ord + sub 8*0+.Lord(%rip), $acc4 + sbb 8*1+.Lord(%rip), $acc5 + sbb 8*2+.Lord(%rip), $acc0 + sbb 8*3+.Lord(%rip), $acc1 + sbb \$0, $acc2 + + cmovc $a_ptr, $acc4 + cmovc $acc3, $acc5 + cmovc $t0, $acc0 + cmovc $t1, $acc1 + + mov $acc4, 8*0($r_ptr) + mov $acc5, 8*1($r_ptr) + mov $acc0, 8*2($r_ptr) + mov $acc1, 8*3($r_ptr) + + pop %r13 + pop %r12 + pop %rbx + pop %rbp + ret +.size ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont +___ +$code.=<<___ if ($addx); +################################################################################ +.align 32 +ecp_nistz256_ord_mul_montx: + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + + mov $b_org, $b_ptr + mov 8*0($b_org), %rdx + mov 8*0($a_ptr), $acc1 + mov 8*1($a_ptr), $acc2 + mov 8*2($a_ptr), $acc3 + mov 8*3($a_ptr), $acc4 + lea -128($a_ptr), $a_ptr # control u-op density + + # Multiply by b[0] + mulx $acc1, $acc0, $acc1 + mulx $acc2, $t0, $acc2 + xor $acc5, $acc5 # cf=0 + mulx $acc3, $t1, $acc3 + adc $t0, $acc1 + mulx $acc4, $t0, $acc4 + mov $acc0, %rdx + mulx .LordK(%rip), %rdx, $t4 + adc $t1, $acc2 + adc $t0, $acc3 + adc \$0, $acc4 + + ######################################################################## + xor %eax, %eax + mulx 8*0+.Lord(%rip), $t0, $t1 + adcx $t0, $acc0 + adox $t1, $acc1 + mulx 8*1+.Lord(%rip), $t0, $t1 + adcx $t0, $acc1 + adox $t1, $acc2 + mulx 8*2+.Lord(%rip), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + mulx 8*3+.Lord(%rip), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + mov 8*1($b_ptr), %rdx + adcx %rax, $acc4 + adox %rax, $acc5 + adc \$0, $acc5 + xor $acc0 ,$acc0 + ######################################################################## + # Multiply by b[1] + mulx 8*0+128($a_ptr), $t0, $t1 + adcx $t0, $acc1 + adox $t1, $acc2 + + mulx 8*1+128($a_ptr), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + + mulx 8*2+128($a_ptr), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx 8*3+128($a_ptr), $t0, $t1 + mov $acc1, %rdx + mulx .LordK(%rip), %rdx, $t4 + adcx $t0, $acc4 + adox $t1, $acc5 + + adcx $acc0, $acc5 + adox $acc0, $acc0 + adc \$0, $acc0 + ######################################################################## + xor %eax, %eax + mulx 8*0+.Lord(%rip), $t0, $t1 + adcx $t0, $acc1 + adox $t1, $acc2 + mulx 8*1+.Lord(%rip), $t0, $t1 + adcx $t0, $acc2 + adox $t1, 
$acc3 + mulx 8*2+.Lord(%rip), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + mulx 8*3+.Lord(%rip), $t0, $t1 + adcx $t0, $acc4 + adox $t1, $acc5 + mov 8*2($b_ptr), %rdx + adcx %rax, $acc5 + adox %rax, $acc0 + adc \$0, $acc0 + xor $acc1 ,$acc1 # $acc1=0,cf=0,of=0 + ######################################################################## + # Multiply by b[2] + mulx 8*0+128($a_ptr), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + + mulx 8*1+128($a_ptr), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx 8*2+128($a_ptr), $t0, $t1 + adcx $t0, $acc4 + adox $t1, $acc5 + + mulx 8*3+128($a_ptr), $t0, $t1 + mov $acc2, %rdx + mulx .LordK(%rip), %rdx, $t4 + adcx $t0, $acc5 + adox $t1, $acc0 + + adcx $acc1, $acc0 + adox $acc1, $acc1 + adc \$0, $acc1 + + ######################################################################## + xor %eax, %eax + mulx 8*0+.Lord(%rip), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + mulx 8*1+.Lord(%rip), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + mulx 8*2+.Lord(%rip), $t0, $t1 + adcx $t0, $acc4 + adox $t1, $acc5 + mulx 8*3+.Lord(%rip), $t0, $t1 + adcx $t0, $acc5 + adox $t1, $acc0 + mov 8*3($b_ptr), %rdx + adcx %rax, $acc0 + adox %rax, $acc1 + adc \$0, $acc1 + xor $acc2 ,$acc2 # $acc2=0,cf=0,of=0 + ######################################################################## + # Multiply by b[3] + mulx 8*0+128($a_ptr), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx 8*1+128($a_ptr), $t0, $t1 + adcx $t0, $acc4 + adox $t1, $acc5 + + mulx 8*2+128($a_ptr), $t0, $t1 + adcx $t0, $acc5 + adox $t1, $acc0 + + mulx 8*3+128($a_ptr), $t0, $t1 + mov $acc3, %rdx + mulx .LordK(%rip), %rdx, $t4 + adcx $t0, $acc0 + adox $t1, $acc1 + + adcx $acc2, $acc1 + adox $acc2, $acc2 + adc \$0, $acc2 + + ######################################################################## + xor %eax, %eax + mulx 8*0+.Lord(%rip), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + mulx 8*1+.Lord(%rip), $t0, $t1 + adcx $t0, $acc4 + adox $t1, $acc5 + mulx 8*2+.Lord(%rip), $t0, $t1 + adcx $t0, $acc5 + adox $t1, $acc0 + mulx 8*3+.Lord(%rip), $t0, $t1 + adcx $t0, $acc0 + adox $t1, $acc1 + adcx %rax, $acc1 + adox %rax, $acc2 + adc \$0, $acc2 + + ######################################################################## + # Branch-less conditional subtraction of P + xor %eax, %eax + mov $acc4, $t2 + mov $acc5, $t3 + mov $acc0, $t0 + mov $acc1, $t1 + sbb 8*0+.Lord(%rip), $acc4 # .Lpoly[0] + sbb 8*1+.Lord(%rip), $acc5 # .Lpoly[1] + sbb 8*2+.Lord(%rip), $acc0 # .Lpoly[1] + sbb 8*3+.Lord(%rip), $acc1 # .Lpoly[1] + sbb \$0, $acc2 + + cmovc $t2, $acc4 + cmovc $t3, $acc5 + mov $acc4, 8*0($r_ptr) + cmovc $t0, $acc0 + mov $acc5, 8*1($r_ptr) + cmovc $t1, $acc1 + mov $acc0, 8*2($r_ptr) + mov $acc1, 8*3($r_ptr) + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + pop %rbp + ret +.size ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx +################################################################################ +___ +$code.=<<___; +# void ecp_nistz256_ord_sqr_mont( +# uint64_t res[4], +# uint64_t a[4], +# int rep); + +.globl ecp_nistz256_ord_sqr_mont +.type ecp_nistz256_ord_sqr_mont,\@function,3 +.align 32 +ecp_nistz256_ord_sqr_mont: + +___ +$code.=<<___ if ($addx); + mov \$0x80100, %ecx + and OPENSSL_ia32cap_P+8(%rip), %ecx + cmp \$0x80100, %ecx + je ecp_nistz256_ord_sqr_montx +___ +$code.=<<___; + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + + mov .LordK(%rip), %r15 + + mov $b_org, %r14 + +.Lord_sqr_loop: + # y[1:] * y[0] + mov 8*0($a_ptr), $t0 + + mov 8*1($a_ptr), $t4 + mul $t0 + mov $t4, 
$acc1 + mov $t3, $acc2 + + mov 8*2($a_ptr), $t4 + mul $t0 + add $t4, $acc2 + adc \$0, $t3 + mov $t3, $acc3 + + mov 8*3($a_ptr), $t4 + mul $t0 + add $t4, $acc3 + adc \$0, $t3 + mov $t3, $acc4 + # y[2:] * y[1] + mov 8*1($a_ptr), $t0 + + mov 8*2($a_ptr), $t4 + mul $t0 + add $t4, $acc3 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*3($a_ptr), $t4 + mul $t0 + add $t1, $acc4 + adc \$0, $t3 + add $t4, $acc4 + adc \$0, $t3 + mov $t3, $acc5 + # y[3] * y[2] + mov 8*2($a_ptr), $t0 + + mov 8*3($a_ptr), $t4 + mul $t0 + add $t4, $acc5 + adc \$0, $t3 + mov $t3, $b_ptr + xor $t1, $t1 + # *2 + add $acc1, $acc1 + adc $acc2, $acc2 + adc $acc3, $acc3 + adc $acc4, $acc4 + adc $acc5, $acc5 + adc $b_ptr, $b_ptr + adc \$0, $t1 + # Missing products + mov 8*0($a_ptr), $t4 + mul $t4 + mov $t4, $acc0 + mov $t3, $t0 + + mov 8*1($a_ptr), $t4 + mul $t4 + add $t0, $acc1 + adc $t4, $acc2 + adc \$0, $t3 + mov $t3, $t0 + + mov 8*2($a_ptr), $t4 + mul $t4 + add $t0, $acc3 + adc $t4, $acc4 + adc \$0, $t3 + mov $t3, $t0 + + mov 8*3($a_ptr), $t4 + mul $t4 + add $t0, $acc5 + adc $t4, $b_ptr + adc $t3, $t1 + mov $t1, $a_ptr + + # First reduction step + mov $acc0, $t4 + mulq %r15 + mov $t4, $t0 + + mov 8*0+.Lord(%rip), $t4 + mul $t0 + add $t4, $acc0 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*1+.Lord(%rip), $t4 + mul $t0 + add $t1, $acc1 + adc \$0, $t3 + add $t4, $acc1 + + mov $t0, $t1 + adc $t3, $acc2 + adc \$0, $t1 + sub $t0, $acc2 + sbb \$0, $t1 + + mov $t0, $t4 + mov $t0, $t3 + mov $t0, $acc0 + shl \$32, $t4 + shr \$32, $t3 + + add $t1, $acc3 + adc \$0, $acc0 + sub $t4, $acc3 + sbb $t3, $acc0 + + # Second reduction step + mov $acc1, $t4 + mulq %r15 + mov $t4, $t0 + + mov 8*0+.Lord(%rip), $t4 + mul $t0 + add $t4, $acc1 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*1+.Lord(%rip), $t4 + mul $t0 + add $t1, $acc2 + adc \$0, $t3 + add $t4, $acc2 + + mov $t0, $t1 + adc $t3, $acc3 + adc \$0, $t1 + sub $t0, $acc3 + sbb \$0, $t1 + + mov $t0, $t4 + mov $t0, $t3 + mov $t0, $acc1 + shl \$32, $t4 + shr \$32, $t3 + + add $t1, $acc0 + adc \$0, $acc1 + sub $t4, $acc0 + sbb $t3, $acc1 + + # Third reduction step + mov $acc2, $t4 + mulq %r15 + mov $t4, $t0 + + mov 8*0+.Lord(%rip), $t4 + mul $t0 + add $t4, $acc2 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*1+.Lord(%rip), $t4 + mul $t0 + add $t1, $acc3 + adc \$0, $t3 + add $t4, $acc3 + + mov $t0, $t1 + adc $t3, $acc0 + adc \$0, $t1 + sub $t0, $acc0 + sbb \$0, $t1 + + mov $t0, $t4 + mov $t0, $t3 + mov $t0, $acc2 + shl \$32, $t4 + shr \$32, $t3 + + add $t1, $acc1 + adc \$0, $acc2 + sub $t4, $acc1 + sbb $t3, $acc2 + + # Last reduction step + mov $acc3, $t4 + mulq %r15 + mov $t4, $t0 + + mov 8*0+.Lord(%rip), $t4 + mul $t0 + add $t4, $acc3 + adc \$0, $t3 + mov $t3, $t1 + + mov 8*1+.Lord(%rip), $t4 + mul $t0 + add $t1, $acc0 + adc \$0, $t3 + add $t4, $acc0 + + mov $t0, $t1 + adc $t3, $acc1 + adc \$0, $t1 + sub $t0, $acc1 + sbb \$0, $t1 + + mov $t0, $t4 + mov $t0, $acc3 + shl \$32, $t4 + shr \$32, $t0 + + add $t1, $acc2 + adc \$0, $acc3 + sub $t4, $acc2 + sbb $t0, $acc3 + xor $t0, $t0 + # Add bits [511:256] of the sqr result + add $acc4, $acc0 + adc $acc5, $acc1 + adc $b_ptr, $acc2 + adc $a_ptr, $acc3 + adc \$0, $t0 + + mov $acc0, $acc4 + mov $acc1, $acc5 + mov $acc2, $b_ptr + mov $acc3, $t1 + # Subtract p256 + sub 8*0+.Lord(%rip), $acc0 + sbb 8*1+.Lord(%rip), $acc1 + sbb 8*2+.Lord(%rip), $acc2 + sbb 8*3+.Lord(%rip), $acc3 + sbb \$0, $t0 + + cmovc $acc4, $acc0 + cmovc $acc5, $acc1 + cmovc $b_ptr, $acc2 + cmovc $t1, $acc3 + + mov $acc0, 8*0($r_ptr) + mov $acc1, 8*1($r_ptr) + mov $acc2, 8*2($r_ptr) + mov $acc3, 8*3($r_ptr) + mov 
$r_ptr, $a_ptr + dec %r14 + jne .Lord_sqr_loop + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + pop %rbp + ret +.size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont +___ +$code.=<<___ if ($addx); +.align 32 +ecp_nistz256_ord_sqr_montx: + + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + + mov $b_org, $t2 + lea -128($a_ptr), $a_ptr # control u-op density +.Lord_sqrx_loop: + mov 8*0+128($a_ptr), %rdx + mov 8*1+128($a_ptr), $acc6 + mov 8*2+128($a_ptr), $acc7 + mov 8*3+128($a_ptr), $acc0 + + mulx $acc6, $acc1, $acc2 # a[0]*a[1] + mulx $acc7, $t0, $acc3 # a[0]*a[2] + xor %eax, %eax + adc $t0, $acc2 + mulx $acc0, $t1, $acc4 # a[0]*a[3] + mov $acc6, %rdx + adc $t1, $acc3 + adc \$0, $acc4 + xor $acc5, $acc5 # $acc5=0,cf=0,of=0 + ################################# + mulx $acc7, $t0, $t1 # a[1]*a[2] + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx $acc0, $t0, $t1 # a[1]*a[3] + mov $acc7, %rdx + adcx $t0, $acc4 + adox $t1, $acc5 + adc \$0, $acc5 + ################################# + mulx $acc0, $t0, $acc6 # a[2]*a[3] + mov 8*0+128($a_ptr), %rdx + xor $acc7, $acc7 # $acc7=0,cf=0,of=0 + adcx $acc1, $acc1 # acc1:6<<1 + adox $t0, $acc5 + adcx $acc2, $acc2 + adox $acc7, $acc6 # of=0 + + mulx %rdx, $acc0, $t1 + mov 8*1+128($a_ptr), %rdx + adcx $acc3, $acc3 + adox $t1, $acc1 + adcx $acc4, $acc4 + mulx %rdx, $t0, $t4 + mov 8*2+128($a_ptr), %rdx + adcx $acc5, $acc5 + adox $t0, $acc2 + adcx $acc6, $acc6 + .byte 0x67 + mulx %rdx, $t0, $t1 + mov 8*3+128($a_ptr), %rdx + adox $t4, $acc3 + adcx $acc7, $acc7 + adox $t0, $acc4 + adox $t1, $acc5 + .byte 0x67,0x67 + mulx %rdx, $t0, $t4 + adox $t0, $acc6 + adox $t4, $acc7 + + #reduce + mov $acc0, %rdx + mulx .LordK(%rip), %rdx, $t0 + + xor %eax, %eax + mulx 8*0+.Lord(%rip), $t0, $t1 + adcx $t0, $acc0 + adox $t1, $acc1 + mulx 8*1+.Lord(%rip), $t0, $t1 + adcx $t0, $acc1 + adox $t1, $acc2 + mulx 8*2+.Lord(%rip), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + mulx 8*3+.Lord(%rip), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc0 + adcx %rax, $acc0 + ################################# + mov $acc1, %rdx + mulx .LordK(%rip), %rdx, $t0 + + mulx 8*0+.Lord(%rip), $t0, $t1 + adcx $t0, $acc1 + adox $t1, $acc2 + mulx 8*1+.Lord(%rip), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + mulx 8*2+.Lord(%rip), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc0 + mulx 8*3+.Lord(%rip), $t0, $t1 + adcx $t0, $acc0 + adox $t1, $acc1 + adcx %rax, $acc1 + ################################# + mov $acc2, %rdx + mulx .LordK(%rip), %rdx, $t0 + + mulx 8*0+.Lord(%rip), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + mulx 8*1+.Lord(%rip), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc0 + mulx 8*2+.Lord(%rip), $t0, $t1 + adcx $t0, $acc0 + adox $t1, $acc1 + mulx 8*3+.Lord(%rip), $t0, $t1 + adcx $t0, $acc1 + adox $t1, $acc2 + adcx %rax, $acc2 + ################################# + mov $acc3, %rdx + mulx .LordK(%rip), %rdx, $t0 + + mulx 8*0+.Lord(%rip), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc0 + mulx 8*1+.Lord(%rip), $t0, $t1 + adcx $t0, $acc0 + adox $t1, $acc1 + mulx 8*2+.Lord(%rip), $t0, $t1 + adcx $t0, $acc1 + adox $t1, $acc2 + mulx 8*3+.Lord(%rip), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + adcx %rax, $acc3 + + xor $t0, $t0 + add $acc4, $acc0 + adc $acc5, $acc1 + adc $acc6, $acc2 + adc $acc7, $acc3 + adc \$0, $t0 + + mov $acc0, $acc4 + mov $acc1, $acc5 + mov $acc2, $acc6 + mov $acc3, $acc7 + # Subtract p256 + sub 8*0+.Lord(%rip), $acc0 + sbb 8*1+.Lord(%rip), $acc1 + sbb 8*2+.Lord(%rip), $acc2 + sbb 8*3+.Lord(%rip), $acc3 + sbb \$0, $t0 + + cmovc $acc4, $acc0 + cmovc $acc5, $acc1 
+ cmovc $acc6, $acc2 + cmovc $acc7, $acc3 + + mov $acc0, 8*0($r_ptr) + mov $acc1, 8*1($r_ptr) + mov $acc2, 8*2($r_ptr) + mov $acc3, 8*3($r_ptr) + + lea -128($r_ptr), $a_ptr + + dec $t2 + jne .Lord_sqrx_loop + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + pop %rbp + ret + +.size ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx +___ $code.=<<___; ################################################################################ # void ecp_nistz256_to_mont( diff --git a/crypto/ec/ec_err.c b/crypto/ec/ec_err.c index 13b32c78ac744..e8286899179d9 100644 --- a/crypto/ec/ec_err.c +++ b/crypto/ec/ec_err.c @@ -190,6 +190,7 @@ static ERR_STRING_DATA EC_str_functs[] = { {ERR_FUNC(EC_F_EC_GROUP_SET_CURVE_GFP), "EC_GROUP_set_curve_GFp"}, {ERR_FUNC(EC_F_EC_GROUP_SET_EXTRA_DATA), "EC_GROUP_SET_EXTRA_DATA"}, {ERR_FUNC(EC_F_EC_GROUP_SET_GENERATOR), "EC_GROUP_set_generator"}, + {ERR_FUNC(EC_F_EC_GROUP_ORD_INVERSE), "EC_GROUP_do_inverse_ord"}, {ERR_FUNC(EC_F_EC_KEY_CHECK_KEY), "EC_KEY_check_key"}, {ERR_FUNC(EC_F_EC_KEY_COPY), "EC_KEY_copy"}, {ERR_FUNC(EC_F_EC_KEY_GENERATE_KEY), "EC_KEY_generate_key"}, @@ -245,6 +246,7 @@ static ERR_STRING_DATA EC_str_functs[] = { {ERR_FUNC(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE), "ecp_nistz256_mult_precompute"}, {ERR_FUNC(EC_F_ECP_NISTZ256_PRE_COMP_NEW), "ecp_nistz256_pre_comp_new"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_INV_ORD), "ecp_nistz256_inv_mod_ord"}, {ERR_FUNC(EC_F_O2I_ECPUBLICKEY), "o2i_ECPublicKey"}, {ERR_FUNC(EC_F_OLD_EC_PRIV_DECODE), "OLD_EC_PRIV_DECODE"}, {ERR_FUNC(EC_F_PKEY_EC_CTRL), "PKEY_EC_CTRL"}, diff --git a/crypto/ec/ec_lcl.h b/crypto/ec/ec_lcl.h index 9db7106c5aaa7..b79ee35992460 100644 --- a/crypto/ec/ec_lcl.h +++ b/crypto/ec/ec_lcl.h @@ -196,6 +196,9 @@ struct ec_method_st { int (*field_decode) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a, BN_CTX *); int (*field_set_to_one) (const EC_GROUP *, BIGNUM *r, BN_CTX *); + + /* Inverse modulo order */ + int (*field_inverse_mod_ord) (const EC_GROUP *, BIGNUM *r, const BIGNUM *x, BN_CTX *ctx); } /* EC_METHOD */ ; typedef struct ec_extra_data_st { diff --git a/crypto/ec/ec_lib.c b/crypto/ec/ec_lib.c index 9156943e200a0..80d29fdcf9953 100644 --- a/crypto/ec/ec_lib.c +++ b/crypto/ec/ec_lib.c @@ -332,6 +332,47 @@ int EC_GROUP_get_order(const EC_GROUP *group, BIGNUM *order, BN_CTX *ctx) return !BN_is_zero(order); } +int EC_GROUP_do_inverse_ord(const EC_GROUP *group, + BIGNUM *res, + const BIGNUM *x, + BN_CTX *ctx, + int constantTime) +{ + int ret = 0; + BN_CTX_start(ctx); + if (group->meth->field_inverse_mod_ord != NULL) { + ret = group->meth->field_inverse_mod_ord(group, res, x, ctx); + } else if (constantTime) { + BIGNUM *tmp; + if ((tmp = BN_CTX_get(ctx)) == NULL) { + ECDSAerr(EC_F_EC_GROUP_ORD_INVERSE, ERR_R_BN_LIB); + goto err; + } + if (!BN_set_word(tmp, 2)) { + ECDSAerr(EC_F_EC_GROUP_ORD_INVERSE, ERR_R_BN_LIB); + goto err; + } + if (!BN_mod_sub(tmp, group->order, tmp, group->order, ctx)) { + ECDSAerr(EC_F_EC_GROUP_ORD_INVERSE, ERR_R_BN_LIB); + goto err; + } + BN_set_flags(tmp, BN_FLG_CONSTTIME); + if (!BN_mod_exp_mont_consttime + (res, x, tmp, group->order, ctx, EC_GROUP_get_mont_data(group))) { + ECDSAerr(EC_F_EC_GROUP_ORD_INVERSE, ERR_R_BN_LIB); + goto err; + } + ret = 1; + } else { + if (BN_mod_inverse(res, x, group->order, ctx)) { + ret = 1; + } + } +err: + BN_CTX_end(ctx); + return ret; +} + int EC_GROUP_get_cofactor(const EC_GROUP *group, BIGNUM *cofactor, BN_CTX *ctx) { diff --git a/crypto/ec/ecp_nistz256.c b/crypto/ec/ecp_nistz256.c index 83f0c6fdfdce6..38001e05d0ad0 100644 --- 
a/crypto/ec/ecp_nistz256.c +++ b/crypto/ec/ecp_nistz256.c @@ -1,6 +1,13 @@ /****************************************************************************** * * - * Copyright 2014 Intel Corporation * + * Copyright (c) 2015 Intel Corporation * + * Copyright (c) 2015 CloudFlare, Inc. * + * All rights reserved. * + * * + * This software is made available to you under your choice of the * + * Apache V.2.0 and/or BSD license below: * + * * + ****************************************************************************** * * * Licensed under the Apache License, Version 2.0 (the "License"); * * you may not use this file except in compliance with the License. * @@ -16,10 +23,41 @@ * * ****************************************************************************** * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted provided that the following conditions are * + * met: * + * * + * 1. Redistributions of source code must retain the above copyright * + * notice, this list of conditions and the following disclaimer. * + * * + * 2. Redistributions in binary form must reproduce the above copyright * + * notice, this list of conditions and the following disclaimer in the * + * documentation and/or other materials provided with the * + * distribution. * + * * + * 3. Neither the name of the copyright holders nor the names of its * + * contributors may be used to endorse or promote products derived from * + * this software without specific prior written permission. * + * * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR* + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR * + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * + * * + ****************************************************************************** + * * * Developers and authors: * - * Shay Gueron (1, 2), and Vlad Krasnov (1) * + * Shay Gueron (1, 2), and Vlad Krasnov (1, 3) * * (1) Intel Corporation, Israel Development Center * * (2) University of Haifa * + * (3) CloudFlare, Inc. 
* * Reference: * * S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with * * 256 Bit Primes" * @@ -102,6 +140,13 @@ void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]); void ecp_nistz256_mul_mont(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS], const BN_ULONG b[P256_LIMBS]); +/* Montgomery mul modulo Order(P): res = a*b*2^-256 mod Order(P) */ +void ecp_nistz256_ord_mul_mont(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS], + const BN_ULONG b[P256_LIMBS]); +void ecp_nistz256_ord_sqr_mont(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS], + int rep); /* Montgomery sqr: res = a*a*2^-256 mod P */ void ecp_nistz256_sqr_mont(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]); @@ -1357,9 +1402,109 @@ __owur static int ecp_nistz256_points_mul(const EC_GROUP *group, return ret; } +#if defined(__x86_64) || defined(__x86_64__) +__owur int ecp_nistz256_inv_mod_ord(const EC_GROUP *group, + BIGNUM *r, + const BIGNUM *x, + BN_CTX *ctx) +{ + /* RR = 2^512 mod ord(p256) */ + static const BN_ULONG RR[P256_LIMBS] = {TOBN(0x83244c95,0xbe79eea2), + TOBN(0x4699799c,0x49bd6fa6), + TOBN(0x2845b239,0x2b6bec59), + TOBN(0x66e12d94,0xf3d95620)}; + /* The constant 1 (unlike ONE that is one in Montgomery representation) */ + static const BN_ULONG CONST_ONE[P256_LIMBS] = {TOBN(0,1), + TOBN(0,0), + TOBN(0,0), + TOBN(0,0)}; + /* expLo - the low 128bit of the exponent we use (ord(p256) - 2), + split into 4bit windows */ + static const unsigned char expLo[32] = {0xb,0xc,0xe,0x6, + 0xf,0xa,0xa,0xd, + 0xa,0x7,0x1,0x7, + 0x9,0xe,0x8,0x4, + 0xf,0x3,0xb,0x9, + 0xc,0xa,0xc,0x2, + 0xf,0xc,0x6,0x3, + 0x2,0x5,0x4,0xf}; + + BN_ULONG table[P256_LIMBS*15]; + BN_ULONG out[P256_LIMBS], t[P256_LIMBS]; + int i, ret = 0; + BIGNUM *tmp; + + if ((BN_num_bits(x) > 256) + || BN_is_negative(x)) { + if ((tmp = BN_CTX_get(ctx)) == NULL) { + ECerr(EC_F_ECP_NISTZ256_INV_ORD, ERR_R_BN_LIB); + goto err; + } + if (!BN_nnmod(tmp, x, group->order, ctx)) { + ECerr(EC_F_ECP_NISTZ256_INV_ORD, ERR_R_BN_LIB); + goto err; + } + x = tmp; + } + /* We don't use entry 0 in the table, so we address with -1 offset */ + ecp_nistz256_bignum_to_field_elem(out, x); + ecp_nistz256_ord_mul_mont(&table[0*P256_LIMBS], out, RR); + for ( i = 2; i < 16; i+=2 ) { + ecp_nistz256_ord_sqr_mont(&table[(i-1)*P256_LIMBS], + &table[(i/2-1)*P256_LIMBS], 1); + ecp_nistz256_ord_mul_mont(&table[i*P256_LIMBS], + &table[(i-1)*P256_LIMBS], + &table[0*P256_LIMBS]); + } + /* The top 128bit of the exponent are highly redundant, + so we perform an optimized flow */ + /* f */ + memcpy(out, &table[(15-1)*P256_LIMBS], sizeof(out)); + /* f0 */ + ecp_nistz256_ord_sqr_mont(out, out, 4); + /* ff */ + ecp_nistz256_ord_mul_mont(out, out, &table[(15-1)*P256_LIMBS]); + memcpy(t, out, sizeof(t)); + /* ff00 */ + ecp_nistz256_ord_sqr_mont(out, out, 8); + /* ffff */ + ecp_nistz256_ord_mul_mont(out, out, t); + memcpy(t, out, sizeof(t)); + /* ffff0000 */ + ecp_nistz256_ord_sqr_mont(out, out, 16); + /* ffffffff */ + ecp_nistz256_ord_mul_mont(out, out, t); + memcpy(t, out, sizeof(t)); + /* ffffffff0000000000000000 */ + ecp_nistz256_ord_sqr_mont(out, out, 64); + /* ffffffff00000000ffffffff */ + ecp_nistz256_ord_mul_mont(out, out, t); + /* ffffffff00000000ffffffff00000000 */ + ecp_nistz256_ord_sqr_mont(out, out, 32); + /* ffffffff00000000ffffffffffffffff */ + ecp_nistz256_ord_mul_mont(out, out, t); + + /* The bottom 128 bit of the exponent are easier done with a table */ + for( i = 0; i < 32; i++ ) { + ecp_nistz256_ord_sqr_mont(out, out, 4); + 
/* The exponent is public, no need in constant time access */ + ecp_nistz256_ord_mul_mont(out, out, &table[(expLo[i]-1)*P256_LIMBS]); + } + ecp_nistz256_ord_mul_mont(out, out, CONST_ONE); + + if (!bn_set_words(r, out, P256_LIMBS)) { + ECerr(EC_F_ECP_NISTZ256_INV_ORD, ERR_R_BN_LIB); + goto err; + } + ret = 1; +err: + return ret; +} +#endif + __owur static int ecp_nistz256_get_affine(const EC_GROUP *group, - const EC_POINT *point, - BIGNUM *x, BIGNUM *y, BN_CTX *ctx) + const EC_POINT *point, + BIGNUM *x, BIGNUM *y, BN_CTX *ctx) { BN_ULONG z_inv2[P256_LIMBS]; BN_ULONG z_inv3[P256_LIMBS]; @@ -1519,7 +1664,12 @@ const EC_METHOD *EC_GFp_nistz256_method(void) 0, /* field_div */ ec_GFp_mont_field_encode, ec_GFp_mont_field_decode, - ec_GFp_mont_field_set_to_one + ec_GFp_mont_field_set_to_one, +#if defined(__x86_64) || defined(__x86_64__) + ecp_nistz256_inv_mod_ord +#else + 0 +#endif }; return &ret; diff --git a/crypto/ecdsa/ecs_ossl.c b/crypto/ecdsa/ecs_ossl.c index 27266e9173316..4200017fa0e51 100644 --- a/crypto/ecdsa/ecs_ossl.c +++ b/crypto/ecdsa/ecs_ossl.c @@ -158,9 +158,10 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, * compute G*k using an equivalent scalar of fixed bit-length. */ - if (!BN_add(k, k, order)) - goto err; - if (BN_num_bits(k) <= BN_num_bits(order)) + if (BN_num_bits(k) < BN_num_bits(order)) + if (!BN_add(k, k, order)) + goto err; + if (BN_num_bits(k) < BN_num_bits(order)) if (!BN_add(k, k, order)) goto err; @@ -195,31 +196,8 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, } while (BN_is_zero(r)); - /* compute the inverse of k */ - if (EC_GROUP_get_mont_data(group) != NULL) { - /* - * We want inverse in constant time, therefore we utilize the fact - * order must be prime and use Fermats Little Theorem instead. 
-             */
-            if (!BN_set_word(X, 2)) {
-                ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB);
-                goto err;
-            }
-            if (!BN_mod_sub(X, order, X, order, ctx)) {
-                ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB);
-                goto err;
-            }
-            BN_set_flags(X, BN_FLG_CONSTTIME);
-            if (!BN_mod_exp_mont_consttime
-                (k, k, X, order, ctx, EC_GROUP_get_mont_data(group))) {
-                ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB);
-                goto err;
-            }
-        } else {
-            if (!BN_mod_inverse(k, k, order, ctx)) {
-                ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB);
-                goto err;
-            }
+        if (!EC_GROUP_do_inverse_ord(group, k, k, ctx, 1)) {
+            goto err;
         }

     /* clear old values if necessary */
@@ -399,10 +377,10 @@ static int ecdsa_do_verify(const unsigned char *dgst, int dgst_len,
         goto err;
     }
     /* calculate tmp1 = inv(S) mod order */
-    if (!BN_mod_inverse(u2, sig->s, order, ctx)) {
-        ECDSAerr(ECDSA_F_ECDSA_DO_VERIFY, ERR_R_BN_LIB);
+    if (!EC_GROUP_do_inverse_ord(group, u2, sig->s, ctx, 0)) {
         goto err;
     }
+
     /* digest -> m */
     i = BN_num_bits(order);
     /*
diff --git a/include/openssl/ec.h b/include/openssl/ec.h
index 2d36dd5ea74e8..a786393671493 100644
--- a/include/openssl/ec.h
+++ b/include/openssl/ec.h
@@ -248,6 +248,17 @@ const EC_POINT *EC_GROUP_get0_generator(const EC_GROUP *group);
 */
 BN_MONT_CTX *EC_GROUP_get_mont_data(const EC_GROUP *group);
+/** Performs inversion modulo the order of an EC_GROUP
+ * \param group EC_GROUP object
+ * \param res BIGNUM to which the result is copied
+ * \param x BIGNUM whose inverse is computed
+ * \param ctx BN_CTX object
+ * \param constantTime int; if nonzero, the computation is performed in constant time
+ * \return 1 on success and 0 if an error occurred
+ */
+int EC_GROUP_do_inverse_ord(const EC_GROUP *group, BIGNUM *res,
+                            const BIGNUM *x, BN_CTX *ctx, int constantTime);
+
 /** Gets the order of a EC_GROUP
 * \param group EC_GROUP object
 * \param order BIGNUM to which the order is copied
@@ -1167,6 +1178,7 @@ void ERR_load_EC_strings(void);
 # define EC_F_EC_GROUP_SET_CURVE_GFP 109
 # define EC_F_EC_GROUP_SET_EXTRA_DATA 110
 # define EC_F_EC_GROUP_SET_GENERATOR 111
+# define EC_F_EC_GROUP_ORD_INVERSE 245
 # define EC_F_EC_KEY_CHECK_KEY 177
 # define EC_F_EC_KEY_COPY 178
 # define EC_F_EC_KEY_GENERATE_KEY 179
@@ -1212,6 +1224,7 @@ void ERR_load_EC_strings(void);
 # define EC_F_ECP_NISTZ256_WINDOWED_MUL 242
 # define EC_F_ECP_NISTZ256_MULT_PRECOMPUTE 243
 # define EC_F_ECP_NISTZ256_PRE_COMP_NEW 244
+# define EC_F_ECP_NISTZ256_INV_ORD 246
 # define EC_F_O2I_ECPUBLICKEY 152
 # define EC_F_OLD_EC_PRIV_DECODE 222
 # define EC_F_PKEY_EC_CTRL 197
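
The new ecp_nistz256_ord_mul_mont and ecp_nistz256_ord_sqr_mont routines perform word-by-word (CIOS-style) Montgomery multiplication modulo the P-256 group order, driven by the .Lord limbs and the constant .LordK = -ord^-1 mod 2^64. The C function below is a minimal portable sketch of that technique, assuming a compiler that provides unsigned __int128; the name ord_mul_mont_ref is illustrative only, it is not the shipped assembly, and its final ternary select is not constant time (the assembly uses cmov there). It computes res = a * b * 2^-256 mod ord, matching the comment on the assembly entry point.

#include <stdint.h>

/* P-256 group order, least significant 64-bit word first (same values as .Lord) */
static const uint64_t ord[4] = {
    0xf3b9cac2fc632551ULL, 0xbce6faada7179e84ULL,
    0xffffffffffffffffULL, 0xffffffff00000000ULL
};
/* -ord^-1 mod 2^64 (same value as .LordK) */
static const uint64_t ord_k0 = 0xccd1c8aaee00bc4fULL;

/* res = a * b * 2^-256 mod ord, interleaved (CIOS) Montgomery multiplication */
static void ord_mul_mont_ref(uint64_t res[4], const uint64_t a[4], const uint64_t b[4])
{
    uint64_t t[6] = { 0, 0, 0, 0, 0, 0 };
    int i, j;

    for (i = 0; i < 4; i++) {
        /* t += a * b[i] */
        unsigned __int128 acc = 0;
        for (j = 0; j < 4; j++) {
            acc += (unsigned __int128)a[j] * b[i] + t[j];
            t[j] = (uint64_t)acc;
            acc >>= 64;
        }
        acc += t[4];
        t[4] = (uint64_t)acc;
        t[5] = (uint64_t)(acc >> 64);

        /* one reduction step: add m*ord so t[0] becomes 0, then shift right one word */
        uint64_t m = t[0] * ord_k0;
        acc = (unsigned __int128)m * ord[0] + t[0];
        acc >>= 64;
        for (j = 1; j < 4; j++) {
            acc += (unsigned __int128)m * ord[j] + t[j];
            t[j - 1] = (uint64_t)acc;
            acc >>= 64;
        }
        acc += t[4];
        t[3] = (uint64_t)acc;
        t[4] = t[5] + (uint64_t)(acc >> 64);
    }

    /* final conditional subtraction of ord */
    uint64_t d[4], borrow = 0;
    for (j = 0; j < 4; j++) {
        unsigned __int128 diff = (unsigned __int128)t[j] - ord[j] - borrow;
        d[j] = (uint64_t)diff;
        borrow = (uint64_t)(diff >> 64) & 1;
    }
    for (j = 0; j < 4; j++)
        res[j] = (borrow > t[4]) ? t[j] : d[j];
}

ecp_nistz256_ord_sqr_mont is the same operation with a == b, repeated rep times, which is what lets ecp_nistz256_inv_mod_ord walk the fixed addition chain for x^(ord-2).

On the C side, the only public addition is EC_GROUP_do_inverse_ord(). The sketch below shows a hypothetical caller (the function name inverse_mod_order_example and the overall flow are illustrative, not part of the patch); it passes constantTime = 1 because the scalar is treated as secret, exactly as ecdsa_sign_setup() now does for the nonce k, whereas ecdsa_do_verify() passes 0 since the signature value is public.

#include <openssl/bn.h>
#include <openssl/ec.h>
#include <openssl/obj_mac.h>

int inverse_mod_order_example(void)
{
    int ok = 0;
    EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1);
    BN_CTX *ctx = BN_CTX_new();
    BIGNUM *x = BN_new(), *inv = BN_new(), *order = BN_new();

    if (group == NULL || ctx == NULL || x == NULL || inv == NULL || order == NULL)
        goto done;
    if (!EC_GROUP_get_order(group, order, ctx))
        goto done;
    do {                        /* pick a non-zero scalar x < order */
        if (!BN_rand_range(x, order))
            goto done;
    } while (BN_is_zero(x));

    /* constantTime = 1: treat x as secret (e.g. an ECDSA nonce) */
    if (!EC_GROUP_do_inverse_ord(group, inv, x, ctx, 1))
        goto done;
    ok = 1;
 done:
    BN_free(order);
    BN_free(inv);
    BN_free(x);
    BN_CTX_free(ctx);
    EC_GROUP_free(group);
    return ok;
}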