diff --git a/README.md b/README.md index 20e85bec..dd38dfed 100644 --- a/README.md +++ b/README.md @@ -72,32 +72,35 @@ pip install ecdsa[gmpy] The following table shows how long this library takes to generate keypairs (`keygen`), to sign data (`sign`), to verify those signatures (`verify`), -and to derive a shared secret (`ecdh`). +to derive a shared secret (`ecdh`), and +to verify the signatures with no key specific precomputation (`no PC verify`). All those values are in seconds. For convenience, the inverses of those values are also provided: how many keys per second can be generated (`keygen/s`), how many signatures can be made per second (`sign/s`), how many signatures can be verified -per second (`verify/s`), and how many shared secrets can be derived per second -(`ecdh/s`). The size in bytes of a raw signature (generally the smallest +per second (`verify/s`), how many shared secrets can be derived per second +(`ecdh/s`), and how many signatures with no key specific +precomputation can be verified per second (`no PC verify/s`). The size of raw +signature (generally the smallest way a signature can be encoded) is also provided in the `siglen` column. Use `tox -e speed` to generate this table on your own computer. On an Intel Core i7 4790K @ 4.0GHz I'm getting the following performance: ``` - siglen keygen keygen/s sign sign/s verify verify/s - NIST192p: 48 0.00035s 2893.02 0.00038s 2620.53 0.00069s 1458.92 - NIST224p: 56 0.00043s 2307.11 0.00048s 2092.00 0.00088s 1131.33 - NIST256p: 64 0.00056s 1793.70 0.00061s 1639.87 0.00113s 883.79 - NIST384p: 96 0.00116s 864.33 0.00124s 806.29 0.00233s 429.87 - NIST521p: 132 0.00221s 452.16 0.00234s 427.31 0.00460s 217.19 - SECP256k1: 64 0.00056s 1772.65 0.00061s 1628.73 0.00110s 912.13 - BRAINPOOLP160r1: 40 0.00026s 3801.86 0.00029s 3401.11 0.00052s 1930.47 - BRAINPOOLP192r1: 48 0.00034s 2925.73 0.00038s 2634.34 0.00070s 1438.06 - BRAINPOOLP224r1: 56 0.00044s 2287.98 0.00048s 2083.87 0.00088s 1137.52 - BRAINPOOLP256r1: 64 0.00056s 1774.11 0.00061s 1628.25 0.00112s 890.71 - BRAINPOOLP320r1: 80 0.00081s 1238.18 0.00087s 1146.71 0.00151s 661.95 - BRAINPOOLP384r1: 96 0.00117s 855.47 0.00124s 804.56 0.00241s 414.83 - BRAINPOOLP512r1: 128 0.00223s 447.99 0.00234s 427.49 0.00437s 229.09 + siglen keygen keygen/s sign sign/s verify verify/s no PC verify no PC verify/s + NIST192p: 48 0.00033s 2991.13 0.00036s 2740.86 0.00067s 1502.11 0.00136s 737.54 + NIST224p: 56 0.00042s 2360.67 0.00046s 2190.16 0.00083s 1201.83 0.00170s 587.79 + NIST256p: 64 0.00053s 1872.02 0.00057s 1743.08 0.00103s 968.53 0.00219s 457.36 + NIST384p: 96 0.00110s 907.45 0.00116s 861.63 0.00218s 459.38 0.00445s 224.92 + NIST521p: 132 0.00214s 467.72 0.00223s 448.70 0.00430s 232.76 0.00888s 112.66 + SECP256k1: 64 0.00054s 1841.11 0.00058s 1722.33 0.00111s 903.07 0.00216s 464.01 + BRAINPOOLP160r1: 40 0.00026s 3780.81 0.00029s 3422.67 0.00054s 1863.09 0.00109s 914.93 + BRAINPOOLP192r1: 48 0.00034s 2942.79 0.00037s 2710.56 0.00070s 1435.59 0.00138s 724.79 + BRAINPOOLP224r1: 56 0.00044s 2278.35 0.00047s 2145.32 0.00090s 1115.34 0.00182s 549.72 + BRAINPOOLP256r1: 64 0.00055s 1832.95 0.00059s 1704.50 0.00110s 911.02 0.00234s 427.22 + BRAINPOOLP320r1: 80 0.00077s 1305.78 0.00082s 1222.47 0.00156s 640.27 0.00321s 311.56 + BRAINPOOLP384r1: 96 0.00112s 893.07 0.00118s 849.32 0.00228s 438.75 0.00478s 209.35 + BRAINPOOLP512r1: 128 0.00213s 470.08 0.00221s 451.98 0.00419s 238.70 0.00940s 106.44 ecdh ecdh/s NIST192p: 0.00110s 910.70 @@ -118,20 +121,20 @@ On an Intel Core i7 4790K @ 4.0GHz I'm getting the following performance: To test performance with `gmpy2` loaded, use `tox -e speedgmpy2`. On the same machine I'm getting the following performance with `gmpy2`: ``` - siglen keygen keygen/s sign sign/s verify verify/s - NIST192p: 48 0.00017s 5945.50 0.00018s 5544.66 0.00033s 3002.54 - NIST224p: 56 0.00021s 4742.14 0.00022s 4463.52 0.00044s 2248.59 - NIST256p: 64 0.00024s 4155.73 0.00025s 3994.28 0.00047s 2105.34 - NIST384p: 96 0.00041s 2415.06 0.00043s 2316.41 0.00085s 1177.18 - NIST521p: 132 0.00072s 1391.14 0.00074s 1359.63 0.00140s 716.31 - SECP256k1: 64 0.00024s 4216.50 0.00025s 3994.52 0.00047s 2120.57 - BRAINPOOLP160r1: 40 0.00014s 7038.99 0.00015s 6501.55 0.00029s 3397.79 - BRAINPOOLP192r1: 48 0.00017s 5983.18 0.00018s 5626.08 0.00035s 2843.62 - BRAINPOOLP224r1: 56 0.00021s 4727.54 0.00022s 4464.86 0.00043s 2326.84 - BRAINPOOLP256r1: 64 0.00024s 4221.00 0.00025s 4010.26 0.00049s 2046.40 - BRAINPOOLP320r1: 80 0.00032s 3142.14 0.00033s 3009.15 0.00061s 1652.88 - BRAINPOOLP384r1: 96 0.00041s 2415.98 0.00043s 2340.35 0.00083s 1198.77 - BRAINPOOLP512r1: 128 0.00064s 1567.27 0.00066s 1526.33 0.00127s 788.51 + siglen keygen keygen/s sign sign/s verify verify/s no PC verify no PC verify/s + NIST192p: 48 0.00017s 5878.39 0.00018s 5670.66 0.00034s 2971.38 0.00067s 1484.97 + NIST224p: 56 0.00021s 4705.08 0.00022s 4587.19 0.00040s 2499.96 0.00088s 1140.97 + NIST256p: 64 0.00024s 4252.73 0.00024s 4108.48 0.00049s 2038.80 0.00096s 1043.03 + NIST384p: 96 0.00041s 2455.84 0.00042s 2406.31 0.00079s 1260.03 0.00172s 580.61 + NIST521p: 132 0.00070s 1419.16 0.00072s 1392.50 0.00139s 719.35 0.00307s 325.96 + SECP256k1: 64 0.00024s 4228.87 0.00024s 4086.32 0.00047s 2124.86 0.00096s 1037.53 + BRAINPOOLP160r1: 40 0.00014s 6932.12 0.00015s 6678.36 0.00030s 3387.90 0.00056s 1784.02 + BRAINPOOLP192r1: 48 0.00017s 5886.05 0.00017s 5720.63 0.00034s 2941.22 0.00067s 1490.87 + BRAINPOOLP224r1: 56 0.00021s 4748.89 0.00022s 4638.15 0.00041s 2460.86 0.00089s 1128.91 + BRAINPOOLP256r1: 64 0.00024s 4248.00 0.00024s 4135.19 0.00045s 2209.69 0.00099s 1006.45 + BRAINPOOLP320r1: 80 0.00032s 3096.85 0.00033s 3012.43 0.00065s 1547.07 0.00137s 728.60 + BRAINPOOLP384r1: 96 0.00041s 2436.12 0.00042s 2396.23 0.00083s 1211.13 0.00176s 568.39 + BRAINPOOLP512r1: 128 0.00063s 1580.09 0.00064s 1562.78 0.00129s 778.09 0.00279s 358.12 ecdh ecdh/s NIST192p: 0.00051s 1960.26 diff --git a/speed.py b/speed.py index 19132e0d..297e94f8 100644 --- a/speed.py +++ b/speed.py @@ -19,7 +19,8 @@ def do(setup_statements, statement): "{name:>16}{sep:1} {siglen:>6} {keygen:>9{form}}{unit:1} " "{keygen_inv:>9{form_inv}} {sign:>9{form}}{unit:1} " "{sign_inv:>9{form_inv}} {verify:>9{form}}{unit:1} " - "{verify_inv:>9{form_inv}}" + "{verify_inv:>9{form_inv}} {verify_single:>13{form}}{unit:1} " + "{verify_single_inv:>14{form_inv}}" ) print( @@ -31,6 +32,8 @@ def do(setup_statements, statement): sign_inv="sign/s", verify="verify", verify_inv="verify/s", + verify_single="no PC verify", + verify_single_inv="no PC verify/s", name="", sep="", unit="", @@ -54,6 +57,7 @@ def do(setup_statements, statement): keygen = do([S1], S2) sign = do([S1, S2, S3], S4) verf = do([S1, S2, S3, S4, S5, S6], S7) + verf_single = do([S1, S2, S3, S4, S5], S7) import ecdsa c = getattr(ecdsa, curve) @@ -70,6 +74,8 @@ def do(setup_statements, statement): sign_inv=1.0 / sign, verify=verf, verify_inv=1.0 / verf, + verify_single=verf_single, + verify_single_inv=1.0 / verf_single, form=".5f", form_inv=".2f", ) diff --git a/src/ecdsa/ellipticcurve.py b/src/ecdsa/ellipticcurve.py index 25565df9..0617c6ea 100644 --- a/src/ecdsa/ellipticcurve.py +++ b/src/ecdsa/ellipticcurve.py @@ -39,7 +39,7 @@ from gmpy2 import mpz GMPY = True -except ImportError: +except ImportError: # pragma: no branch try: from gmpy import mpz @@ -57,7 +57,7 @@ class CurveFp(object): """Elliptic Curve over the field of integers modulo a prime.""" - if GMPY: + if GMPY: # pragma: no branch def __init__(self, p, a, b, h=None): """ @@ -75,7 +75,7 @@ def __init__(self, p, a, b, h=None): # gmpy with it self.__h = h - else: + else: # pragma: no branch def __init__(self, p, a, b, h=None): """ @@ -164,12 +164,12 @@ def __init__(self, curve, x, y, z, order=None, generator=False): # since it's generally better (faster) to use scaled points vs unscaled # ones, use writer-biased RWLock for locking: self._update_lock = RWLock() - if GMPY: + if GMPY: # pragma: no branch self.__x = mpz(x) self.__y = mpz(y) self.__z = mpz(z) self.__order = order and mpz(order) - else: + else: # pragma: no branch self.__x = x self.__y = y self.__z = z @@ -359,7 +359,8 @@ def from_affine(point, generator=False): point.curve(), point.x(), point.y(), 1, point.order(), generator ) - # plese note that all the methods that use the equations from hyperelliptic + # please note that all the methods that use the equations from + # hyperelliptic # are formatted in a way to maximise performance. # Things that make code faster: multiplying instead of taking to the power # (`xx = x * x; xxxx = xx * xx % p` is faster than `xxxx = x**4 % p` and @@ -389,7 +390,7 @@ def _double(self, X1, Y1, Z1, p, a): """Add a point to itself, arbitrary z.""" if Z1 == 1: return self._double_with_z_1(X1, Y1, p, a) - if not Z1: + if not Y1 or not Z1: return 0, 0, 1 # after: # http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-2007-bl @@ -579,11 +580,11 @@ def _naf(mult): if mult % 2: nd = mult % 4 if nd >= 2: - nd = nd - 4 - ret += [nd] + nd -= 4 + ret.append(nd) mult -= nd else: - ret += [0] + ret.append(0) mult //= 2 return ret @@ -621,15 +622,6 @@ def __mul__(self, other): return PointJacobi(self.__curve, X3, Y3, Z3, self.__order) - @staticmethod - def _leftmost_bit(x): - """Return integer with the same magnitude as x but only one bit set""" - assert x > 0 - result = 1 - while result <= x: - result = 2 * result - return result // 2 - def mul_add(self, self_mul, other, other_mul): """ Do two multiplications at the same time, add results. @@ -643,7 +635,7 @@ def mul_add(self, self_mul, other, other_mul): if not isinstance(other, PointJacobi): other = PointJacobi.from_affine(other) # when the points have precomputed answers, then multiplying them alone - # is faster (as it uses NAF) + # is faster (as it uses NAF and no point doublings) self._maybe_precompute() other._maybe_precompute() if self.__precompute and other.__precompute: @@ -653,32 +645,76 @@ def mul_add(self, self_mul, other, other_mul): self_mul = self_mul % self.__order other_mul = other_mul % self.__order - i = self._leftmost_bit(max(self_mul, other_mul)) * 2 + # (X3, Y3, Z3) is the accumulator X3, Y3, Z3 = 0, 0, 1 p, a = self.__curve.p(), self.__curve.a() - self = self.scale() - # after scaling, point is immutable, no need for locking - X1, Y1 = self.__x, self.__y - other = other.scale() - X2, Y2 = other.__x, other.__y - both = self + other - if both is INFINITY: - X4, Y4 = 0, 0 - else: - both.scale() - X4, Y4 = both.__x, both.__y + + # as we have 6 unique points to work with, we can't scale all of them, + # but do scale the ones that are used most often + # (post scale() points are immutable so no need for locking) + self.scale() + X1, Y1, Z1 = self.__x, self.__y, self.__z + other.scale() + X2, Y2, Z2 = other.__x, other.__y, other.__z + _double = self._double _add = self._add - while i > 1: + + # with NAF we have 3 options: no add, subtract, add + # so with 2 points, we have 9 combinations: + # 0, -A, +A, -B, -A-B, +A-B, +B, -A+B, +A+B + # so we need 4 combined points: + mAmB_X, mAmB_Y, mAmB_Z = _add(X1, -Y1, Z1, X2, -Y2, Z2, p) + pAmB_X, pAmB_Y, pAmB_Z = _add(X1, Y1, Z1, X2, -Y2, Z2, p) + mApB_X, mApB_Y, mApB_Z = _add(X1, -Y1, Z1, X2, Y2, Z2, p) + pApB_X, pApB_Y, pApB_Z = _add(X1, Y1, Z1, X2, Y2, Z2, p) + # when the self and other sum to infinity, we need to add them + # one by one to get correct result but as that's very unlikely to + # happen in regular operation, we don't need to optimise this case + if not pApB_Y or not pApB_Z: + return self * self_mul + other * other_mul + + # gmp object creation has cumulatively higher overhead than the + # speedup we get from calculating the NAF using gmp so ensure use + # of int() + self_naf = list(reversed(self._naf(int(self_mul)))) + other_naf = list(reversed(self._naf(int(other_mul)))) + # ensure that the lists are the same length (zip() will truncate + # longer one otherwise) + if len(self_naf) < len(other_naf): + self_naf = [0] * (len(other_naf) - len(self_naf)) + self_naf + elif len(self_naf) > len(other_naf): + other_naf = [0] * (len(self_naf) - len(other_naf)) + other_naf + + for A, B in zip(self_naf, other_naf): X3, Y3, Z3 = _double(X3, Y3, Z3, p, a) - i = i // 2 - if self_mul & i and other_mul & i: - X3, Y3, Z3 = _add(X3, Y3, Z3, X4, Y4, 1, p) - elif self_mul & i: - X3, Y3, Z3 = _add(X3, Y3, Z3, X1, Y1, 1, p) - elif other_mul & i: - X3, Y3, Z3 = _add(X3, Y3, Z3, X2, Y2, 1, p) + # conditions ordered from most to least likely + if A == 0: + if B == 0: + pass + elif B < 0: + X3, Y3, Z3 = _add(X3, Y3, Z3, X2, -Y2, Z2, p) + else: + assert B > 0 + X3, Y3, Z3 = _add(X3, Y3, Z3, X2, Y2, Z2, p) + elif A < 0: + if B == 0: + X3, Y3, Z3 = _add(X3, Y3, Z3, X1, -Y1, Z1, p) + elif B < 0: + X3, Y3, Z3 = _add(X3, Y3, Z3, mAmB_X, mAmB_Y, mAmB_Z, p) + else: + assert B > 0 + X3, Y3, Z3 = _add(X3, Y3, Z3, mApB_X, mApB_Y, mApB_Z, p) + else: + assert A > 0 + if B == 0: + X3, Y3, Z3 = _add(X3, Y3, Z3, X1, Y1, Z1, p) + elif B < 0: + X3, Y3, Z3 = _add(X3, Y3, Z3, pAmB_X, pAmB_Y, pAmB_Z, p) + else: + assert B > 0 + X3, Y3, Z3 = _add(X3, Y3, Z3, pApB_X, pApB_Y, pApB_Z, p) if not Y3 or not Z3: return INFINITY diff --git a/src/ecdsa/test_jacobi.py b/src/ecdsa/test_jacobi.py index 43ed6c12..4a938a32 100644 --- a/src/ecdsa/test_jacobi.py +++ b/src/ecdsa/test_jacobi.py @@ -367,6 +367,11 @@ def test_add_point_3_times(self): self.assertEqual(j_g * 3, j_g + j_g + j_g) + def test_mul_without_order(self): + j_g = PointJacobi(curve_256, generator_256.x(), generator_256.y(), 1) + + self.assertEqual(j_g * generator_256.order(), INFINITY) + def test_mul_add_inf(self): j_g = PointJacobi.from_affine(generator_256) @@ -405,6 +410,21 @@ def test_mul_add_to_mul(self): self.assertEqual(a, b) + def test_mul_add_differnt(self): + j_g = PointJacobi.from_affine(generator_256) + + w_a = j_g * 2 + + self.assertEqual(j_g.mul_add(1, w_a, 1), j_g * 3) + + def test_mul_add_slightly_different(self): + j_g = PointJacobi.from_affine(generator_256) + + w_a = j_g * 2 + w_b = j_g * 3 + + self.assertEqual(w_a.mul_add(1, w_b, 3), w_a * 1 + w_b * 3) + def test_mul_add(self): j_g = PointJacobi.from_affine(generator_256) @@ -428,11 +448,52 @@ def test_mul_add_large(self): j_g * (0xFF00 + 255 * 0xF0F0), j_g.mul_add(0xFF00, b, 0xF0F0) ) + def test_mul_add_with_infinity_as_result(self): + j_g = PointJacobi.from_affine(generator_256) + + order = generator_256.order() + + b = PointJacobi.from_affine(generator_256 * 256) + + self.assertEqual(j_g.mul_add(order % 256, b, order // 256), INFINITY) + + def test_mul_add_without_order(self): + j_g = PointJacobi(curve_256, generator_256.x(), generator_256.y(), 1) + + order = generator_256.order() + + w_b = generator_256 * 34 + w_b.scale() + + b = PointJacobi(curve_256, w_b.x(), w_b.y(), 1) + + self.assertEqual(j_g.mul_add(order % 34, b, order // 34), INFINITY) + + def test_mul_add_with_doubled_negation_of_itself(self): + j_g = PointJacobi.from_affine(generator_256 * 17) + + order = generator_256.order() + + dbl_neg = 2 * (-j_g) + + self.assertEqual(j_g.mul_add(4, dbl_neg, 2), INFINITY) + def test_equality(self): pj1 = PointJacobi(curve=CurveFp(23, 1, 1, 1), x=2, y=3, z=1, order=1) pj2 = PointJacobi(curve=CurveFp(23, 1, 1, 1), x=2, y=3, z=1, order=1) self.assertEqual(pj1, pj2) + def test_equality_with_invalid_object(self): + j_g = PointJacobi.from_affine(generator_256) + + self.assertNotEqual(j_g, 12) + + def test_equality_with_wrong_curves(self): + p_a = PointJacobi.from_affine(generator_256) + p_b = PointJacobi.from_affine(generator_224) + + self.assertNotEqual(p_a, p_b) + def test_pickle(self): pj = PointJacobi(curve=CurveFp(23, 1, 1, 1), x=2, y=3, z=1, order=1) self.assertEqual(pickle.loads(pickle.dumps(pj)), pj) diff --git a/tox.ini b/tox.ini index 2af988c7..4b072c3f 100644 --- a/tox.ini +++ b/tox.ini @@ -60,7 +60,7 @@ sitepackages=True whitelist_externals=coverage commands = coverage run --branch -m pytest --hypothesis-show-statistics {posargs:src/ecdsa} - coverage xml + coverage html coverage report -m [testenv:speed]