Permalink
Browse files

added libtommath-0.13

  • Loading branch information...
1 parent 57354e1 commit b66471f74f1f16ebd823f71c5602cc6b68e69574 Tom St Denis committed with sjaeckel Feb 28, 2003
Showing with 750 additions and 517 deletions.
  1. BIN bn.pdf
  2. +16 −17 bn.tex
  3. +14 −6 bn_fast_mp_invmod.c
  4. +51 −26 bn_fast_mp_montgomery_reduce.c
  5. +35 −27 bn_fast_s_mp_mul_digs.c
  6. +7 −6 bn_fast_s_mp_mul_high_digs.c
  7. +39 −18 bn_fast_s_mp_sqr.c
  8. +6 −2 bn_mp_2expt.c
  9. +5 −2 bn_mp_abs.c
  10. +2 −2 bn_mp_add.c
  11. +2 −3 bn_mp_add_d.c
  12. +2 −3 bn_mp_addmod.c
  13. +2 −2 bn_mp_and.c
  14. +7 −1 bn_mp_clamp.c
  15. +1 −3 bn_mp_cmp.c
  16. +1 −2 bn_mp_cmp_mag.c
  17. +15 −9 bn_mp_copy.c
  18. +2 −2 bn_mp_count_bits.c
  19. +7 −9 bn_mp_div.c
  20. +23 −10 bn_mp_div_2.c
  21. +3 −3 bn_mp_div_2d.c
  22. +2 −2 bn_mp_div_d.c
  23. +1 −1 bn_mp_exch.c
  24. +2 −2 bn_mp_expt_d.c
  25. +19 −10 bn_mp_exptmod.c
  26. +10 −22 bn_mp_exptmod_fast.c
  27. +5 −5 bn_mp_gcd.c
  28. +1 −2 bn_mp_grow.c
  29. +2 −3 bn_mp_init_copy.c
  30. +3 −5 bn_mp_invmod.c
  31. +3 −3 bn_mp_jacobi.c
  32. +2 −2 bn_mp_karatsuba_mul.c
  33. +2 −2 bn_mp_karatsuba_sqr.c
  34. +2 −2 bn_mp_lcm.c
  35. +1 −1 bn_mp_lshd.c
  36. +2 −2 bn_mp_mod.c
  37. +2 −3 bn_mp_mod_2d.c
  38. +2 −2 bn_mp_mod_d.c
  39. +53 −0 bn_mp_montgomery_calc_normalization.c
  40. +5 −10 bn_mp_montgomery_reduce.c
  41. +2 −2 bn_mp_montgomery_setup.c
  42. +16 −2 bn_mp_mul.c
  43. +36 −15 bn_mp_mul_2.c
  44. +2 −2 bn_mp_mul_2d.c
  45. +29 −18 bn_mp_mul_d.c
  46. +2 −2 bn_mp_mulmod.c
  47. +6 −2 bn_mp_n_root.c
  48. +1 −1 bn_mp_neg.c
  49. +2 −2 bn_mp_or.c
  50. +8 −7 bn_mp_rand.c
  51. +1 −1 bn_mp_read_signed_bin.c
  52. +1 −1 bn_mp_read_unsigned_bin.c
  53. +3 −3 bn_mp_reduce.c
  54. +1 −1 bn_mp_rshd.c
  55. +1 −1 bn_mp_set_int.c
  56. +9 −2 bn_mp_sqr.c
  57. +2 −2 bn_mp_sqrmod.c
  58. +1 −1 bn_mp_sub.c
  59. +2 −2 bn_mp_sub_d.c
  60. +2 −2 bn_mp_submod.c
  61. +1 −1 bn_mp_to_signed_bin.c
  62. +2 −2 bn_mp_to_unsigned_bin.c
  63. +1 −1 bn_mp_unsigned_bin_size.c
  64. +2 −2 bn_mp_xor.c
  65. +10 −11 bn_radix.c
  66. +1 −1 bn_reverse.c
  67. +35 −27 bn_s_mp_add.c
  68. +6 −20 bn_s_mp_mul_digs.c
  69. +8 −13 bn_s_mp_mul_high_digs.c
  70. +5 −15 bn_s_mp_sqr.c
  71. +32 −22 bn_s_mp_sub.c
  72. +3 −3 bncore.c
  73. +6 −0 changes.txt
  74. +20 −40 demo/demo.c
  75. +12 −7 etc/makefile
  76. +1 −1 etc/pprime.c
  77. +5 −5 etc/tune.c
  78. +27 −0 gen.pl
  79. BIN ltmtest.exe
  80. +4 −4 makefile
  81. BIN mpitest.exe
  82. +1 −1 mtest/mtest.c
  83. +36 −39 timings.txt
  84. +36 −0 timings2.txt
  85. +5 −0 timings3.txt
  86. +5 −1 tommath.h
View
BIN bn.pdf
Binary file not shown.
View
33 bn.tex
@@ -1,7 +1,7 @@
\documentclass{article}
\begin{document}
-\title{LibTomMath v0.12 \\ A Free Multiple Precision Integer Library}
+\title{LibTomMath v0.13 \\ A Free Multiple Precision Integer Library}
\author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle
\newpage
@@ -409,7 +409,6 @@ \subsubsection{mp\_and(mp\_int *a, mp\_int *b, mp\_int *c)}
Computes $c = a \land b$, pseudo-extends with zeroes whichever of $a$ or $b$ is shorter such that the length
of $c$ is the maximum length of the two inputs.
-
\subsection{Basic Arithmetic}
\subsubsection{mp\_cmp(mp\_int *a, mp\_int *b)}
@@ -440,19 +439,18 @@ \subsubsection{mp\_mul(mp\_int *a, mp\_int *b, mp\_int *c)}
Computes $c = a \cdot b$ using signed arithmetic. Handles the sign of the numbers correctly which means it will
correct the sign of the product as required, e.g. $a \cdot -b$ turns into $-ab$.
-For relatively small inputs, that is less than 80 digits a standard baseline or comba-baseline multiplier is used. It
-requires no additional memory and $O(N^2)$ time. The comba-baseline multiplier is only used if it can safely be used
-without losing carry digits. The comba method is faster than the baseline method but cannot always be used which is why
-both are provided. The code will automatically determine when it can be used. If the digit count is higher
-than 80 for the inputs than a Karatsuba multiplier is used which requires approximately $O(6 \cdot N)$ memory and
-$O(N^{lg(3)})$ time.
+This function requires $O(N^2)$ time for small inputs and $O(N^{1.584})$ time for relatively large
+inputs (\textit{above the }KARATSUBA\_MUL\_CUTOFF \textit{value defined in bncore.c.}). There is
+considerable overhead in the Karatsuba method which only pays off when the digit count is fairly high
+(\textit{typically around 80}). For small inputs the function requires $O(2N)$ memory, otherwise it
+requires $O(6 \cdot \mbox{lg}(N) \cdot N)$ memory.
+
\subsubsection{mp\_sqr(mp\_int *a, mp\_int *b)}
-Computes $b = a^2$.
-For relatively small inputs, that is less than 80 digits a modified squaring or comba-squaring algorithm is used. It
-requires no additional memory and $O((N^2 + N)/2)$ time. The comba-squaring method is used only if it can be safely used
-without losing carry digits. After 80 digits a Karatsuba squaring algorithm is used whcih requires approximately
-$O(4 \cdot N)$ memory and $O(N^{lg(3)})$ time.
+Computes $b = a^2$ and fixes the sign of $b$ to be positive.
+
+This function has a running time and memory requirement profile very similar to that of the
+mp\_mul function. It is always faster and uses less memory for the larger inputs.
\subsubsection{mp\_div(mp\_int *a, mp\_int *b, mp\_int *c, mp\_int *d)}
Computes $c = \lfloor a/b \rfloor$ and $d \equiv a \mbox{ (mod }b\mbox{)}$. The division is signed which means the sign
@@ -482,7 +480,8 @@ \subsubsection{mp\_addmod, mp\_submod, mp\_mulmod, mp\_sqrmod}
\subsubsection{mp\_invmod(mp\_int *a, mp\_int *b, mp\_int *c)}
This function will find $c = 1/a \mbox{ (mod }b\mbox{)}$ for any value of $a$ such that $(a, b) = 1$ and $b > 0$. When
-$b$ is odd a ``fast'' variant is used which finds the inverse twice as fast.
+$b$ is odd a ``fast'' variant is used which finds the inverse twice as fast. If no inverse is found (e.g. $(a, b) \ne 1$) then
+the function returns \textbf{MP\_VAL} and the result in $c$ is undefined.
\subsubsection{mp\_gcd(mp\_int *a, mp\_int *b, mp\_int *c)}
Finds the greatest common divisor of both $a$ and $b$ and places the result in $c$. Will work with either positive
@@ -497,13 +496,13 @@ \subsubsection{mp\_lcm(mp\_int *a, mp\_int *b, mp\_int *c)}
Functions requires no additional memory and approximately $O(4 \cdot N^2)$ time.
-\subsubsection{mp\_n\_root(mp\_int *a, mp\_digit b, mp\_int c)}
+\subsubsection{mp\_n\_root(mp\_int *a, mp\_digit b, mp\_int *c)}
Finds the $b$'th root of $a$ and stores it in $b$. The roots are found such that $\vert c \vert^b \le \vert a \vert$.
Uses the Newton approximation approach which means it converges in $O(log \beta^N)$ time to a final result. Each iteration
requires $b$ multiplications and one division for a total work of $O(6N^2 \cdot log \beta^N) = O(6N^3 \cdot log \beta)$.
-If the input $a$ is negative and $b$ is even the function returns an error. Otherwise the function will return a root
-that has a sign that agrees with the sign of $a$.
+If the input $a$ is negative and $b$ is even the function returns \textbf{MP\_VAL}. Otherwise the function will
+return a root that has a sign that agrees with the sign of $a$.
\subsubsection{mp\_jacobi(mp\_int *a, mp\_int *n, int *c)}
Computes $c = \left ( {a \over n} \right )$ or the Jacobi function of $(a, n)$ and stores the result in an integer addressed
View
20 bn_fast_mp_invmod.c
@@ -14,13 +14,17 @@
*/
#include <tommath.h>
-/* computes the modular inverse via binary extended euclidean algorithm, that is c = 1/a mod b */
+/* computes the modular inverse via binary extended euclidean algorithm,
+ * that is c = 1/a mod b
+ *
+ * Based on mp_invmod except this is optimized for the case where b is
+ * odd as per HAC Note 14.64 on pp. 610
+ */
int
fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
{
- mp_int x, y, u, v, B, D;
- int res, neg;
-
+ mp_int x, y, u, v, B, D;
+ int res, neg;
if ((res = mp_init (&x)) != MP_OKAY) {
goto __ERR;
@@ -58,7 +62,10 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
goto __D;
}
- /* 2. [modified] if x,y are both even then return an error! */
+ /* 2. [modified] if x,y are both even then return an error!
+ *
+ * That is if gcd(x,y) = 2 * k then obviously there is no inverse.
+ */
if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
res = MP_VAL;
goto __D;
@@ -135,8 +142,9 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
}
/* if not zero goto step 4 */
- if (mp_iszero (&u) == 0)
+ if (mp_iszero (&u) == 0) {
goto top;
+ }
/* now a = C, b = D, gcd == g*v */
View
77 bn_fast_mp_montgomery_reduce.c
@@ -14,12 +14,19 @@
*/
#include <tommath.h>
-/* computes xR^-1 == x (mod N) via Montgomery Reduction (comba) */
+/* computes xR^-1 == x (mod N) via Montgomery Reduction
+ *
+ * This is an optimized implementation of mp_montgomery_reduce
+ * which uses the comba method to quickly calculate the columns of the
+ * reduction.
+ *
+ * Based on Algorithm 14.32 on pp.601 of HAC.
+*/
int
fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
{
- int ix, res, olduse;
- mp_word W[512];
+ int ix, res, olduse;
+ mp_word W[512];
/* get old used count */
olduse = a->used;
@@ -31,14 +38,22 @@ fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
}
}
- /* copy the digits of a */
- for (ix = 0; ix < a->used; ix++) {
- W[ix] = a->dp[ix];
- }
+ {
+ register mp_word *_W;
+ register mp_digit *tmpa;
+
+ _W = W;
+ tmpa = a->dp;
+
+ /* copy the digits of a */
+ for (ix = 0; ix < a->used; ix++) {
+ *_W++ = *tmpa++;
+ }
- /* zero the high words */
- for (; ix < m->used * 2 + 1; ix++) {
- W[ix] = 0;
+ /* zero the high words */
+ for (; ix < m->used * 2 + 1; ix++) {
+ *_W++ = 0;
+ }
}
for (ix = 0; ix < m->used; ix++) {
@@ -69,8 +84,10 @@ fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
register mp_digit *tmpx;
register mp_word *_W;
- /* aliases */
+ /* alias for the digits of the modulus */
tmpx = m->dp;
+
+ /* Alias for the columns set by an offset of ix */
_W = W + ix;
/* inner loop */
@@ -88,24 +105,32 @@ fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
}
- /* copy out, A = A/b^n
- *
- * The result is A/b^n but instead of converting from an array of mp_word
- * to mp_digit than calling mp_rshd we just copy them in the right
- * order
- */
- for (ix = 0; ix < m->used + 1; ix++) {
- a->dp[ix] = W[ix + m->used] & ((mp_word) MP_MASK);
- }
+ {
+ register mp_digit *tmpa;
+ register mp_word *_W;
- /* set the max used */
- a->used = m->used + 1;
+ /* copy out, A = A/b^n
+ *
+ * The result is A/b^n but instead of converting from an array of mp_word
+ * to mp_digit then calling mp_rshd we just copy them in the right
+ * order
+ */
+ tmpa = a->dp;
+ _W = W + m->used;
+
+ for (ix = 0; ix < m->used + 1; ix++) {
+ *tmpa++ = *_W++ & ((mp_word) MP_MASK);
+ }
- /* zero oldused digits, if the input a was larger than
- * m->used+1 we'll have to clear the digits */
- for (; ix < olduse; ix++) {
- a->dp[ix] = 0;
+ /* zero oldused digits, if the input a was larger than
+ * m->used+1 we'll have to clear the digits */
+ for (; ix < olduse; ix++) {
+ *tmpa++ = 0;
+ }
}
+
+ /* set the max used and clamp */
+ a->used = m->used + 1;
mp_clamp (a);
/* if A >= m then A = A - m */
View
62 bn_fast_s_mp_mul_digs.c
@@ -21,13 +21,20 @@
* has the effect of making the nested loops that compute the columns very
* simple and schedulable on super-scalar processors.
*
+ * This has been modified to produce a variable number of digits of output so
+ * if say only a half-product is required you don't have to compute the upper half
+ * (a feature required for fast Barrett reduction).
+ *
+ * Based on Algorithm 14.12 on pp.595 of HAC.
+ *
*/
int
fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
- int olduse, res, pa, ix;
- mp_word W[512];
+ int olduse, res, pa, ix;
+ mp_word W[512];
+ /* grow the destination as required */
if (c->alloc < digs) {
if ((res = mp_grow (c, digs)) != MP_OKAY) {
return res;
@@ -43,11 +50,9 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* this multiplier has been modified to allow you to control how many digits
* of output are produced. So at most we want to make upto "digs" digits
- * of output
- */
-
-
- /* this adds products to distinct columns (at ix+iy) of W
+ * of output.
+ *
+ * this adds products to distinct columns (at ix+iy) of W
* note that each step through the loop is not dependent on
* the previous which means the compiler can easily unroll
* the loop without scheduling problems
@@ -85,27 +90,30 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
olduse = c->used;
c->used = digs;
+ {
+ register mp_digit *tmpc;
+
+ /* At this point W[] contains the sums of each column. To get the
+ * correct result we must take the extra bits from each column and
+ * carry them down
+ *
+ * Note that while this adds extra code to the multiplier it saves time
+ * since the carry propagation is removed from the above nested loop.
+ * This has the effect of reducing the work from N*(N+N*c)==N^2 + c*N^2 to
+ * N^2 + N*c where c is the cost of the shifting. On very small numbers
+ * this is slower but on most cryptographic size numbers it is faster.
+ */
+ tmpc = c->dp;
+ for (ix = 1; ix < digs; ix++) {
+ W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
+ *tmpc++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
+ }
+ *tmpc++ = (mp_digit) (W[digs - 1] & ((mp_word) MP_MASK));
- /* At this point W[] contains the sums of each column. To get the
- * correct result we must take the extra bits from each column and
- * carry them down
- *
- * Note that while this adds extra code to the multiplier it saves time
- * since the carry propagation is removed from the above nested loop.
- * This has the effect of reducing the work from N*(N+N*c)==N^2 + c*N^2 to
- * N^2 + N*c where c is the cost of the shifting. On very small numbers
- * this is slower but on most cryptographic size numbers it is faster.
- */
-
- for (ix = 1; ix < digs; ix++) {
- W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
- c->dp[ix - 1] = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
- }
- c->dp[digs - 1] = (mp_digit) (W[digs - 1] & ((mp_word) MP_MASK));
-
- /* clear unused */
- for (; ix < olduse; ix++) {
- c->dp[ix] = 0;
+ /* clear unused */
+ for (; ix < olduse; ix++) {
+ *tmpc++ = 0;
+ }
}
mp_clamp (c);
View
13 bn_fast_s_mp_mul_high_digs.c
@@ -20,14 +20,16 @@
*
* This is used in the Barrett reduction since for one of the multiplications
* only the higher digits were needed. This essentially halves the work.
+ *
+ * Based on Algorithm 14.12 on pp.595 of HAC.
*/
int
fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
- int oldused, newused, res, pa, pb, ix;
- mp_word W[512];
-
+ int oldused, newused, res, pa, pb, ix;
+ mp_word W[512];
+ /* calculate size of product and allocate more space if required */
newused = a->used + b->used + 1;
if (c->alloc < newused) {
if ((res = mp_grow (c, newused)) != MP_OKAY) {
@@ -38,7 +40,7 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* like the other comba method we compute the columns first */
pa = a->used;
pb = b->used;
- memset (&W[digs], 0, (pa + pb + 1 - digs) * sizeof (mp_word));
+ memset (W + digs, 0, (pa + pb + 1 - digs) * sizeof (mp_word));
for (ix = 0; ix < pa; ix++) {
{
register mp_digit tmpx, *tmpy;
@@ -75,8 +77,7 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
c->dp[ix - 1] = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
}
- c->dp[(pa + pb + 1) - 1] =
- (mp_digit) (W[(pa + pb + 1) - 1] & ((mp_word) MP_MASK));
+ c->dp[(pa + pb + 1) - 1] = (mp_digit) (W[(pa + pb + 1) - 1] & ((mp_word) MP_MASK));
for (; ix < oldused; ix++) {
c->dp[ix] = 0;
View
57 bn_fast_s_mp_sqr.c
@@ -26,14 +26,16 @@
* "A * B * 2". The *2 part does not need to be computed until the end which is
* good because 64-bit shifts are slow!
*
+ * Based on Algorithm 14.16 on pp.597 of HAC.
*
*/
int
fast_s_mp_sqr (mp_int * a, mp_int * b)
{
- int olduse, newused, res, ix, pa;
- mp_word W2[512], W[512];
+ int olduse, newused, res, ix, pa;
+ mp_word W2[512], W[512];
+ /* calculate size of product and allocate as required */
pa = a->used;
newused = pa + pa + 1;
if (b->alloc < newused) {
@@ -51,15 +53,31 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
* the inner product can be doubled using n doublings instead of
* n^2
*/
- memset (W, 0, newused * sizeof (mp_word));
+ memset (W, 0, newused * sizeof (mp_word));
memset (W2, 0, newused * sizeof (mp_word));
+/* note optimization
+ * values in W2 are only written in even locations which means
+ * we can collapse the array to 256 words [and fixup the memset above]
+ * provided we also fix up the summations below. Ideally
+ * the fixup loop should be unrolled twice to handle the even/odd
+ * cases, and then a final step to handle odd cases [e.g. newused == odd]
+ *
+ * This will not only save ~8*256 = 2KB of stack but lower the number of
+ * operations required to finally fix up the columns
+ */
+
/* This computes the inner product. To simplify the inner N^2 loop
* the multiplication by two is done afterwards in the N loop.
*/
for (ix = 0; ix < pa; ix++) {
- /* compute the outer product */
- W2[ix + ix] += ((mp_word) a->dp[ix]) * ((mp_word) a->dp[ix]);
+ /* compute the outer product
+ *
+ * Note that every outer product is computed
+ * for a particular column only once which means that
+ * there is no need to do a double precision addition
+ */
+ W2[ix + ix] = ((mp_word) a->dp[ix]) * ((mp_word) a->dp[ix]);
{
register mp_digit tmpx, *tmpy;
@@ -90,22 +108,25 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
W[0] += W[0] + W2[0];
/* now compute digits */
- for (ix = 1; ix < newused; ix++) {
- /* double/add next digit */
- W[ix] += W[ix] + W2[ix];
+ {
+ register mp_digit *tmpb;
- W[ix] = W[ix] + (W[ix - 1] >> ((mp_word) DIGIT_BIT));
- b->dp[ix - 1] = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
- }
- b->dp[(newused) - 1] = (mp_digit) (W[(newused) - 1] & ((mp_word) MP_MASK));
+ tmpb = b->dp;
- /* clear high */
- for (; ix < olduse; ix++) {
- b->dp[ix] = 0;
- }
+ for (ix = 1; ix < newused; ix++) {
+ /* double/add next digit */
+ W[ix] += W[ix] + W2[ix];
- /* fix the sign (since we no longer make a fresh temp) */
- b->sign = MP_ZPOS;
+ W[ix] = W[ix] + (W[ix - 1] >> ((mp_word) DIGIT_BIT));
+ *tmpb++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
+ }
+ *tmpb++ = (mp_digit) (W[(newused) - 1] & ((mp_word) MP_MASK));
+
+ /* clear high */
+ for (; ix < olduse; ix++) {
+ *tmpb++ = 0;
+ }
+ }
mp_clamp (b);
return MP_OKAY;
View
8 bn_mp_2expt.c
@@ -14,11 +14,15 @@
*/
#include <tommath.h>
-/* computes a = 2^b */
+/* computes a = 2^b
+ *
+ * Simple algorithm which zeroes the int, grows it then just sets one bit
+ * as required.
+ */
int
mp_2expt (mp_int * a, int b)
{
- int res;
+ int res;
mp_zero (a);
if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
View
7 bn_mp_abs.c
@@ -14,11 +14,14 @@
*/
#include <tommath.h>
-/* b = |a| */
+/* b = |a|
+ *
+ * Simple function copies the input and fixes the sign to positive
+ */
int
mp_abs (mp_int * a, mp_int * b)
{
- int res;
+ int res;
if ((res = mp_copy (a, b)) != MP_OKAY) {
return res;
}
View
4 bn_mp_add.c
@@ -18,9 +18,9 @@
int
mp_add (mp_int * a, mp_int * b, mp_int * c)
{
- int sa, sb, res;
-
+ int sa, sb, res;
+ /* get sign of both inputs */
sa = a->sign;
sb = b->sign;
View
5 bn_mp_add_d.c
@@ -18,9 +18,8 @@
int
mp_add_d (mp_int * a, mp_digit b, mp_int * c)
{
- mp_int t;
- int res;
-
+ mp_int t;
+ int res;
if ((res = mp_init (&t)) != MP_OKAY) {
return res;
View
5 bn_mp_addmod.c
@@ -18,9 +18,8 @@
int
mp_addmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
{
- int res;
- mp_int t;
-
+ int res;
+ mp_int t;
if ((res = mp_init (&t)) != MP_OKAY) {
return res;
View
4 bn_mp_and.c
@@ -18,8 +18,8 @@
int
mp_and (mp_int * a, mp_int * b, mp_int * c)
{
- int res, ix, px;
- mp_int t, *x;
+ int res, ix, px;
+ mp_int t, *x;
if (a->used > b->used) {
if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
View
8 bn_mp_clamp.c
@@ -14,7 +14,13 @@
*/
#include <tommath.h>
-/* trim unused digits */
+/* trim unused digits
+ *
+ * This is used to ensure that leading zero digits are
+ * trimmed and the leading "used" digit will be non-zero
+ * Typically very fast. Also fixes the sign if there
+ * are no more leading digits
+ */
void
mp_clamp (mp_int * a)
{
View
4 bn_mp_cmp.c
@@ -18,13 +18,11 @@
int
mp_cmp (mp_int * a, mp_int * b)
{
- int res;
/* compare based on sign */
if (a->sign == MP_NEG && b->sign == MP_ZPOS) {
return MP_LT;
} else if (a->sign == MP_ZPOS && b->sign == MP_NEG) {
return MP_GT;
}
- res = mp_cmp_mag (a, b);
- return res;
+ return mp_cmp_mag (a, b);
}
View
3 bn_mp_cmp_mag.c
@@ -18,8 +18,7 @@
int
mp_cmp_mag (mp_int * a, mp_int * b)
{
- int n;
-
+ int n;
/* compare based on # of non-zero digits */
if (a->used > b->used) {
View
24 bn_mp_copy.c
@@ -18,8 +18,7 @@
int
mp_copy (mp_int * a, mp_int * b)
{
- int res, n;
-
+ int res, n;
/* if dst == src do nothing */
if (a == b || a->dp == b->dp) {
@@ -35,14 +34,21 @@ mp_copy (mp_int * a, mp_int * b)
b->used = a->used;
b->sign = a->sign;
- /* copy all the digits */
- for (n = 0; n < a->used; n++) {
- b->dp[n] = a->dp[n];
- }
+ {
+ register mp_digit *tmpa, *tmpb;
+
+ tmpa = a->dp;
+ tmpb = b->dp;
+
+ /* copy all the digits */
+ for (n = 0; n < a->used; n++) {
+ *tmpb++ = *tmpa++;
+ }
- /* clear high digits */
- for (n = b->used; n < b->alloc; n++) {
- b->dp[n] = 0;
+ /* clear high digits */
+ for (; n < b->alloc; n++) {
+ *tmpb++ = 0;
+ }
}
return MP_OKAY;
}
View
4 bn_mp_count_bits.c
@@ -18,8 +18,8 @@
int
mp_count_bits (mp_int * a)
{
- int r;
- mp_digit q;
+ int r;
+ mp_digit q;
if (a->used == 0) {
return 0;
View
16 bn_mp_div.c
@@ -26,8 +26,8 @@
int
mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
{
- mp_int q, x, y, t1, t2;
- int res, n, t, i, norm, neg;
+ mp_int q, x, y, t1, t2;
+ int res, n, t, i, norm, neg;
/* is divisor zero ? */
@@ -75,13 +75,12 @@ mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
/* normalize both x and y, ensure that y >= b/2, [b == 2^DIGIT_BIT] */
norm = 0;
- while ((y.dp[y.used - 1] & (((mp_digit) 1) << (DIGIT_BIT - 1))) ==
- ((mp_digit) 0)) {
+ while ((y.dp[y.used - 1] & (((mp_digit) 1) << (DIGIT_BIT - 1))) == ((mp_digit) 0)) {
++norm;
- if ((res = mp_mul_2d (&x, 1, &x)) != MP_OKAY) {
+ if ((res = mp_mul_2 (&x, &x)) != MP_OKAY) {
goto __Y;
}
- if ((res = mp_mul_2d (&y, 1, &y)) != MP_OKAY) {
+ if ((res = mp_mul_2 (&y, &y)) != MP_OKAY) {
goto __Y;
}
}
@@ -114,7 +113,7 @@ mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
if (x.dp[i] == y.dp[t]) {
q.dp[i - t - 1] = ((1UL << DIGIT_BIT) - 1UL);
} else {
- mp_word tmp;
+ mp_word tmp;
tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
tmp |= ((mp_word) x.dp[i - 1]);
tmp /= ((mp_word) y.dp[t]);
@@ -142,8 +141,7 @@ mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
t2.dp[2] = x.dp[i];
t2.used = 3;
- }
- while (mp_cmp (&t1, &t2) == MP_GT);
+ } while (mp_cmp (&t1, &t2) == MP_GT);
/* step 3.3 x = x - q{i-t-1} * y * b^{i-t-1} */
if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
View
33 bn_mp_div_2.c
@@ -18,20 +18,33 @@
int
mp_div_2 (mp_int * a, mp_int * b)
{
- mp_digit r, rr;
- int x, res;
-
+ int x, res, oldused;
/* copy */
- if ((res = mp_copy (a, b)) != MP_OKAY) {
- return res;
+ if (b->alloc < a->used) {
+ if ((res = mp_grow (b, a->used)) != MP_OKAY) {
+ return res;
+ }
}
- r = 0;
- for (x = b->used - 1; x >= 0; x--) {
- rr = b->dp[x] & 1;
- b->dp[x] = (b->dp[x] >> 1) | (r << (DIGIT_BIT - 1));
- r = rr;
+ oldused = b->used;
+ b->used = a->used;
+ {
+ register mp_digit r, rr, *tmpa, *tmpb;
+
+ tmpa = a->dp + b->used - 1;
+ tmpb = b->dp + b->used - 1;
+ r = 0;
+ for (x = b->used - 1; x >= 0; x--) {
+ rr = *tmpa & 1;
+ *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
+ r = rr;
+ }
+
+ tmpb = b->dp + b->used;
+ for (x = b->used; x < oldused; x++) {
+ *tmpb++ = 0;
+ }
}
mp_clamp (b);
return MP_OKAY;
View
6 bn_mp_div_2d.c
@@ -18,9 +18,9 @@
int
mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
{
- mp_digit D, r, rr;
- int x, res;
- mp_int t;
+ mp_digit D, r, rr;
+ int x, res;
+ mp_int t;
/* if the shift count is <= 0 then we do no work */
View
4 bn_mp_div_d.c
@@ -18,8 +18,8 @@
int
mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
{
- mp_int t, t2;
- int res;
+ mp_int t, t2;
+ int res;
if ((res = mp_init (&t)) != MP_OKAY) {
View
2 bn_mp_exch.c
@@ -17,7 +17,7 @@
void
mp_exch (mp_int * a, mp_int * b)
{
- mp_int t;
+ mp_int t;
t = *a;
*a = *b;
View
4 bn_mp_expt_d.c
@@ -17,8 +17,8 @@
int
mp_expt_d (mp_int * a, mp_digit b, mp_int * c)
{
- int res, x;
- mp_int g;
+ int res, x;
+ mp_int g;
if ((res = mp_init_copy (&g, a)) != MP_OKAY) {
View
29 bn_mp_exptmod.c
@@ -14,19 +14,30 @@
*/
#include <tommath.h>
+static int f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y);
+
+/* this is a shell function that calls either the normal or Montgomery
+ * exptmod functions. Originally the call to the montgomery code was
+ * embedded in the normal function but that wasted alot of stack space
+ * for nothing (since 99% of the time the Montgomery code would be called)
+ */
int
mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
{
- mp_int M[256], res, mu;
- mp_digit buf;
- int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
-
-
/* if the modulus is odd use the fast method */
if (mp_isodd (P) == 1 && P->used > 4 && P->used < MONTGOMERY_EXPT_CUTOFF) {
- err = mp_exptmod_fast (G, X, P, Y);
- return err;
+ return mp_exptmod_fast (G, X, P, Y);
+ } else {
+ return f_mp_exptmod (G, X, P, Y);
}
+}
+
+static int
+f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
+{
+ mp_int M[256], res, mu;
+ mp_digit buf;
+ int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
/* find window size */
x = mp_count_bits (X);
@@ -80,9 +91,7 @@ mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
}
for (x = 0; x < (winsize - 1); x++) {
- if ((err =
- mp_sqr (&M[1 << (winsize - 1)],
- &M[1 << (winsize - 1)])) != MP_OKAY) {
+ if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __MU;
}
if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
View
32 bn_mp_exptmod_fast.c
@@ -24,9 +24,9 @@
int
mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
{
- mp_int M[256], res;
- mp_digit buf, mp;
- int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
+ mp_int M[256], res;
+ mp_digit buf, mp;
+ int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
/* find window size */
x = mp_count_bits (X);
@@ -48,7 +48,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
/* init G array */
for (x = 0; x < (1 << winsize); x++) {
- if ((err = mp_init_size (&M[x], 1)) != MP_OKAY) {
+ if ((err = mp_init (&M[x])) != MP_OKAY) {
for (y = 0; y < x; y++) {
mp_clear (&M[y]);
}
@@ -66,44 +66,32 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
goto __RES;
}
- /* now we need R mod m */
- if ((err = mp_2expt (&res, P->used * DIGIT_BIT)) != MP_OKAY) {
- goto __RES;
- }
-
- /* res = R mod m (can use modified double/subtract ...) */
- if ((err = mp_mod (&res, P, &res)) != MP_OKAY) {
- goto __RES;
- }
-
/* create M table
*
* The M table contains powers of the input base, e.g. M[x] = G^x mod P
*
 * The first half of the table is not computed though except for M[0] and M[1]
*/
- if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
+
+ /* now we need R mod m */
+ if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
goto __RES;
}
/* now set M[1] to G * R mod m */
- if ((err = mp_mulmod (&M[1], &res, P, &M[1])) != MP_OKAY) {
+ if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
goto __RES;
}
-
/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __RES;
}
for (x = 0; x < (winsize - 1); x++) {
- if ((err =
- mp_sqr (&M[1 << (winsize - 1)],
- &M[1 << (winsize - 1)])) != MP_OKAY) {
+ if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __RES;
}
- if ((err =
- mp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
+ if ((err = mp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
goto __RES;
}
}
View
10 bn_mp_gcd.c
@@ -19,8 +19,8 @@
int
mp_gcd (mp_int * a, mp_int * b, mp_int * c)
{
- mp_int u, v, t;
- int k, res, neg;
+ mp_int u, v, t;
+ int k, res, neg;
/* either zero than gcd is the largest */
@@ -57,10 +57,10 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c)
k = 0;
while ((u.dp[0] & 1) == 0 && (v.dp[0] & 1) == 0) {
++k;
- if ((res = mp_div_2d (&u, 1, &u, NULL)) != MP_OKAY) {
+ if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
goto __T;
}
- if ((res = mp_div_2d (&v, 1, &v, NULL)) != MP_OKAY) {
+ if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
goto __T;
}
}
@@ -80,7 +80,7 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c)
do {
/* B3 (and B4). Halve t, if even */
while (t.used != 0 && (t.dp[0] & 1) == 0) {
- if ((res = mp_div_2d (&t, 1, &t, NULL)) != MP_OKAY) {
+ if ((res = mp_div_2 (&t, &t)) != MP_OKAY) {
goto __T;
}
}
View
3 bn_mp_grow.c
@@ -18,8 +18,7 @@
int
mp_grow (mp_int * a, int size)
{
- int i, n;
-
+ int i, n;
/* if the alloc size is smaller alloc more ram */
if (a->alloc < size) {
View
5 bn_mp_init_copy.c
@@ -18,11 +18,10 @@
int
mp_init_copy (mp_int * a, mp_int * b)
{
- int res;
+ int res;
if ((res = mp_init (a)) != MP_OKAY) {
return res;
}
- res = mp_copy (b, a);
- return res;
+ return mp_copy (b, a);
}
View
8 bn_mp_invmod.c
@@ -17,9 +17,8 @@
int
mp_invmod (mp_int * a, mp_int * b, mp_int * c)
{
- mp_int x, y, u, v, A, B, C, D;
- int res;
-
+ mp_int x, y, u, v, A, B, C, D;
+ int res;
/* b cannot be negative */
if (b->sign == MP_NEG) {
@@ -28,8 +27,7 @@ mp_invmod (mp_int * a, mp_int * b, mp_int * c)
/* if the modulus is odd we can use a faster routine instead */
if (mp_iseven (b) == 0) {
- res = fast_mp_invmod (a, b, c);
- return res;
+ return fast_mp_invmod (a, b, c);
}
if ((res = mp_init (&x)) != MP_OKAY) {
View
6 bn_mp_jacobi.c
@@ -20,9 +20,9 @@
int
mp_jacobi (mp_int * a, mp_int * n, int *c)
{
- mp_int a1, n1, e;
- int s, r, res;
- mp_digit residue;
+ mp_int a1, n1, e;
+ int s, r, res;
+ mp_digit residue;
/* step 1. if a == 0, return 0 */
if (mp_iszero (a) == 1) {
View
4 bn_mp_karatsuba_mul.c
@@ -36,8 +36,8 @@
int
mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
{
- mp_int x0, x1, y0, y1, t1, t2, x0y0, x1y1;
- int B, err, x;
+ mp_int x0, x1, y0, y1, t1, t2, x0y0, x1y1;
+ int B, err, x;
err = MP_MEM;
View
4 bn_mp_karatsuba_sqr.c
@@ -22,8 +22,8 @@
int
mp_karatsuba_sqr (mp_int * a, mp_int * b)
{
- mp_int x0, x1, t1, t2, x0x0, x1x1;
- int B, err, x;
+ mp_int x0, x1, t1, t2, x0x0, x1x1;
+ int B, err, x;
err = MP_MEM;
View
4 bn_mp_lcm.c
@@ -18,8 +18,8 @@
int
mp_lcm (mp_int * a, mp_int * b, mp_int * c)
{
- int res;
- mp_int t;
+ int res;
+ mp_int t;
if ((res = mp_init (&t)) != MP_OKAY) {
View
2 bn_mp_lshd.c
@@ -18,7 +18,7 @@
int
mp_lshd (mp_int * a, int b)
{
- int x, res;
+ int x, res;
/* if its less than zero return */
View
4 bn_mp_mod.c
@@ -18,8 +18,8 @@
int
mp_mod (mp_int * a, mp_int * b, mp_int * c)
{
- mp_int t;
- int res;
+ mp_int t;
+ int res;
if ((res = mp_init (&t)) != MP_OKAY) {
View
5 bn_mp_mod_2d.c
@@ -18,7 +18,7 @@
int
mp_mod_2d (mp_int * a, int b, mp_int * c)
{
- int x, res;
+ int x, res;
/* if b is <= 0 then zero the int */
@@ -44,8 +44,7 @@ mp_mod_2d (mp_int * a, int b, mp_int * c)
}
/* clear the digit that is not completely outside/inside the modulus */
c->dp[b / DIGIT_BIT] &=
- (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) -
- ((mp_digit) 1));
+ (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
mp_clamp (c);
return MP_OKAY;
}
View
4 bn_mp_mod_d.c
@@ -17,8 +17,8 @@
int
mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
{
- mp_int t, t2;
- int res;
+ mp_int t, t2;
+ int res;
if ((res = mp_init (&t)) != MP_OKAY) {
View
53 bn_mp_montgomery_calc_normalization.c
@@ -0,0 +1,53 @@
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://libtommath.iahu.ca
+ */
+#include <tommath.h>
+
+/* calculates a = B^n mod b for Montgomery reduction
+ * Where B is the base [e.g. 2^DIGIT_BIT].
+ * B^n mod b is computed by first computing
+ * A = B^(n-1) which doesn't require a reduction but a simple OR.
+ * then C = A * B = B^n is computed by performing up to DIGIT_BIT
+ * shifts with subtractions when the result is greater than b.
+ *
+ * The method is slightly modified to shift B unconditionally up to just under
+ * the leading bit of b. This saves a lot of multiple precision shifting.
+ */
+int
+mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
+{
+ int x, bits, res;
+
+ /* how many bits of last digit does b use */
+ bits = mp_count_bits (b) % DIGIT_BIT;
+
+ /* compute A = B^(n-1) * 2^(bits-1) */
+ if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
+ return res;
+ }
+
+ /* now compute C = A * B mod b */
+ for (x = bits - 1; x < DIGIT_BIT; x++) {
+ if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
+ return res;
+ }
+ if (mp_cmp_mag (a, b) != MP_LT) {
+ if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
+ return res;
+ }
+ }
+ }
+
+ return MP_OKAY;
+}
View
15 bn_mp_montgomery_reduce.c
@@ -18,14 +18,13 @@
int
mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
{
- int ix, res, digs;
- mp_digit ui;
+ int ix, res, digs;
+ mp_digit ui;
digs = m->used * 2 + 1;
if ((digs < 512)
&& digs < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
- res = fast_mp_montgomery_reduce (a, m, mp);
- return res;
+ return fast_mp_montgomery_reduce (a, m, mp);
}
if (a->alloc < m->used * 2 + 1) {
@@ -51,9 +50,7 @@ mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
mu = 0;
for (iy = 0; iy < m->used; iy++) {
- r =
- ((mp_word) ui) * ((mp_word) * tmpx++) + ((mp_word) mu) +
- ((mp_word) * tmpy);
+ r = ((mp_word) ui) * ((mp_word) * tmpx++) + ((mp_word) mu) + ((mp_word) * tmpy);
mu = (r >> ((mp_word) DIGIT_BIT));
*tmpy++ = (r & ((mp_word) MP_MASK));
}
@@ -71,9 +68,7 @@ mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
/* if A >= m then A = A - m */
if (mp_cmp_mag (a, m) != MP_LT) {
- if ((res = s_mp_sub (a, m, a)) != MP_OKAY) {
- return res;
- }
+ return s_mp_sub (a, m, a);
}
return MP_OKAY;
View
4 bn_mp_montgomery_setup.c
@@ -18,8 +18,8 @@
int
mp_montgomery_setup (mp_int * a, mp_digit * mp)
{
- mp_int t, tt;
- int res;
+ mp_int t, tt;
+ int res;
if ((res = mp_init (&t)) != MP_OKAY) {
return res;
View
18 bn_mp_mul.c
@@ -18,12 +18,26 @@
int
mp_mul (mp_int * a, mp_int * b, mp_int * c)
{
- int res, neg;
+ int res, neg;
neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
if (MIN (a->used, b->used) > KARATSUBA_MUL_CUTOFF) {
res = mp_karatsuba_mul (a, b, c);
} else {
- res = s_mp_mul (a, b, c);
+
+ /* can we use the fast multiplier?
+ *
+ * The fast multiplier can be used if the output will have less than
+ * 512 digits and the number of digits won't affect carry propagation
+ */
+ int digs = a->used + b->used + 1;
+
+ if ((digs < 512)
+ && digs < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+ res = fast_s_mp_mul_digs (a, b, c, digs);
+ } else {
+ res = s_mp_mul (a, b, c);
+ }
+
}
c->sign = neg;
return res;
View
51 bn_mp_mul_2.c
@@ -18,27 +18,48 @@
int
mp_mul_2 (mp_int * a, mp_int * b)
{
- mp_digit r, rr;
- int x, res;
+ int x, res, oldused;
+ /* Optimization: should copy and shift at the same time */
- /* copy */
- if ((res = mp_copy (a, b)) != MP_OKAY) {
- return res;
+ if (b->alloc < a->used) {
+ if ((res = mp_grow (b, a->used)) != MP_OKAY) {
+ return res;
+ }
}
- if ((res = mp_grow (b, b->used + 1)) != MP_OKAY) {
- return res;
- }
- ++b->used;
+ oldused = b->used;
+ b->used = a->used;
/* shift any bit count < DIGIT_BIT */
- r = 0;
- for (x = 0; x < b->used; x++) {
- rr = (b->dp[x] >> (DIGIT_BIT - 1)) & 1;
- b->dp[x] = ((b->dp[x] << 1) | r) & MP_MASK;
- r = rr;
+ {
+ register mp_digit r, rr, *tmpa, *tmpb;
+
+ r = 0;
+ tmpa = a->dp;
+ tmpb = b->dp;
+ for (x = 0; x < b->used; x++) {
+ rr = *tmpa >> (DIGIT_BIT - 1);
+ *tmpb++ = ((*tmpa++ << 1) | r) & MP_MASK;
+ r = rr;
+ }
+
+ /* new leading digit? */
+ if (r != 0) {
+ if (b->alloc == b->used) {
+ if ((res = mp_grow (b, b->used + 1)) != MP_OKAY) {
+ return res;
+ }
+ }
+ /* add a MSB of 1 */
+ *tmpb = 1;
+ ++b->used;
+ }
+
+ tmpb = b->dp + b->used;
+ for (x = b->used; x < oldused; x++) {
+ *tmpb++ = 0;
+ }
}
- mp_clamp (b);
return MP_OKAY;
}
View
4 bn_mp_mul_2d.c
@@ -18,8 +18,8 @@
int
mp_mul_2d (mp_int * a, int b, mp_int * c)
{
- mp_digit d, r, rr;
- int x, res;
+ mp_digit d, r, rr;
+ int x, res;
/* copy */
View
47 bn_mp_mul_d.c
@@ -18,29 +18,40 @@
int
mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
{
- int res, pa, ix;
- mp_word r;
- mp_digit u;
- mp_int t;
-
+ int res, pa, olduse;
pa = a->used;
- if ((res = mp_init_size (&t, pa + 2)) != MP_OKAY) {
- return res;
+ if (c->alloc < pa + 1) {
+ if ((res = mp_grow (c, pa + 1)) != MP_OKAY) {
+ return res;
+ }
}
- t.used = pa + 2;
- u = 0;
- for (ix = 0; ix < pa; ix++) {
- r = ((mp_word) u) + ((mp_word) a->dp[ix]) * ((mp_word) b);
- t.dp[ix] = (mp_digit) (r & ((mp_word) MP_MASK));
- u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
+ olduse = c->used;
+ c->used = pa + 1;
+
+ {
+ register mp_digit u, *tmpa, *tmpc;
+ register mp_word r;
+ register int ix;
+
+ tmpc = c->dp + c->used;
+ for (ix = c->used; ix < olduse; ix++) {
+ *tmpc++ = 0;
+ }
+
+ tmpa = a->dp;
+ tmpc = c->dp;
+
+ u = 0;
+ for (ix = 0; ix < pa; ix++) {
+ r = ((mp_word) u) + ((mp_word) * tmpa++) * ((mp_word) b);
+ *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
+ u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
+ }
+ *tmpc = u;
}
- t.dp[ix] = u;
- t.sign = a->sign;
- mp_clamp (&t);
- mp_exch (&t, c);
- mp_clear (&t);
+ mp_clamp (c);
return MP_OKAY;
}
View
4 bn_mp_mulmod.c
@@ -18,8 +18,8 @@
int
mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
{
- int res;
- mp_int t;
+ int res;
+ mp_int t;
if ((res = mp_init (&t)) != MP_OKAY) {
View
8 bn_mp_n_root.c
@@ -17,12 +17,16 @@
/* find the n'th root of an integer
*
* Result found such that (c)^b <= a and (c+1)^b > a
+ *
+ * This algorithm uses Newton's approximation x[i+1] = x[i] - f(x[i])/f'(x[i])
+ * which will find the root in log(N) time where each step involves a fair bit. This
+ * is not meant to find huge roots [square and cube at most].
*/
int
mp_n_root (mp_int * a, mp_digit b, mp_int * c)
{
- mp_int t1, t2, t3;
- int res, neg;
+ mp_int t1, t2, t3;
+ int res, neg;
/* input must be positive if b is even */
if ((b & 1) == 0 && a->sign == MP_NEG) {
View
2 bn_mp_neg.c
@@ -18,7 +18,7 @@
int
mp_neg (mp_int * a, mp_int * b)
{
- int res;
+ int res;
if ((res = mp_copy (a, b)) != MP_OKAY) {
return res;
}
View
4 bn_mp_or.c
@@ -18,8 +18,8 @@
int
mp_or (mp_int * a, mp_int * b, mp_int * c)
{
- int res, ix, px;
- mp_int t, *x;
+ int res, ix, px;
+ mp_int t, *x;
if (a->used > b->used) {
if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
View
15 bn_mp_rand.c
@@ -18,28 +18,29 @@
int
mp_rand (mp_int * a, int digits)
{
- int res;
- mp_digit d;
+ int res;
+ mp_digit d;
mp_zero (a);
if (digits <= 0) {
return MP_OKAY;
}
/* first place a random non-zero digit */
- d = ((mp_digit) abs (rand ()));
- d = d == 0 ? 1 : d;
+ do {
+ d = ((mp_digit) abs (rand ()));
+ } while (d == 0);
if ((res = mp_add_d (a, d, a)) != MP_OKAY) {
return res;
}
-
while (digits-- > 0) {
- if ((res = mp_add_d (a, ((mp_digit) abs (rand ())), a)) != MP_OKAY) {
+ if ((res = mp_lshd (a, 1)) != MP_OKAY) {
return res;
}
- if ((res = mp_lshd (a, 1)) != MP_OKAY) {
+
+ if ((res = mp_add_d (a, ((mp_digit) abs (rand ())), a)) != MP_OKAY) {
return res;
}
}
View
2 bn_mp_read_signed_bin.c
@@ -18,7 +18,7 @@
int
mp_read_signed_bin (mp_int * a, unsigned char *b, int c)
{
- int res;
+ int res;
if ((res = mp_read_unsigned_bin (a, b + 1, c - 1)) != MP_OKAY) {
return res;
View
2 bn_mp_read_unsigned_bin.c
@@ -18,7 +18,7 @@
int
mp_read_unsigned_bin (mp_int * a, unsigned char *b, int c)
{
- int res;
+ int res;
mp_zero (a);
while (c-- > 0) {
if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
View
6 bn_mp_reduce.c
@@ -20,7 +20,7 @@
int
mp_reduce_setup (mp_int * a, mp_int * b)
{
- int res;
+ int res;
if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
@@ -36,8 +36,8 @@ mp_reduce_setup (mp_int * a, mp_int * b)
int
mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
{
- mp_int q;
- int res, um = m->used;
+ mp_int q;
+ int res, um = m->used;
if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
View
2 bn_mp_rshd.c
@@ -18,7 +18,7 @@
void
mp_rshd (mp_int * a, int b)
{
- int x;
+ int x;
/* if b <= 0 then ignore it */
View
2 bn_mp_set_int.c
@@ -18,7 +18,7 @@
int
mp_set_int (mp_int * a, unsigned long b)
{
- int x, res;
+ int x, res;
mp_zero (a);
View
11 bn_mp_sqr.c
@@ -18,11 +18,18 @@
int
mp_sqr (mp_int * a, mp_int * b)
{
- int res;
+ int res;
if (a->used > KARATSUBA_SQR_CUTOFF) {
res = mp_karatsuba_sqr (a, b);
} else {
- res = s_mp_sqr (a, b);
+
+ /* can we use the fast multiplier? */
+ if (((a->used * 2 + 1) < 512)
+ && a->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT) - 1))) {
+ res = fast_s_mp_sqr (a, b);
+ } else {
+ res = s_mp_sqr (a, b);
+ }
}
b->sign = MP_ZPOS;
return res;
View
4 bn_mp_sqrmod.c
@@ -18,8 +18,8 @@
int
mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
{
- int res;
- mp_int t;
+ int res;
+ mp_int t;
if ((res = mp_init (&t)) != MP_OKAY) {
View
2 bn_mp_sub.c
@@ -18,7 +18,7 @@
int
mp_sub (mp_int * a, mp_int * b, mp_int * c)
{
- int sa, sb, res;
+ int sa, sb, res;
sa = a->sign;
View
4 bn_mp_sub_d.c
@@ -18,8 +18,8 @@
int
mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
{
- mp_int t;
- int res;
+ mp_int t;
+ int res;
if ((res = mp_init (&t)) != MP_OKAY) {
View
4 bn_mp_submod.c
@@ -18,8 +18,8 @@
int
mp_submod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
{
- int res;
- mp_int t;
+ int res;
+ mp_int t;
if ((res = mp_init (&t)) != MP_OKAY) {
View
2 bn_mp_to_signed_bin.c
@@ -18,7 +18,7 @@
int
mp_to_signed_bin (mp_int * a, unsigned char *b)
{
- int res;
+ int res;
if ((res = mp_to_unsigned_bin (a, b + 1)) != MP_OKAY) {
return res;
View
4 bn_mp_to_unsigned_bin.c
@@ -18,8 +18,8 @@
int
mp_to_unsigned_bin (mp_int * a, unsigned char *b)
{
- int x, res;
- mp_int t;
+ int x, res;
+ mp_int t;
if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
return res;
View
2 bn_mp_unsigned_bin_size.c
@@ -18,6 +18,6 @@
int
mp_unsigned_bin_size (mp_int * a)
{
- int size = mp_count_bits (a);
+ int size = mp_count_bits (a);
return (size / 8 + ((size & 7) != 0 ? 1 : 0));
}
View
4 bn_mp_xor.c
@@ -18,8 +18,8 @@
int
mp_xor (mp_int * a, mp_int * b, mp_int * c)
{
- int res, ix, px;
- mp_int t, *x;
+ int res, ix, px;
+ mp_int t, *x;
if (a->used > b->used) {
if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
View
21 bn_radix.c
@@ -15,16 +15,15 @@
#include <tommath.h>
/* chars used in radix conversions */
-static const char *s_rmap =
- "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+static const char *s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
/* read a string [ASCII] in a given radix */
int
mp_read_radix (mp_int * a, char *str, int radix)
{
- int y, res, neg;
- char ch;
+ int y, res, neg;
+ char ch;
if (radix < 2 || radix > 64) {
return MP_VAL;
@@ -66,10 +65,10 @@ mp_read_radix (mp_int * a, char *str, int radix)
int
mp_toradix (mp_int * a, char *str, int radix)
{
- int res, digs;
- mp_int t;
- mp_digit d;
- char *_s = str;
+ int res, digs;
+ mp_int t;
+ mp_digit d;
+ char *_s = str;
if (radix < 2 || radix > 64) {
return MP_VAL;
@@ -104,9 +103,9 @@ mp_toradix (mp_int * a, char *str, int radix)
int
mp_radix_size (mp_int * a, int radix)
{
- int res, digs;
- mp_int t;
- mp_digit d;
+ int res, digs;
+ mp_int t;
+ mp_digit d;
/* special case for binary */
if (radix == 2) {
View
2 bn_reverse.c
@@ -18,7 +18,7 @@
void
bn_reverse (unsigned char *s, int len)
{
- int ix, iy;
+ int ix, iy;
unsigned char t;
ix = 0;
View
62 bn_s_mp_add.c
@@ -18,10 +18,8 @@
int
s_mp_add (mp_int * a, mp_int * b, mp_int * c)
{
- mp_int *x;
- int olduse, res, min, max, i;
- mp_digit u;
-
+ mp_int *x;
+ int olduse, res, min, max;
/* find sizes, we let |a| <= |b| which means we have to sort
* them. "x" will point to the input with the most digits
@@ -52,38 +50,48 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
/* add digits from lower part */
/* set the carry to zero */
- u = 0;
- for (i = 0; i < min; i++) {
- /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
- c->dp[i] = a->dp[i] + b->dp[i] + u;
-
- /* U = carry bit of T[i] */
- u = (c->dp[i] >> DIGIT_BIT) & 1;
+ {
+ register mp_digit u, *tmpa, *tmpb, *tmpc;
+ register int i;
- /* take away carry bit from T[i] */
- c->dp[i] &= MP_MASK;
- }
+ /* alias for digit pointers */
+ tmpa = a->dp;
+ tmpb = b->dp;
+ tmpc = c->dp;
- /* now copy higher words if any, that is in A+B if A or B has more digits add those in */
- if (min != max) {
- for (; i < max; i++) {
- /* T[i] = X[i] + U */
- c->dp[i] = x->dp[i] + u;
+ u = 0;
+ for (i = 0; i < min; i++) {
+ /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
+ *tmpc = *tmpa++ + *tmpb++ + u;
/* U = carry bit of T[i] */
- u = (c->dp[i] >> DIGIT_BIT) & 1;
+ u = *tmpc >> DIGIT_BIT;
/* take away carry bit from T[i] */
- c->dp[i] &= MP_MASK;
+ *tmpc++ &= MP_MASK;
}
- }
- /* add carry */
- c->dp[i] = u;
+ /* now copy higher words if any, that is in A+B if A or B has more digits add those in */
+ if (min != max) {
+ for (; i < max; i++) {
+ /* T[i] = X[i] + U */
+ *tmpc = x->dp[i] + u;
+
+ /* U = carry bit of T[i] */
+ u = *tmpc >> DIGIT_BIT;
- /* clear digits above used (since we may not have grown result above) */
- for (i = c->used; i < olduse; i++) {
- c->dp[i] = 0;
+ /* take away carry bit from T[i] */
+ *tmpc++ &= MP_MASK;
+ }
+ }
+
+ /* add carry */
+ *tmpc++ = u;
+
+ /* clear digits above used (since we may not have grown result above) */
+ for (i = c->used; i < olduse; i++) {
+ *tmpc++ = 0;
+ }
}
mp_clamp (c);
View
26 bn_s_mp_mul_digs.c
@@ -21,23 +21,11 @@
int
s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
- mp_int t;
- int res, pa, pb, ix, iy;
- mp_digit u;
- mp_word r;
- mp_digit tmpx, *tmpt, *tmpy;
-
-
- /* can we use the fast multiplier?
- *
- * The fast multiplier can be used if the output will have less than
- * 512 digits and the number of digits won't affect carry propagation
- */
- if ((digs < 512)
- && digs < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
- res = fast_s_mp_mul_digs (a, b, c, digs);
- return res;
- }
+ mp_int t;
+ int res, pa, pb, ix, iy;
+ mp_digit u;
+ mp_word r;
+ mp_digit tmpx, *tmpt, *tmpy;
if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
return res;
@@ -61,9 +49,7 @@ s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* compute the columns of the output and propagate the carry */
for (iy = 0; iy < pb; iy++) {
/* compute the column as a mp_word */
- r =
- ((mp_word) * tmpt) + ((mp_word) tmpx) * ((mp_word) * tmpy++) +
- ((mp_word) u);
+ r = ((mp_word) * tmpt) + ((mp_word) tmpx) * ((mp_word) * tmpy++) + ((mp_word) u);
/* the new column is the lower part of the result */
*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
<