added libtommath-0.18

libtom · Jul 15, 2010 · 0ef44ce · 0ef44ce
1 parent fd181cc
commit 0ef44ce
Show file tree

Hide file tree

Showing 108 changed files with 10,043 additions and 2,693 deletions.
diff --git a/bn.pdf b/bn.pdf
diff --git a/bn.tex b/bn.tex
@@ -1,7 +1,7 @@
 \documentclass[]{article}
 \begin{document}
 
-\title{LibTomMath v0.17 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org }
+\title{LibTomMath v0.18 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org }
 \author{Tom St Denis \\ tomstdenis@iahu.ca}
 \maketitle
 \newpage

diff --git a/bn_fast_mp_montgomery_reduce.c b/bn_fast_mp_montgomery_reduce.c
@@ -14,7 +14,7 @@
  */
 #include <tommath.h>
 
-/* computes xR^-1 == x (mod N) via Montgomery Reduction 
+/* computes xR**-1 == x (mod N) via Montgomery Reduction 
  * 
  * This is an optimized implementation of mp_montgomery_reduce 
  * which uses the comba method to quickly calculate the columns of the
@@ -23,76 +23,77 @@
  * Based on Algorithm 14.32 on pp.601 of HAC.
 */
 int
-fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
+fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
 {
   int     ix, res, olduse;
   mp_word W[MP_WARRAY];
 
   /* get old used count */
-  olduse = a->used;
+  olduse = x->used;
 
   /* grow a as required */
-  if (a->alloc < m->used + 1) {
+  if (x->alloc < n->used + 1) {
-    if ((res = mp_grow (a, m->used + 1)) != MP_OKAY) {
+    if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
       return res;
     }
   }
 
   {
     register mp_word *_W;
-    register mp_digit *tmpa;
+    register mp_digit *tmpx;
 
     _W = W;
-    tmpa = a->dp;
+    tmpx = x->dp;
 
     /* copy the digits of a into W[0..a->used-1] */
-    for (ix = 0; ix < a->used; ix++) {
+    for (ix = 0; ix < x->used; ix++) {
-      *_W++ = *tmpa++;
+      *_W++ = *tmpx++;
     }
 
     /* zero the high words of W[a->used..m->used*2] */
-    for (; ix < m->used * 2 + 1; ix++) {
+    for (; ix < n->used * 2 + 1; ix++) {
       *_W++ = 0;
     }
   }
 
-  for (ix = 0; ix < m->used; ix++) {
+  for (ix = 0; ix < n->used; ix++) {
-    /* ui = ai * m' mod b
+    /* mu = ai * m' mod b
      *
      * We avoid a double precision multiplication (which isn't required)
-     * by casting the value down to a mp_digit.  Note this requires that W[ix-1] have
+     * by casting the value down to a mp_digit.  Note this requires 
-     * the carry cleared (see after the inner loop)
+     * that W[ix-1] have  the carry cleared (see after the inner loop)
      */
-    register mp_digit ui;
+    register mp_digit mu;
-    ui = (((mp_digit) (W[ix] & MP_MASK)) * mp) & MP_MASK;
+    mu = (((mp_digit) (W[ix] & MP_MASK)) * rho) & MP_MASK;
 
-    /* a = a + ui * m * b^i
+    /* a = a + mu * m * b**i
      *
      * This is computed in place and on the fly.  The multiplication
-     * by b^i is handled by offseting which columns the results
+     * by b**i is handled by offseting which columns the results
      * are added to.
      *
-     * Note the comba method normally doesn't handle carries in the inner loop
+     * Note the comba method normally doesn't handle carries in the 
-     * In this case we fix the carry from the previous column since the Montgomery
+     * inner loop In this case we fix the carry from the previous 
-     * reduction requires digits of the result (so far) [see above] to work.  This is
+     * column since the Montgomery reduction requires digits of the 
-     * handled by fixing up one carry after the inner loop.  The carry fixups are done
+     * result (so far) [see above] to work.  This is
-     * in order so after these loops the first m->used words of W[] have the carries
+     * handled by fixing up one carry after the inner loop.  The 
-     * fixed
+     * carry fixups are done in order so after these loops the 
+     * first m->used words of W[] have the carries fixed
      */
     {
       register int iy;
-      register mp_digit *tmpx;
+      register mp_digit *tmpn;
       register mp_word *_W;
 
       /* alias for the digits of the modulus */
-      tmpx = m->dp;
+      tmpn = n->dp;
 
       /* Alias for the columns set by an offset of ix */
       _W = W + ix;
 
       /* inner loop */
-      for (iy = 0; iy < m->used; iy++) {
+      for (iy = 0; iy < n->used; iy++) {
-    *_W++ += ((mp_word) ui) * ((mp_word) * tmpx++);
+          *_W++ += ((mp_word) mu) * ((mp_word) * tmpn++);
       }
     }
 
@@ -102,44 +103,44 @@ fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
 
 
   {
-    register mp_digit *tmpa;
+    register mp_digit *tmpx;
     register mp_word *_W, *_W1;
 
     /* nox fix rest of carries */
     _W1 = W + ix;
     _W = W + ++ix;
 
-    for (; ix <= m->used * 2 + 1; ix++) {
+    for (; ix <= n->used * 2 + 1; ix++) {
       *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
     }
 
-    /* copy out, A = A/b^n
+    /* copy out, A = A/b**n
      *
-     * The result is A/b^n but instead of converting from an array of mp_word
+     * The result is A/b**n but instead of converting from an 
-     * to mp_digit than calling mp_rshd we just copy them in the right
+     * array of mp_word to mp_digit than calling mp_rshd 
-     * order
+     * we just copy them in the right order
      */
-    tmpa = a->dp;
+    tmpx = x->dp;
-    _W = W + m->used;
+    _W = W + n->used;
 
-    for (ix = 0; ix < m->used + 1; ix++) {
+    for (ix = 0; ix < n->used + 1; ix++) {
-      *tmpa++ = *_W++ & ((mp_word) MP_MASK);
+      *tmpx++ = *_W++ & ((mp_word) MP_MASK);
     }
 
     /* zero oldused digits, if the input a was larger than
      * m->used+1 we'll have to clear the digits */
     for (; ix < olduse; ix++) {
-      *tmpa++ = 0;
+      *tmpx++ = 0;
     }
   }
 
   /* set the max used and clamp */
-  a->used = m->used + 1;
+  x->used = n->used + 1;
-  mp_clamp (a);
+  mp_clamp (x);
 
   /* if A >= m then A = A - m */
-  if (mp_cmp_mag (a, m) != MP_LT) {
+  if (mp_cmp_mag (x, n) != MP_LT) {
-    return s_mp_sub (a, m, a);
+    return s_mp_sub (x, n, x);
   }
   return MP_OKAY;
 }
diff --git a/bn_fast_s_mp_sqr.c b/bn_fast_s_mp_sqr.c
@@ -16,15 +16,17 @@
 
 /* fast squaring
  *
- * This is the comba method where the columns of the product are computed first
+ * This is the comba method where the columns of the product 
- * then the carries are computed.  This has the effect of making a very simple
+ * are computed first then the carries are computed.  This 
- * inner loop that is executed the most
+ * has the effect of making a very simple inner loop that 
+ * is executed the most
  *
  * W2 represents the outer products and W the inner.
  *
- * A further optimizations is made because the inner products are of the form
+ * A further optimizations is made because the inner 
- * "A * B * 2".  The *2 part does not need to be computed until the end which is
+ * products are of the form "A * B * 2".  The *2 part does 
- * good because 64-bit shifts are slow!
+ * not need to be computed until the end which is good 
+ * because 64-bit shifts are slow!
  *
  * Based on Algorithm 14.16 on pp.597 of HAC.
  *
@@ -48,26 +50,15 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
    * Note that there are two buffers.  Since squaring requires
    * a outter and inner product and the inner product requires
    * computing a product and doubling it (a relatively expensive
-   * op to perform n^2 times if you don't have to) the inner and
+   * op to perform n**2 times if you don't have to) the inner and
    * outer products are computed in different buffers.  This way
    * the inner product can be doubled using n doublings instead of
-   * n^2
+   * n**2
    */
   memset (W, 0, newused * sizeof (mp_word));
   memset (W2, 0, newused * sizeof (mp_word));
 
-/* note optimization
+  /* This computes the inner product.  To simplify the inner N**2 loop
- * values in W2 are only written in even locations which means
- * we can collapse the array to 256 words [and fixup the memset above]
- * provided we also fix up the summations below.  Ideally
- * the fixup loop should be unrolled twice to handle the even/odd
- * cases, and then a final step to handle odd cases [e.g. newused == odd]
- *
- * This will not only save ~8*256 = 2KB of stack but lower the number of
- * operations required to finally fix up the columns
- */
-
-  /* This computes the inner product.  To simplify the inner N^2 loop
    * the multiplication by two is done afterwards in the N loop.
    */
   for (ix = 0; ix < pa; ix++) {
@@ -101,28 +92,33 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
   }
 
   /* setup dest */
-  olduse = b->used;
+  olduse  = b->used;
   b->used = newused;
 
-  /* double first value, since the inner products are half of what they should be */
-  W[0] += W[0] + W2[0];
-
   /* now compute digits */
   {
     register mp_digit *tmpb;
 
-    tmpb = b->dp;
+    /* double first value, since the inner products are 
+     * half of what they should be 
+     */
+    W[0] += W[0] + W2[0];
 
+    tmpb = b->dp;
     for (ix = 1; ix < newused; ix++) {
       /* double/add next digit */
       W[ix] += W[ix] + W2[ix];
 
       W[ix] = W[ix] + (W[ix - 1] >> ((mp_word) DIGIT_BIT));
       *tmpb++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
     }
+    /* set the last value.  Note even if the carry is zero 
+     * this is required since the next step will not zero 
+     * it if b originally had a value at b->dp[2*a.used]
+     */
     *tmpb++ = (mp_digit) (W[(newused) - 1] & ((mp_word) MP_MASK));
 
-    /* clear high */
+    /* clear high digits */
     for (; ix < olduse; ix++) {
       *tmpb++ = 0;
     }

diff --git a/bn_mp_2expt.c b/bn_mp_2expt.c
@@ -14,7 +14,7 @@
  */
 #include <tommath.h>
 
-/* computes a = 2^b 
+/* computes a = 2**b 
  *
  * Simple algorithm which zeroes the int, grows it then just sets one bit
  * as required.

diff --git a/bn_mp_copy.c b/bn_mp_copy.c
@@ -21,7 +21,7 @@ mp_copy (mp_int * a, mp_int * b)
   int     res, n;
 
   /* if dst == src do nothing */
-  if (a == b || a->dp == b->dp) {
+  if (a == b) {
     return MP_OKAY;
   }
 

diff --git a/bn_mp_count_bits.c b/bn_mp_count_bits.c
@@ -21,11 +21,15 @@ mp_count_bits (mp_int * a)
   int     r;
   mp_digit q;
 
+  /* shortcut */
   if (a->used == 0) {
     return 0;
   }
 
+  /* get number of digits and add that */
   r = (a->used - 1) * DIGIT_BIT;
+
+  /* take the last digit and count the bits in it */
   q = a->dp[a->used - 1];
   while (q > ((mp_digit) 0)) {
     ++r;

diff --git a/bn_mp_div_2d.c b/bn_mp_div_2d.c
@@ -14,7 +14,7 @@
  */
 #include <tommath.h>
 
-/* shift right by a certain bit count (store quotient in c, remainder in d) */
+/* shift right by a certain bit count (store quotient in c, optional remainder in d) */
 int
 mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
 {
@@ -81,7 +81,6 @@ mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
     }
   }
   mp_clamp (c);
-  res = MP_OKAY;
   if (d != NULL) {
     mp_exch (&t, d);
   }

diff --git a/bn_mp_div_3.c b/bn_mp_div_3.c
@@ -0,0 +1,64 @@
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is library that provides for multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library is designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+#include <tommath.h>
+
+/* divide by three (based on routine from MPI and the GMP manual) */
+int
+mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
+{
+  mp_int   q;
+  mp_word  w, t;
+  mp_digit b;
+  int      res, ix;
+
+  /* b = 2**DIGIT_BIT / 3 */
+  b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
+
+  if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
+     return res;
+  }
+
+  q.used = a->used;
+  q.sign = a->sign;
+  w = 0;
+  for (ix = a->used - 1; ix >= 0; ix--) {
+     w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
+
+     if (w >= 3) {
+        t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
+        w -= (t << ((mp_word)1)) + t;
+        while (w >= 3) {
+           t += 1;
+           w -= 3;
+        }
+      } else {
+        t = 0;
+      }
+      q.dp[ix] = t;
+  }
+
+  if (d != NULL) {
+     *d = w;
+  }
+
+  if (c != NULL) {
+     mp_clamp(&q);
+     mp_exch(&q, c);
+  }
+  mp_clear(&q);
+
+  return res;
+}
+