Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

file 129 lines (108 sloc) 3.374 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
#include <tommath.h>
#ifdef BN_FAST_S_MP_SQR_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library was designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/

/* fast squaring
*
* This is the comba method where the columns of the product
* are computed first then the carries are computed. This
* has the effect of making a very simple inner loop that
* is executed the most
*
* W2 represents the outer products and W the inner.
*
* A further optimizations is made because the inner
* products are of the form "A * B * 2". The *2 part does
* not need to be computed until the end which is good
* because 64-bit shifts are slow!
*
* Based on Algorithm 14.16 on pp.597 of HAC.
*
*/
/* the jist of squaring...

you do like mult except the offset of the tmpx [one that starts closer to zero]
can't equal the offset of tmpy. So basically you set up iy like before then you min it with
(ty-tx) so that it never happens. You double all those you add in the inner loop

After that loop you do the squares and add them in.

Remove W2 and don't memset W

*/

int fast_s_mp_sqr (mp_int * a, mp_int * b)
{
  int olduse, res, pa, ix, iz;
  mp_digit W[MP_WARRAY], *tmpx;
  mp_word W1;

  /* grow the destination as required */
  pa = a->used + a->used;
  if (b->alloc < pa) {
    if ((res = mp_grow (b, pa)) != MP_OKAY) {
      return res;
    }
  }

  /* number of output digits to produce */
  W1 = 0;
  for (ix = 0; ix <= pa; ix++) {
      int tx, ty, iy;
      mp_word _W;
      mp_digit *tmpy;

      /* clear counter */
      _W = 0;

      /* get offsets into the two bignums */
      ty = MIN(a->used-1, ix);
      tx = ix - ty;

      /* setup temp aliases */
      tmpx = a->dp + tx;
      tmpy = a->dp + ty;

      /* this is the number of times the loop will iterrate, essentially its
while (tx++ < a->used && ty-- >= 0) { ... }
*/
      iy = MIN(a->used-tx, ty+1);

      /* now for squaring tx can never equal ty
* we halve the distance since they approach at a rate of 2x
* and we have to round because odd cases need to be executed
*/
      iy = MIN(iy, (ty-tx+1)>>1);

      /* execute loop */
      for (iz = 0; iz < iy; iz++) {
         _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
      }

      /* double the inner product and add carry */
      _W = _W + _W + W1;

      /* even columns have the square term in them */
      if ((ix&1) == 0) {
         _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
      }

      /* store it */
      W[ix] = _W;

      /* make next carry */
      W1 = _W >> ((mp_word)DIGIT_BIT);
  }

  /* setup dest */
  olduse = b->used;
  b->used = a->used+a->used;

  {
    mp_digit *tmpb;
    tmpb = b->dp;
    for (ix = 0; ix < pa; ix++) {
      *tmpb++ = W[ix] & MP_MASK;
    }

    /* clear unused digits [that existed in the old copy of c] */
    for (; ix < olduse; ix++) {
      *tmpb++ = 0;
    }
  }
  mp_clamp (b);
  return MP_OKAY;
}
#endif
Something went wrong with that request. Please try again.