Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

124 lines (108 sloc) 3.736 kb
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* Fast (comba) multiplier
*
* This is the fast column-array [comba] multiplier. It is
* designed to compute the columns of the product first
* then handle the carries afterwards. This has the effect
* of making the nested loops that compute the columns very
* simple and schedulable on super-scalar processors.
*
* This has been modified to produce a variable number of
* digits of output so if say only a half-product is required
* you don't have to compute the upper half (a feature
* required for fast Barrett reduction).
*
* Based on Algorithm 14.12 on pp.595 of HAC.
*
*/
int
fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
int olduse, res, pa, ix;
mp_word W[MP_WARRAY];
/* grow the destination as required */
if (c->alloc < digs) {
if ((res = mp_grow (c, digs)) != MP_OKAY) {
return res;
}
}
/* clear temp buf (the columns) */
memset (W, 0, sizeof (mp_word) * digs);
/* calculate the columns */
pa = a->used;
for (ix = 0; ix < pa; ix++) {
/* this multiplier has been modified to allow you to
* control how many digits of output are produced.
* So at most we want to make upto "digs" digits of output.
*
* this adds products to distinct columns (at ix+iy) of W
* note that each step through the loop is not dependent on
* the previous which means the compiler can easily unroll
* the loop without scheduling problems
*/
{
register mp_digit tmpx, *tmpy;
register mp_word *_W;
register int iy, pb;
/* alias for the the word on the left e.g. A[ix] * A[iy] */
tmpx = a->dp[ix];
/* alias for the right side */
tmpy = b->dp;
/* alias for the columns, each step through the loop adds a new
term to each column
*/
_W = W + ix;
/* the number of digits is limited by their placement. E.g.
we avoid multiplying digits that will end up above the # of
digits of precision requested
*/
pb = MIN (b->used, digs - ix);
for (iy = 0; iy < pb; iy++) {
*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
}
}
}
/* setup dest */
olduse = c->used;
c->used = digs;
{
register mp_digit *tmpc;
/* At this point W[] contains the sums of each column. To get the
* correct result we must take the extra bits from each column and
* carry them down
*
* Note that while this adds extra code to the multiplier it
* saves time since the carry propagation is removed from the
* above nested loop.This has the effect of reducing the work
* from N*(N+N*c)==N**2 + c*N**2 to N**2 + N*c where c is the
* cost of the shifting. On very small numbers this is slower
* but on most cryptographic size numbers it is faster.
*/
tmpc = c->dp;
for (ix = 1; ix < digs; ix++) {
W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
*tmpc++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
}
*tmpc++ = (mp_digit) (W[digs - 1] & ((mp_word) MP_MASK));
/* clear unused */
for (; ix < olduse; ix++) {
*tmpc++ = 0;
}
}
mp_clamp (c);
return MP_OKAY;
}
Jump to Line
Something went wrong with that request. Please try again.