# libtom/libtommath

1 parent 40c00ad commit fb93a30a250ae8f290ece618d7adb76a985b6556 Tom St Denis committed with sjaeckel Feb 28, 2003
Showing with 954 additions and 98 deletions.
1. +428 −21 bn.c
2. +8 −1 bn.h
3. bn.pdf
4. +47 −14 bn.tex
5. +5 −0 changes.txt
6. +30 −17 demo.c
7. +60 −44 etc/pprime.c
8. +1 −1 makefile
9. +302 −0 poly.c
10. +73 −0 poly.h
449 bn.c
9 bn.h
 @@ -84,6 +84,7 @@ typedef int mp_err; /* you'll have to tune these... */ #define KARATSUBA_MUL_CUTOFF 80 /* Min. number of digits before Karatsuba multiplication is used. */ #define KARATSUBA_SQR_CUTOFF 80 /* Min. number of digits before Karatsuba squaring is used. */ +#define MONTGOMERY_EXPT_CUTOFF 40 /* max. number of digits that montgomery reductions will help for */ #define MP_PREC 64 /* default digits of precision */ @@ -114,7 +115,7 @@ int mp_shrink(mp_int *a); #define mp_iszero(a) (((a)->used == 0) ? 1 : 0) #define mp_iseven(a) (((a)->used == 0 || (((a)->dp[0] & 1) == 0)) ? 1 : 0) -#define mp_isodd(a) (((a)->used > 0 || (((a)->dp[0] & 1) == 1)) ? 1 : 0) +#define mp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? 1 : 0) /* set to zero */ void mp_zero(mp_int *a); @@ -256,6 +257,12 @@ int mp_reduce_setup(mp_int *a, mp_int *b); * compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code]. */ int mp_reduce(mp_int *a, mp_int *b, mp_int *c); + +/* setups the montgomery reduction */ +int mp_montgomery_setup(mp_int *a, mp_digit *mp); + +/* computes xR^-1 == x (mod N) via Montgomery Reduction */ +int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp); /* d = a^b (mod c) */ int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
BIN bn.pdf
Binary file not shown.
61 bn.tex
 @@ -1,7 +1,7 @@ \documentclass{article} \begin{document} -\title{LibTomMath v0.09 \\ A Free Multiple Precision Integer Library} +\title{LibTomMath v0.10 \\ A Free Multiple Precision Integer Library} \author{Tom St Denis \\ tomstdenis@iahu.ca} \maketitle \newpage @@ -279,6 +279,22 @@ \subsection{Modular Arithmetic} /* computes the jacobi c = (a | n) (or Legendre if b is prime) */ int mp_jacobi(mp_int *a, mp_int *n, int *c); +/* used to setup the Barrett reduction for a given modulus b */ +int mp_reduce_setup(mp_int *a, mp_int *b); + +/* Barrett Reduction, computes a (mod b) with a precomputed value c + * + * Assumes that 0 < a <= b^2, note if 0 > a > -(b^2) then you can merely + * compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code]. + */ +int mp_reduce(mp_int *a, mp_int *b, mp_int *c); + +/* setups the montgomery reduction */ +int mp_montgomery_setup(mp_int *a, mp_digit *mp); + +/* computes xR^-1 == x (mod N) via Montgomery Reduction */ +int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp); + /* d = a^b (mod c) */ int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d); \end{verbatim} @@ -451,21 +467,38 @@ \subsubsection{mp\_jacobi(mp\_int *a, mp\_int *n, int *c)} natural to store the result in a simple C style \textbf{int}. If $n$ is prime then the Jacobi function produces the same results as the Legendre function\footnote{Source: Handbook of Applied Cryptography, pp. 73}. This means if $n$ is prime then $\left ( {a \over n} \right )$ is equal to $1$ if $a$ is a quadratic residue modulo $n$ or $-1$ if -it is not. +it is not. \subsubsection{mp\_exptmod(mp\_int *a, mp\_int *b, mp\_int *c, mp\_int *d)} Computes $d = a^b \mbox{ (mod }c\mbox{)}$ using a sliding window $k$-ary exponentiation algorithm. For an $\alpha$-bit exponent it performs $\alpha$ squarings and at most $\lfloor \alpha/k \rfloor + k$ multiplications. The value of $k$ is -chosen to minimize the number of multiplications required for a given value of $\alpha$. 
Barrett reductions are used -to reduce the squared or multiplied temporary results modulo $c$. A Barrett reduction requires one division that is -performed only and two half multipliers of $N$ digit numbers resulting in approximation $O((N^2)/2)$ work. +chosen to minimize the number of multiplications required for a given value of $\alpha$. Barrett or Montgomery +reductions are used to reduce the squared or multiplied temporary results modulo $c$. + +\subsection{Fast Modular Reductions} + +\subsubsection{mp\_reduce(mp\_int *a, mp\_int *b, mp\_int *c)} +Computes a Barrett reduction in-place of $a$ modulo $b$ with respect to $c$. In essence it computes +$a \equiv a \mbox{ (mod }b\mbox{)}$ provided $0 \le a \le b^2$. The value of $c$ is precomputed with the +function mp\_reduce\_setup(). + +The Barrett reduction function has been optimized to use partial multipliers which means compared to MPI it performs +half the number of single precision multipliers (\textit{provided they have the same size digits}). The partial +multipliers (\textit{one of which is shared with mp\_mul}) both have baseline and comba variants. Barrett reduction +can reduce a number modulo a $n-$digit modulus with approximately $2n^2$ single precision multiplications. + +\subsubsection{mp\_montgomery\_reduce(mp\_int *a, mp\_int *m, mp\_digit mp)} +Computes a Montgomery reduction in-place of $a$ modulo $b$ with respect to $mp$. If $b$ is some $n-$digit modulus then +$R = \beta^{n+1}$. The result of this function is $aR^{-1} \mbox{ (mod }b\mbox{)}$ provided that $0 \le a \le b^2$. +The value of $mp$ is precomputed with the function mp\_montgomery\_setup(). + +The Montgomery reduction comes in two variants. A standard baseline and a fast comba method. The baseline routine +is in fact slower than the Barrett reductions, however, the comba routine is much faster. Montgomery reduction can +reduce a number modulo a $n-$digit modulus with approximately $n^2 + n$ single precision multiplications. 
-Let $\gamma = \lfloor \alpha/k \rfloor + k$ represent the total multiplications. The total work of a exponentiation is -therefore, $O(3 \cdot N^2 + (\alpha + \gamma) \cdot ((N^2)/2) + \alpha \cdot ((N^2 + N)/2) + \gamma \cdot N^2)$ which -simplies to $O(3 \cdot N^2 + \gamma N^2 + \alpha (N^2 + (N/2)))$. The bulk of the time is spent in the Barrett -reductions and the squaring algorithms. Since $\gamma < \alpha$ it makes sense to optimize first the Barrett and -squaring routines first. Significant improvements in the future will most likely stem from optimizing these instead -of optimizing the multipliers. +Note that the final result of a Montgomery reduction is not just the value reduced modulo $b$. You have to multiply +by $R$ modulo $b$ to get the real result. At first that may not seem like such a worthwhile routine, however, the +exptmod function can be made to take advantage of this such that only one normalization at the end is required. \section{Timing Analysis} \subsection{Observed Timings} @@ -503,9 +536,9 @@ \subsection{Observed Timings} Square & 2048 & 72,126 & 17,621 \\ Square & 4096 & 306,269 & 67,576 \\ \hline -Exptmod & 512 & 32,021,586 & 4,138,354 \\ -Exptmod & 768 & 97,595,492 & 9,840,233 \\ -Exptmod & 1024 & 223,302,532 & 20,624,553 \\ +Exptmod & 512 & 32,021,586 & 3,118,435 \\ +Exptmod & 768 & 97,595,492 & 8,493,633 \\ +Exptmod & 1024 & 223,302,532 & 17,715,899 \\ Exptmod & 2048 & 1,682,223,369 & 114,936,361 \\ Exptmod & 2560 & 3,268,615,571 & 229,402,426 \\ Exptmod & 3072 & 5,597,240,141 & 367,403,840 \\
 @@ -1,3 +1,8 @@ +Jan 9th, 2003 +v0.10 -- Pekka Riikonen suggested fixes to the radix conversion code. + -- Added baseline montgomery and comba montgomery reductions, sped up exptmods + [to a point, see bn.h for MONTGOMERY_EXPT_CUTOFF] + Jan 6th, 2003 v0.09 -- Updated the manual to reflect recent changes. :-) -- Added Jacobi function (mp_jacobi) to supplement the number theory side of the lib
47 demo.c