Permalink
Browse files

added libtommath-0.10

  • Loading branch information...
1 parent 40c00ad commit fb93a30a250ae8f290ece618d7adb76a985b6556 Tom St Denis committed with sjaeckel Feb 28, 2003
Showing with 954 additions and 98 deletions.
  1. +428 −21 bn.c
  2. +8 −1 bn.h
  3. BIN bn.pdf
  4. +47 −14 bn.tex
  5. +5 −0 changes.txt
  6. +30 −17 demo.c
  7. +60 −44 etc/pprime.c
  8. +1 −1 makefile
  9. +302 −0 poly.c
  10. +73 −0 poly.h
View
449 bn.c

Large diffs are not rendered by default.

Oops, something went wrong.
View
9 bn.h
@@ -84,6 +84,7 @@ typedef int mp_err;
/* you'll have to tune these... */
#define KARATSUBA_MUL_CUTOFF 80 /* Min. number of digits before Karatsuba multiplication is used. */
#define KARATSUBA_SQR_CUTOFF 80 /* Min. number of digits before Karatsuba squaring is used. */
+#define MONTGOMERY_EXPT_CUTOFF 40 /* max. number of digits that montgomery reductions will help for */
#define MP_PREC 64 /* default digits of precision */
@@ -114,7 +115,7 @@ int mp_shrink(mp_int *a);
#define mp_iszero(a) (((a)->used == 0) ? 1 : 0)
#define mp_iseven(a) (((a)->used == 0 || (((a)->dp[0] & 1) == 0)) ? 1 : 0)
-#define mp_isodd(a) (((a)->used > 0 || (((a)->dp[0] & 1) == 1)) ? 1 : 0)
+#define mp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? 1 : 0)
/* set to zero */
void mp_zero(mp_int *a);
@@ -256,6 +257,12 @@ int mp_reduce_setup(mp_int *a, mp_int *b);
* compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code].
*/
int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
+
+/* setups the montgomery reduction */
+int mp_montgomery_setup(mp_int *a, mp_digit *mp);
+
+/* computes xR^-1 == x (mod N) via Montgomery Reduction */
+int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
/* d = a^b (mod c) */
int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
View
BIN bn.pdf
Binary file not shown.
View
61 bn.tex
@@ -1,7 +1,7 @@
\documentclass{article}
\begin{document}
-\title{LibTomMath v0.09 \\ A Free Multiple Precision Integer Library}
+\title{LibTomMath v0.10 \\ A Free Multiple Precision Integer Library}
\author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle
\newpage
@@ -279,6 +279,22 @@ \subsection{Modular Arithmetic}
/* computes the jacobi c = (a | n) (or Legendre if b is prime) */
int mp_jacobi(mp_int *a, mp_int *n, int *c);
+/* used to setup the Barrett reduction for a given modulus b */
+int mp_reduce_setup(mp_int *a, mp_int *b);
+
+/* Barrett Reduction, computes a (mod b) with a precomputed value c
+ *
+ * Assumes that 0 < a <= b^2, note if 0 > a > -(b^2) then you can merely
+ * compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code].
+ */
+int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
+
+/* setups the montgomery reduction */
+int mp_montgomery_setup(mp_int *a, mp_digit *mp);
+
+/* computes xR^-1 == x (mod N) via Montgomery Reduction */
+int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
+
/* d = a^b (mod c) */
int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
\end{verbatim}
@@ -451,21 +467,38 @@ \subsubsection{mp\_jacobi(mp\_int *a, mp\_int *n, int *c)}
natural to store the result in a simple C style \textbf{int}. If $n$ is prime then the Jacobi function produces
the same results as the Legendre function\footnote{Source: Handbook of Applied Cryptography, pp. 73}. This means if
$n$ is prime then $\left ( {a \over n} \right )$ is equal to $1$ if $a$ is a quadratic residue modulo $n$ or $-1$ if
-it is not.
+it is not.
\subsubsection{mp\_exptmod(mp\_int *a, mp\_int *b, mp\_int *c, mp\_int *d)}
Computes $d = a^b \mbox{ (mod }c\mbox{)}$ using a sliding window $k$-ary exponentiation algorithm. For an $\alpha$-bit
exponent it performs $\alpha$ squarings and at most $\lfloor \alpha/k \rfloor + k$ multiplications. The value of $k$ is
-chosen to minimize the number of multiplications required for a given value of $\alpha$. Barrett reductions are used
-to reduce the squared or multiplied temporary results modulo $c$. A Barrett reduction requires one division that is
-performed only and two half multipliers of $N$ digit numbers resulting in approximation $O((N^2)/2)$ work.
+chosen to minimize the number of multiplications required for a given value of $\alpha$. Barrett or Montgomery
+reductions are used to reduce the squared or multiplied temporary results modulo $c$.
+
+\subsection{Fast Modular Reductions}
+
+\subsubsection{mp\_reduce(mp\_int *a, mp\_int *b, mp\_int *c)}
+Computes a Barrett reduction in-place of $a$ modulo $b$ with respect to $c$. In essence it computes
+$a \equiv a \mbox{ (mod }b\mbox{)}$ provided $0 \le a \le b^2$. The value of $c$ is precomputed with the
+function mp\_reduce\_setup().
+
+The Barrett reduction function has been optimized to use partial multipliers which means compared to MPI it performs
+have the number of single precision multipliers (\textit{provided they have the same size digits}). The partial
+multipliers (\textit{one of which is shared with mp\_mul}) both have baseline and comba variants. Barrett reduction
+can reduce a number modulo a $n-$digit modulus with approximately $2n^2$ single precision multiplications.
+
+\subsubsection{mp\_montgomery\_reduce(mp\_int *a, mp\_int *m, mp\_digit mp)}
+Computes a Montgomery reduction in-place of $a$ modulo $b$ with respect to $mp$. If $b$ is some $n-$digit modulus then
+$R = \beta^{n+1}$. The result of this function is $aR^{-1} \mbox{ (mod }b\mbox{)}$ provided that $0 \le a \le b^2$.
+The value of $mp$ is precomputed with the function mp\_montgomery\_setup().
+
+The Montgomery reduction comes in two variants. A standard baseline and a fast comba method. The baseline routine
+is in fact slower than the Barrett reductions, however, the comba routine is much faster. Montomgery reduction can
+reduce a number modulo a $n-$digit modulus with approximately $n^2 + n$ single precision multiplications.
-Let $\gamma = \lfloor \alpha/k \rfloor + k$ represent the total multiplications. The total work of a exponentiation is
-therefore, $O(3 \cdot N^2 + (\alpha + \gamma) \cdot ((N^2)/2) + \alpha \cdot ((N^2 + N)/2) + \gamma \cdot N^2)$ which
-simplies to $O(3 \cdot N^2 + \gamma N^2 + \alpha (N^2 + (N/2)))$. The bulk of the time is spent in the Barrett
-reductions and the squaring algorithms. Since $\gamma < \alpha$ it makes sense to optimize first the Barrett and
-squaring routines first. Significant improvements in the future will most likely stem from optimizing these instead
-of optimizing the multipliers.
+Note that the final result of a Montgomery reduction is not just the value reduced modulo $b$. You have to multiply
+by $R$ modulo $b$ to get the real result. At first that may not seem like such a worthwhile routine, however, the
+exptmod function can be made to take advantage of this such that only one normalization at the end is required.
\section{Timing Analysis}
\subsection{Observed Timings}
@@ -503,9 +536,9 @@ \subsection{Observed Timings}
Square & 2048 & 72,126 & 17,621 \\
Square & 4096 & 306,269 & 67,576 \\
\hline
-Exptmod & 512 & 32,021,586 & 4,138,354 \\
-Exptmod & 768 & 97,595,492 & 9,840,233 \\
-Exptmod & 1024 & 223,302,532 & 20,624,553 \\
+Exptmod & 512 & 32,021,586 & 3,118,435 \\
+Exptmod & 768 & 97,595,492 & 8,493,633 \\
+Exptmod & 1024 & 223,302,532 & 17,715,899 \\
Exptmod & 2048 & 1,682,223,369 & 114,936,361 \\
Exptmod & 2560 & 3,268,615,571 & 229,402,426 \\
Exptmod & 3072 & 5,597,240,141 & 367,403,840 \\
View
@@ -1,3 +1,8 @@
+Jan 9th, 2003
+v0.10 -- Pekka Riikonen suggested fixes to the radix conversion code.
+ -- Added baseline montgomery and comba montgomery reductions, sped up exptmods
+ [to a point, see bn.h for MONTGOMERY_EXPT_CUTOFF]
+
Jan 6th, 2003
v0.09 -- Updated the manual to reflect recent changes. :-)
-- Added Jacobi function (mp_jacobi) to supplement the number theory side of the lib
View
47 demo.c
@@ -21,12 +21,15 @@
#define TIMER
extern ulong64 rdtsc(void);
extern void reset(void);
-#else
+#endif
+#ifdef TIMER
+#ifndef TIMER_X86
ulong64 _tt;
void reset(void) { _tt = clock(); }
ulong64 rdtsc(void) { return clock() - _tt; }
#endif
+#endif
#ifndef DEBUG
int _ifuncs;
@@ -82,6 +85,7 @@ int main(void)
mp_int a, b, c, d, e, f;
unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n;
int rr;
+ mp_digit tom;
#ifdef TIMER
int n;
@@ -94,29 +98,38 @@ int main(void)
mp_init(&d);
mp_init(&e);
mp_init(&f);
-
+
+ mp_read_radix(&a, "59994534535345535344389423", 10);
+ mp_read_radix(&b, "49993453555234234565675534", 10);
+ mp_read_radix(&c, "62398923474472948723847281", 10);
+
+ mp_mulmod(&a, &b, &c, &f);
+
+ /* setup mont */
+ mp_montgomery_setup(&c, &tom);
+ mp_mul(&a, &b, &a);
+ mp_montgomery_reduce(&a, &c, tom);
+ mp_montgomery_reduce(&a, &c, tom);
+ mp_lshd(&a, c.used*2);
+ mp_mod(&a, &c, &a);
+
+ mp_toradix(&a, cmd, 10);
+ printf("%s\n\n", cmd);
+ mp_toradix(&f, cmd, 10);
+ printf("%s\n", cmd);
+
+/* return 0; */
+
+
mp_read_radix(&a, "V//////////////////////////////////////////////////////////////////////////////////////", 64);
mp_reduce_setup(&b, &a);
printf("\n\n----\n\n");
mp_toradix(&b, buf, 10);
printf("b == %s\n\n\n", buf);
-
+
mp_read_radix(&b, "4982748972349724892742", 10);
mp_sub_d(&a, 1, &c);
-
-#ifdef DEBUG
- mp_sqr(&a, &a);mp_sqr(&c, &c);
- mp_sqr(&a, &a);mp_sqr(&c, &c);
- mp_sqr(&a, &a);mp_sqr(&c, &c);
- reset_timings();
-#endif
mp_exptmod(&b, &c, &a, &d);
-#ifdef DEBUG
- dump_timings();
- return 0;
-
-#endif
-
mp_toradix(&d, buf, 10);
printf("b^p-1 == %s\n", buf);
@@ -169,7 +182,7 @@ int main(void)
printf("Multiplying %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((ulong64)100000));
mp_copy(&b, &a);
}
-
+
{
char *primes[] = {
"17933601194860113372237070562165128350027320072176844226673287945873370751245439587792371960615073855669274087805055507977323024886880985062002853331424203",
View
@@ -85,10 +85,11 @@ static mp_digit prime_digit()
}
/* makes a prime of at least k bits */
-int pprime(int k, mp_int *p, mp_int *q)
+int pprime(int k, int li, mp_int *p, mp_int *q)
{
mp_int a, b, c, n, x, y, z, v;
- int res;
+ int res, ii;
+ static const mp_digit bases[] = { 2, 3, 5, 7, 11, 13, 17, 19 };
/* single digit ? */
if (k <= (int)DIGIT_BIT) {
@@ -167,55 +168,60 @@ int pprime(int k, mp_int *p, mp_int *q)
if (mp_cmp_d(&y, 1) != MP_EQ) goto top;
- /* now try base x=2 */
- mp_set(&x, 2);
+ /* now try base x=bases[ii] */
+ for (ii = 0; ii < li; ii++) {
+ mp_set(&x, bases[ii]);
- /* compute x^a mod n */
- if ((res = mp_exptmod(&x, &a, &n, &y)) != MP_OKAY) { /* y = x^a mod n */
- goto __Z;
- }
+ /* compute x^a mod n */
+ if ((res = mp_exptmod(&x, &a, &n, &y)) != MP_OKAY) { /* y = x^a mod n */
+ goto __Z;
+ }
- /* if y == 1 loop */
- if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
+ /* if y == 1 loop */
+ if (mp_cmp_d(&y, 1) == MP_EQ) continue;
- /* now x^2a mod n */
- if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2a mod n */
- goto __Z;
- }
+ /* now x^2a mod n */
+ if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2a mod n */
+ goto __Z;
+ }
- if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
+ if (mp_cmp_d(&y, 1) == MP_EQ) continue;
- /* compute x^b mod n */
- if ((res = mp_exptmod(&x, &b, &n, &y)) != MP_OKAY) { /* y = x^b mod n */
- goto __Z;
- }
+ /* compute x^b mod n */
+ if ((res = mp_exptmod(&x, &b, &n, &y)) != MP_OKAY) { /* y = x^b mod n */
+ goto __Z;
+ }
- /* if y == 1 loop */
- if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
+ /* if y == 1 loop */
+ if (mp_cmp_d(&y, 1) == MP_EQ) continue;
- /* now x^2b mod n */
- if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2b mod n */
- goto __Z;
- }
+ /* now x^2b mod n */
+ if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2b mod n */
+ goto __Z;
+ }
- if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
+ if (mp_cmp_d(&y, 1) == MP_EQ) continue;
+ /* compute x^c mod n == x^ab mod n */
+ if ((res = mp_exptmod(&x, &c, &n, &y)) != MP_OKAY) { /* y = x^ab mod n */
+ goto __Z;
+ }
- /* compute x^c mod n == x^ab mod n */
- if ((res = mp_exptmod(&x, &c, &n, &y)) != MP_OKAY) { /* y = x^ab mod n */
- goto __Z;
- }
-
- /* if y == 1 loop */
- if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
+ /* if y == 1 loop */
+ if (mp_cmp_d(&y, 1) == MP_EQ) continue;
- /* now compute (x^c mod n)^2 */
- if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2ab mod n */
- goto __Z;
- }
+ /* now compute (x^c mod n)^2 */
+ if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2ab mod n */
+ goto __Z;
+ }
- /* y should be 1 */
- if (mp_cmp_d(&y, 1) != MP_EQ) goto top;
+ /* y should be 1 */
+ if (mp_cmp_d(&y, 1) != MP_EQ) continue;
+ break;
+ }
+
+ /* no bases worked? */
+ if (ii == li) goto top;
/*
{
@@ -232,10 +238,14 @@ int pprime(int k, mp_int *p, mp_int *q)
*/
/* a = n */
mp_copy(&n, &a);
- }
+ }
+
+ /* get q to be the order of the large prime subgroup */
+ mp_sub_d(&n, 1, q);
+ mp_div_2(q, q);
+ mp_div(q, &b, q, NULL);
mp_exch(&n, p);
- mp_exch(&b, q);
res = MP_OKAY;
__Z: mp_clear(&z);
@@ -254,19 +264,25 @@ int main(void)
{
mp_int p, q;
char buf[4096];
- int k;
+ int k, li;
clock_t t1;
srand(time(NULL));
printf("Enter # of bits: \n");
- scanf("%d", &k);
+ fgets(buf, sizeof(buf), stdin);
+ sscanf(buf, "%d", &k);
+
+ printf("Enter number of bases to try (1 to 8):\n");
+ fgets(buf, sizeof(buf), stdin);
+ sscanf(buf, "%d", &li);
+
mp_init(&p);
mp_init(&q);
t1 = clock();
- pprime(k, &p, &q);
+ pprime(k, li, &p, &q);
t1 = clock() - t1;
printf("\n\nTook %ld ticks, %d bits\n", t1, mp_count_bits(&p));
View
@@ -1,7 +1,7 @@
CC = gcc
CFLAGS += -Wall -W -Wshadow -ansi -O3 -fomit-frame-pointer -funroll-loops
-VERSION=0.09
+VERSION=0.10
default: test
Oops, something went wrong.

0 comments on commit fb93a30

Please sign in to comment.