Permalink
Browse files

added libtommath-0.02

  • Loading branch information...
1 parent 390fa39 commit 8c97c9e1814ad3b7c5e99a7c951521d6ea079834 Tom St Denis committed with sjaeckel Feb 28, 2003
Showing with 4,689 additions and 47 deletions.
  1. +2 −2 b.bat
  2. +71 −14 bn.c
  3. +17 −0 bn.h
  4. BIN bn.pdf
  5. +34 −24 bn.tex
  6. +10 −5 changes.txt
  7. +6 −1 demo.c
  8. +1 −1 makefile
  9. +20 −0 mtest/logtab.h
  10. +86 −0 mtest/mpi-config.h
  11. +16 −0 mtest/mpi-types.h
  12. +3,981 −0 mtest/mpi.c
  13. +227 −0 mtest/mpi.h
  14. +218 −0 mtest/mtest.c
View
@@ -1,3 +1,3 @@
nasm -f coff timer.asm
-gcc -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER demo.c bn.c timer.o -o demo
-gcc -I./mtest/ -DU_MPI -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER demo.c mtest/mpi.c timer.o -o mpidemo
+gcc -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c bn.c timer.o -o demo
+gcc -I./mtest/ -DU_MPI -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c mtest/mpi.c timer.o -o mpidemo
View
@@ -168,8 +168,31 @@ int mp_init_copy(mp_int *a, mp_int *b)
return mp_copy(b, a);
}
+/* b = |a| */
+int mp_abs(mp_int *a, mp_int *b)
+{
+ int res;
+ if ((res = mp_copy(a, b)) != MP_OKAY) {
+ return res;
+ }
+ b->sign = MP_ZPOS;
+ return MP_OKAY;
+}
+
+/* b = -a */
+int mp_neg(mp_int *a, mp_int *b)
+{
+ int res;
+ if ((res = mp_copy(a, b)) != MP_OKAY) {
+ return res;
+ }
+ b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
+ return MP_OKAY;
+}
+
+
/* compare maginitude of two ints (unsigned) */
-static int s_mp_cmp(mp_int *a, mp_int *b)
+int mp_cmp_mag(mp_int *a, mp_int *b)
{
int n;
@@ -200,7 +223,7 @@ int mp_cmp(mp_int *a, mp_int *b)
} else if (a->sign == MP_ZPOS && b->sign == MP_NEG) {
return MP_GT;
}
- return s_mp_cmp(a, b);
+ return mp_cmp_mag(a, b);
}
/* compare a digit */
@@ -500,7 +523,7 @@ static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
{
mp_int t;
int res, pa, pb, ix, iy;
- mp_word W[512];
+ mp_word W[512], *_W;
mp_digit tmpx, *tmpt, *tmpy;
// printf("\nHOLA\n");
@@ -519,8 +542,9 @@ static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
tmpx = a->dp[ix];
tmpt = &(t.dp[ix]);
tmpy = b->dp;
+ _W = &(W[ix]);
for (iy = 0; iy < pb; iy++) {
- W[iy+ix] += ((mp_word)tmpx) * ((mp_word)*tmpy++);
+ *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
}
}
@@ -590,7 +614,7 @@ static int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
{
mp_int t;
int res, pa, pb, ix, iy;
- mp_word W[512];
+ mp_word W[512], *_W;
mp_digit tmpx, *tmpt, *tmpy;
if ((res = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) {
@@ -605,8 +629,9 @@ static int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
tmpx = a->dp[ix];
tmpt = &(t.dp[digs]);
tmpy = b->dp + (digs - ix);
+ _W = &(W[digs]);
for (iy = digs - ix; iy < pb; iy++) {
- W[ix+iy] += ((mp_word)tmpx) * ((mp_word)*tmpy++);
+ *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
}
}
@@ -678,7 +703,7 @@ static int fast_s_mp_sqr(mp_int *a, mp_int *b)
{
mp_int t;
int res, ix, iy, pa;
- mp_word r, W[512];
+ mp_word W[512], *_W;
mp_digit tmpx, *tmpy;
pa = a->used;
@@ -694,9 +719,9 @@ static int fast_s_mp_sqr(mp_int *a, mp_int *b)
W[ix+ix] += ((mp_word)a->dp[ix]) * ((mp_word)a->dp[ix]);
tmpx = a->dp[ix];
tmpy = &(a->dp[ix+1]);
+ _W = &(W[ix+ix+1]);
for (iy = ix + 1; iy < pa; iy++) {
- r = ((mp_word)tmpx) * ((mp_word)*tmpy++);
- W[ix+iy] += r + r;
+ *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++) << ((mp_word)1);
}
}
for (ix = 1; ix < (pa+pa+1); ix++) {
@@ -781,7 +806,7 @@ int mp_add(mp_int *a, mp_int *b, mp_int *c)
c->sign = MP_ZPOS;
} else if (sa == MP_ZPOS && sb == MP_NEG) {
/* a + -b == a - b, but if b>a then we do it as -(b-a) */
- if (s_mp_cmp(a, b) == MP_LT) {
+ if (mp_cmp_mag(a, b) == MP_LT) {
res = s_mp_sub(b, a, c);
c->sign = MP_NEG;
} else {
@@ -790,7 +815,7 @@ int mp_add(mp_int *a, mp_int *b, mp_int *c)
}
} else if (sa == MP_NEG && sb == MP_ZPOS) {
/* -a + b == b - a, but if a>b then we do it as -(a-b) */
- if (s_mp_cmp(a, b) == MP_GT) {
+ if (mp_cmp_mag(a, b) == MP_GT) {
res = s_mp_sub(a, b, c);
c->sign = MP_NEG;
} else {
@@ -816,7 +841,7 @@ int mp_sub(mp_int *a, mp_int *b, mp_int *c)
/* handle four cases */
if (sa == MP_ZPOS && sb == MP_ZPOS) {
/* both positive, a - b, but if b>a then we do -(b - a) */
- if (s_mp_cmp(a, b) == MP_LT) {
+ if (mp_cmp_mag(a, b) == MP_LT) {
/* b>a */
res = s_mp_sub(b, a, c);
c->sign = MP_NEG;
@@ -834,7 +859,7 @@ int mp_sub(mp_int *a, mp_int *b, mp_int *c)
c->sign = MP_NEG;
} else {
/* -a - -b == b - a, but if a>b == -(a - b) */
- if (s_mp_cmp(a, b) == MP_GT) {
+ if (mp_cmp_mag(a, b) == MP_GT) {
res = s_mp_sub(a, b, c);
c->sign = MP_NEG;
} else {
@@ -1023,7 +1048,7 @@ int mp_div(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
int res, n, t, i, norm, neg;
/* if a < b then q=0, r = a */
- if (s_mp_cmp(a, b) == MP_LT) {
+ if (mp_cmp_mag(a, b) == MP_LT) {
if (d != NULL) {
res = mp_copy(a, d);
d->sign = a->sign;
@@ -1981,3 +2006,35 @@ int mp_toradix(mp_int *a, unsigned char *str, int radix)
return MP_OKAY;
}
+/* returns size of ASCII reprensentation */
+int mp_radix_size(mp_int *a, int radix)
+{
+ int res, digs;
+ mp_int t;
+ mp_digit d;
+
+ digs = 0;
+
+ if (radix < 2 || radix > 64) {
+ return 0;
+ }
+
+ if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
+ return 0;
+ }
+
+ if (t.sign == MP_NEG) {
+ ++digs;
+ t.sign = MP_ZPOS;
+ }
+
+ while (mp_iszero(&t) == 0) {
+ if ((res = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {
+ return 0;
+ }
+ ++digs;
+ }
+ mp_clear(&t);
+ return digs + 1;
+}
+
View
@@ -128,9 +128,18 @@ int mp_mod_2d(mp_int *a, int b, mp_int *c);
/* ---> Basic arithmetic <--- */
+/* b = -a */
+int mp_neg(mp_int *a, mp_int *b);
+
+/* b = |a| */
+int mp_abs(mp_int *a, mp_int *b);
+
/* compare a to b */
int mp_cmp(mp_int *a, mp_int *b);
+/* compare |a| to |b| */
+int mp_cmp_mag(mp_int *a, mp_int *b);
+
/* c = a + b */
int mp_add(mp_int *a, mp_int *b, mp_int *c);
@@ -215,6 +224,14 @@ int mp_to_signed_bin(mp_int *a, unsigned char *b);
int mp_read_radix(mp_int *a, unsigned char *str, int radix);
int mp_toradix(mp_int *a, unsigned char *str, int radix);
+int mp_radix_size(mp_int *a, int radix);
+
+#define mp_read_raw(mp, str, len) mp_read_signed_bin((mp), (str), (len))
+#define mp_raw_size(mp) mp_signed_bin_size(mp)
+#define mp_toraw(mp, str) mp_to_signed_bin((mp), (str))
+#define mp_read_mag(mp, str, len) mp_read_unsigned_bin((mp), (str), (len))
+#define mp_mag_size(mp) mp_unsigned_bin_size(mp)
+#define mp_tomag(mp, str) mp_to_unsigned_bin((mp), (str))
#define mp_tobinary(M, S) mp_toradix((M), (S), 2)
#define mp_tooctal(M, S) mp_toradix((M), (S), 8)
View
Binary file not shown.
View
@@ -1,7 +1,7 @@
\documentclass{article}
\begin{document}
-\title{LibTomMath v0.01 \\ A Free Multiple Precision Integer Library}
+\title{LibTomMath v0.02 \\ A Free Multiple Precision Integer Library}
\author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle
\newpage
@@ -192,9 +192,18 @@ \subsection{Basic Arithmetic}
Next are the class of functions which provide basic arithmetic.
\begin{verbatim}
+/* b = -a */
+int mp_neg(mp_int *a, mp_int *b);
+
+/* b = |a| */
+int mp_abs(mp_int *a, mp_int *b);
+
/* compare a to b */
int mp_cmp(mp_int *a, mp_int *b);
+/* compare |a| to |b| */
+int mp_cmp_mag(mp_int *a, mp_int *b);
+
/* c = a + b */
int mp_add(mp_int *a, mp_int *b, mp_int *c);
@@ -298,6 +307,7 @@ \subsection{Radix Conversions}
int mp_read_radix(mp_int *a, unsigned char *str, int radix);
int mp_toradix(mp_int *a, unsigned char *str, int radix);
+int mp_radix_size(mp_int *a, int radix);
\end{verbatim}
The integers are stored in big endian format as most libraries (and MPI) expect. The \textbf{mp\_read\_radix} and
@@ -318,27 +328,27 @@ \subsection{Observed Timings}
\begin{tabular}{c|c|c|c}
\hline \textbf{Operation} & \textbf{Size (bits)} & \textbf{Time with MPI (cycles)} & \textbf{Time with LibTomMath (cycles)} \\
\hline
-Multiply & 128 & 1,394 & 915 \\
-Multiply & 256 & 2,559 & 1,893 \\
-Multiply & 512 & 7,919 & 3,770 \\
-Multiply & 1024 & 28,460 & 9,970 \\
-Multiply & 2048 & 109,637 & 32,264 \\
-Multiply & 4096 & 467,226 & 129,645 \\
+Multiply & 128 & 1,394 & 893 \\
+Multiply & 256 & 2,559 & 1,744 \\
+Multiply & 512 & 7,919 & 4,484 \\
+Multiply & 1024 & 28,460 & 9,326, \\
+Multiply & 2048 & 109,637 & 30,140 \\
+Multiply & 4096 & 467,226 & 122,290 \\
\hline
-Square & 128 & 1,288 & 1,147 \\
-Square & 256 & 1,705 & 2,129 \\
-Square & 512 & 5,365 & 3,755 \\
-Square & 1024 & 18,836 & 9,267 \\
-Square & 2048 & 72,334 & 28,387 \\
-Square & 4096 & 306,252 & 112,391 \\
+Square & 128 & 1,288 & 1,172 \\
+Square & 256 & 1,705 & 2,162 \\
+Square & 512 & 5,365 & 3,723 \\
+Square & 1024 & 18,836 & 9,063 \\
+Square & 2048 & 72,334 & 27,489 \\
+Square & 4096 & 306,252 & 110,372 \\
\hline
-Exptmod & 512 & 30,497,732 & 7,222,872 \\
-Exptmod & 768 & 98,943,020 & 16,474,567 \\
-Exptmod & 1024 & 221,123,749 & 30,070,883 \\
-Exptmod & 2048 & 1,694,796,907 & 154,697,320 \\
-Exptmod & 2560 & 3,262,360,107 & 318,998,183 \\
-Exptmod & 3072 & 5,647,243,373 & 494,313,122 \\
-Exptmod & 4096 & 13,345,194,048 & 1,036,254,558
+Exptmod & 512 & 30,497,732 & 6,898,504 \\
+Exptmod & 768 & 98,943,020 & 15,510,779 \\
+Exptmod & 1024 & 221,123,749 & 27,962,904 \\
+Exptmod & 2048 & 1,694,796,907 & 146,631,975 \\
+Exptmod & 2560 & 3,262,360,107 & 305,530,060 \\
+Exptmod & 3072 & 5,647,243,373 & 472,572,762 \\
+Exptmod & 4096 & 13,345,194,048 & 984,415,240
\end{tabular}
\end{center}
@@ -367,7 +377,7 @@ \subsection{Multiplication Algorithms}
will benefit from the algorithm.
MPI only implements the slower baseline multiplier where carries are dealt with in the inner loop. As a result even at
-smaller numbers (below the Karatsuba cutoff) the LibTomCrypt multipliers are faster.
+smaller numbers (below the Karatsuba cutoff) the LibTomMath multipliers are faster.
\subsection{Squaring Algorithms}
@@ -393,16 +403,16 @@ \subsection{Exponentiation Algorithms}
of $g(3)$, four squarings and and a multiplication by $g(1)$. In total there are 8 squarings and 3 multiplications.
MPI uses a binary square-multiply method. For the same exponent $e$ it would have had 8 squarings and 5 multiplications.
-There is a precomputation phase for the method LibTomCrypt uses but it generally cuts down considerably on the number
+There is a precomputation phase for the method LibTomMath uses but it generally cuts down considerably on the number
of multiplications. Consider a 512-bit exponent. The worst case for the LibTomMath method results in 512 squarings and
124 multiplications. The MPI method would have 512 squarings and 512 multiplications. Randomly every $2k$ bits another
multiplication is saved via the sliding-window technique on top of the savings the $k$-ary method provides.
Both LibTomMath and MPI use Barrett reduction instead of division to reduce the numbers modulo the modulus given.
However, LibTomMath can take advantage of the fact that the multiplications required within the Barrett reduction
-do not to give full precision. As a result the reduction step is much faster and just as accurate. The LibTomMath code
+do not have to give full precision. As a result the reduction step is much faster and just as accurate. The LibTomMath code
will automatically determine at run-time (e.g. when its called) whether the faster multiplier can be used. The
-faster multipliers have also been optimized into the two variants (baseline and fast baseline).
+faster multipliers have also been optimized into the two variants (baseline and comba baseline).
As a result of all these changes exponentiation in LibTomMath is much faster than compared to MPI.
View
@@ -1,6 +1,11 @@
+Dec 26th, 2002
+v0.02 -- Fixed a few "slips" in the manual. This is "LibTomMath" afterall :-)
+ -- Added mp_cmp_mag, mp_neg, mp_abs and mp_radix_size that were missing.
+ -- Sped up the fast [comba] multipliers more [yahoo!]
+
Dec 25th,2002
-v0.01 -- Initial release. Gimme a break.
- -- Todo list,
- add details to manual [e.g. algorithms]
- more comments in code
- example programs
+v0.01 -- Initial release. Gimme a break.
+ -- Todo list,
+ add details to manual [e.g. algorithms]
+ more comments in code
+ example programs
View
@@ -11,9 +11,14 @@
#include "bn.h"
#endif
-#ifdef TIMER
+#ifdef TIMER_X86
+#define TIMER
extern unsigned long long rdtsc(void);
extern void reset(void);
+#else
+unsigned long long _tt;
+void reset(void) { _tt = clock(); }
+unsigned long long rdtsc(void) { return clock() - _tt; }
#endif
static void draw(mp_int *a)
View
@@ -1,7 +1,7 @@
CC = gcc
CFLAGS += -Wall -W -O3 -funroll-loops
-VERSION=0.01
+VERSION=0.02
default: test
Oops, something went wrong.

0 comments on commit 8c97c9e

Please sign in to comment.