Permalink
Browse files

added libtommath-0.06

  • Loading branch information...
1 parent b01dd94 commit 16c6ccc62c7cfcbca7853ad34388e8d006e861e7 Tom St Denis committed with sjaeckel Feb 28, 2003
Showing with 102 additions and 71 deletions.
  1. +1 −1 b.bat
  2. +46 −34 bn.c
  3. +14 −4 bn.h
  4. BIN bn.pdf
  5. +16 −16 bn.tex
  6. +4 −0 changes.txt
  7. +20 −15 demo.c
  8. +1 −1 makefile
View
@@ -1,3 +1,3 @@
nasm -f coff timer.asm
-gcc -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c bn.c timer.o -o demo
+gcc -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c bn.c timer.o -o ltmdemo
gcc -I./mtest/ -DU_MPI -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c mtest/mpi.c timer.o -o mpidemo
View
@@ -700,8 +700,8 @@ int mp_mul_2(mp_int *a, mp_int *b)
/* low level addition */
static int s_mp_add(mp_int *a, mp_int *b, mp_int *c)
{
- mp_int t, *x;
- int res, min, max, i;
+ mp_int *x;
+ int olduse, res, min, max, i;
mp_digit u;
REGFUNC("s_mp_add");
@@ -724,54 +724,60 @@ static int s_mp_add(mp_int *a, mp_int *b, mp_int *c)
}
/* init result */
- if ((res = mp_init_size(&t, max+1)) != MP_OKAY) {
- DECFUNC();
- return res;
+ if (c->alloc < max+1) {
+ if ((res = mp_grow(c, max+1)) != MP_OKAY) {
+ DECFUNC();
+ return res;
+ }
}
- t.used = max+1;
+
+ olduse = c->used;
+ c->used = max + 1;
/* add digits from lower part */
u = 0;
for (i = 0; i < min; i++) {
/* T[i] = A[i] + B[i] + U */
- t.dp[i] = a->dp[i] + b->dp[i] + u;
+ c->dp[i] = a->dp[i] + b->dp[i] + u;
/* U = carry bit of T[i] */
- u = (t.dp[i] >> DIGIT_BIT) & 1;
+ u = (c->dp[i] >> DIGIT_BIT) & 1;
/* take away carry bit from T[i] */
- t.dp[i] &= MP_MASK;
+ c->dp[i] &= MP_MASK;
}
/* now copy higher words if any, that is in A+B if A or B has more digits add those in */
if (min != max) {
for (; i < max; i++) {
/* T[i] = X[i] + U */
- t.dp[i] = x->dp[i] + u;
+ c->dp[i] = x->dp[i] + u;
/* U = carry bit of T[i] */
- u = (t.dp[i] >> DIGIT_BIT) & 1;
+ u = (c->dp[i] >> DIGIT_BIT) & 1;
/* take away carry bit from T[i] */
- t.dp[i] &= MP_MASK;
+ c->dp[i] &= MP_MASK;
}
}
/* add carry */
- t.dp[i] = u;
-
- mp_clamp(&t);
- mp_exch(&t, c);
- mp_clear(&t);
+ c->dp[i] = u;
+
+ /* clear digits above used (since we may not have grown result above) */
+ for (i = c->used; i < olduse; i++) {
+ c->dp[i] = 0;
+ }
+
+ mp_clamp(c);
DECFUNC();
return MP_OKAY;
}
/* low level subtraction (assumes a > b) */
static int s_mp_sub(mp_int *a, mp_int *b, mp_int *c)
{
- mp_int t;
- int res, min, max, i;
+ int olduse, res, min, max, i;
mp_digit u;
REGFUNC("s_mp_sub");
@@ -784,42 +790,48 @@ static int s_mp_sub(mp_int *a, mp_int *b, mp_int *c)
max = a->used;
/* init result */
- if ((res = mp_init_size(&t, max)) != MP_OKAY) {
- DECFUNC();
- return res;
+ if (c->alloc < max) {
+ if ((res = mp_grow(c, max)) != MP_OKAY) {
+ DECFUNC();
+ return res;
+ }
}
- t.used = max;
+ olduse = c->used;
+ c->used = max;
/* sub digits from lower part */
u = 0;
for (i = 0; i < min; i++) {
/* T[i] = A[i] - B[i] - U */
- t.dp[i] = a->dp[i] - (b->dp[i] + u);
+ c->dp[i] = a->dp[i] - (b->dp[i] + u);
/* U = carry bit of T[i] */
- u = (t.dp[i] >> DIGIT_BIT) & 1;
+ u = (c->dp[i] >> DIGIT_BIT) & 1;
/* Clear carry from T[i] */
- t.dp[i] &= MP_MASK;
+ c->dp[i] &= MP_MASK;
}
/* now copy higher words if any, e.g. if A has more digits than B */
if (min != max) {
for (; i < max; i++) {
/* T[i] = A[i] - U */
- t.dp[i] = a->dp[i] - u;
+ c->dp[i] = a->dp[i] - u;
/* U = carry bit of T[i] */
- u = (t.dp[i] >> DIGIT_BIT) & 1;
+ u = (c->dp[i] >> DIGIT_BIT) & 1;
/* Clear carry from T[i] */
- t.dp[i] &= MP_MASK;
+ c->dp[i] &= MP_MASK;
}
}
- mp_clamp(&t);
- mp_exch(&t, c);
- mp_clear(&t);
+ /* clear digits above used (since we may not have grown result above) */
+ for (i = c->used; i < olduse; i++) {
+ c->dp[i] = 0;
+ }
+
+ mp_clamp(c);
DECFUNC();
return MP_OKAY;
}
@@ -2941,7 +2953,7 @@ int mp_toradix(mp_int *a, char *str, int radix)
int res, digs;
mp_int t;
mp_digit d;
- unsigned char *_s = str;
+ char *_s = str;
if (radix < 2 || radix > 64) {
return MP_VAL;
@@ -2966,7 +2978,7 @@ int mp_toradix(mp_int *a, char *str, int radix)
*str++ = s_rmap[d];
++digs;
}
- reverse(_s, digs);
+ reverse((unsigned char *)_s, digs);
*str++ = '\0';
mp_clear(&t);
return MP_OKAY;
View
@@ -12,7 +12,6 @@
*
* Tom St Denis, tomstdenis@iahu.ca, http://libtommath.iahu.ca
*/
-
#ifndef BN_H_
#define BN_H_
@@ -39,13 +38,18 @@
#else
#ifndef CRYPT
#ifdef _MSC_VER
- typedef __int64 ulong64;
+ typedef unsigned __int64 ulong64;
+ typedef signed __int64 long64;
#else
typedef unsigned long long ulong64;
+ typedef signed long long long64;
#endif
#endif
+
+ /* default case */
typedef unsigned long mp_digit;
typedef ulong64 mp_word;
+
#define DIGIT_BIT 28U
#endif
@@ -72,8 +76,14 @@
typedef int mp_err;
-#define KARATSUBA_MUL_CUTOFF 80 /* Min. number of digits before Karatsuba multiplication is used. */
-#define KARATSUBA_SQR_CUTOFF 80 /* Min. number of digits before Karatsuba squaring is used. */
+/* you'll have to tune these... */
+#ifdef FAST_FPU
+ #define KARATSUBA_MUL_CUTOFF 100 /* Min. number of digits before Karatsuba multiplication is used. */
+ #define KARATSUBA_SQR_CUTOFF 100 /* Min. number of digits before Karatsuba squaring is used. */
+#else
+ #define KARATSUBA_MUL_CUTOFF 80 /* Min. number of digits before Karatsuba multiplication is used. */
+ #define KARATSUBA_SQR_CUTOFF 80 /* Min. number of digits before Karatsuba squaring is used. */
+#endif
typedef struct {
int used, alloc, sign;
View
Binary file not shown.
View
@@ -1,7 +1,7 @@
\documentclass{article}
\begin{document}
-\title{LibTomMath v0.05 \\ A Free Multiple Precision Integer Library}
+\title{LibTomMath v0.06 \\ A Free Multiple Precision Integer Library}
\author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle
\newpage
@@ -460,34 +460,34 @@ \subsection{Observed Timings}
\begin{tabular}{c|c|c|c}
\hline \textbf{Operation} & \textbf{Size (bits)} & \textbf{Time with MPI (cycles)} & \textbf{Time with LibTomMath (cycles)} \\
\hline
-Inversion & 128 & 264,083 & 159,194 \\
-Inversion & 256 & 549,370 & 355,914 \\
-Inversion & 512 & 1,675,975 & 842,538 \\
-Inversion & 1024 & 5,237,957 & 2,170,804 \\
-Inversion & 2048 & 17,871,944 & 6,250,876 \\
-Inversion & 4096 & 66,610,468 & 18,161,612 \\
+Inversion & 128 & 264,083 & 59,782 \\
+Inversion & 256 & 549,370 & 146,915 \\
+Inversion & 512 & 1,675,975 & 367,172 \\
+Inversion & 1024 & 5,237,957 & 1,054,158 \\
+Inversion & 2048 & 17,871,944 & 3,459,683 \\
+Inversion & 4096 & 66,610,468 & 11,834,556 \\
\hline
Multiply & 128 & 1,426 & 828 \\
Multiply & 256 & 2,551 & 1,393 \\
Multiply & 512 & 7,913 & 2,926 \\
Multiply & 1024 & 28,496 & 8,620 \\
Multiply & 2048 & 109,897 & 28,967 \\
-Multiply & 4096 & 469,970 & 106,855 \\
+Multiply & 4096 & 469,970 & 105,387 \\
\hline
Square & 128 & 1,319 & 869 \\
Square & 256 & 1,776 & 1,362 \\
Square & 512 & 5,399 & 2,571 \\
Square & 1024 & 18,991 & 6,332 \\
Square & 2048 & 72,126 & 18,426 \\
-Square & 4096 & 306,269 & 76,305 \\
+Square & 4096 & 306,269 & 74,908 \\
\hline
-Exptmod & 512 & 32,021,586 & 5,709,468 \\
-Exptmod & 768 & 97,595,492 & 12,473,526 \\
-Exptmod & 1024 & 223,302,532 & 23,593,075 \\
-Exptmod & 2048 & 1,682,223,369 & 121,992,481 \\
-Exptmod & 2560 & 3,268,615,571 & 258,155,605 \\
-Exptmod & 3072 & 5,597,240,141 & 399,800,504 \\
-Exptmod & 4096 & 13,347,270,891 & 826,013,375
+Exptmod & 512 & 32,021,586 & 5,696,459 \\
+Exptmod & 768 & 97,595,492 & 12,428,274 \\
+Exptmod & 1024 & 223,302,532 & 22,834,316 \\
+Exptmod & 2048 & 1,682,223,369 & 119,888,049 \\
+Exptmod & 2560 & 3,268,615,571 & 250,901,263 \\
+Exptmod & 3072 & 5,597,240,141 & 391,716,431 \\
+Exptmod & 4096 & 13,347,270,891 & 814,429,647
\end{tabular}
\end{center}
View
@@ -1,3 +1,7 @@
+Dec 31st, 2002
+v0.06 -- Sped up the s_mp_add, s_mp_sub which in turn sped up mp_invmod, mp_exptmod, etc...
+ -- Cleaned up the header a bit more
+
Dec 30th, 2002
v0.05 -- Builds with MSVC out of the box
-- Fixed a bug in mp_invmod w.r.t. even moduli
Oops, something went wrong.

0 comments on commit 16c6ccc

Please sign in to comment.