diff --git a/bncore.c b/bncore.c index c97b8e199..983256a4b 100644 --- a/bncore.c +++ b/bncore.c @@ -26,6 +26,34 @@ int KARATSUBA_MUL_CUTOFF = 80, /* Min. number of digits before Karatsub TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */ TOOM_SQR_CUTOFF = 400; + +#if ((defined LTM_NEED_EXPLICIT_EXPORT) && (defined LTM_EXPORT_CUTOFFS)) +int mp_get_KARATSUBA_MUL_CUTOFF(void){ + return KARATSUBA_MUL_CUTOFF; +} +void mp_set_KARATSUBA_MUL_CUTOFF(int cutoff){ + KARATSUBA_MUL_CUTOFF = cutoff; +} +int mp_get_KARATSUBA_SQR_CUTOFF(void){ + return KARATSUBA_SQR_CUTOFF; +} +void mp_set_KARATSUBA_SQR_CUTOFF(int cutoff){ + KARATSUBA_SQR_CUTOFF = cutoff; +} +int mp_get_TOOM_MUL_CUTOFF(void){ + return TOOM_MUL_CUTOFF; +} +void mp_set_TOOM_MUL_CUTOFF(int cutoff){ + TOOM_MUL_CUTOFF = cutoff; +} +int mp_get_TOOM_SQR_CUTOFF(void){ + return TOOM_SQR_CUTOFF; +} +void mp_set_TOOM_SQR_CUTOFF(int cutoff){ + TOOM_SQR_CUTOFF = cutoff; +} +#endif + #endif /* ref: $Format:%D$ */ diff --git a/demo/timing.c b/demo/timing.c index 7a5da20a3..340ab6937 100644 --- a/demo/timing.c +++ b/demo/timing.c @@ -205,19 +205,29 @@ int main(void) FCLOSE(log); /* do mult/square twice, first without karatsuba and second with */ +#if ((defined LTM_NEED_EXPLICIT_EXPORT) && (defined LTM_EXPORT_CUTOFFS)) + old_kara_m = mp_get_KARATSUBA_MUL_CUTOFF(); + old_kara_s = mp_get_KARATSUBA_SQR_CUTOFF(); +#else old_kara_m = KARATSUBA_MUL_CUTOFF; old_kara_s = KARATSUBA_SQR_CUTOFF; +#endif /* currently toom-cook cut-off is too high to kick in, so we just use the karatsuba values */ old_toom_m = old_kara_m; old_toom_s = old_kara_m; for (ix = 0; ix < 3; ix++) { printf("With%s Karatsuba, With%s Toom\n", (ix == 0) ? "out" : "", (ix == 1) ? "out" : ""); - +#if ((defined LTM_NEED_EXPLICIT_EXPORT) && (defined LTM_EXPORT_CUTOFFS)) + mp_set_KARATSUBA_MUL_CUTOFF((ix == 1) ? old_kara_m : 9999); + mp_set_KARATSUBA_SQR_CUTOFF((ix == 1) ? old_kara_s : 9999); + mp_set_TOOM_MUL_CUTOFF((ix == 2) ? 
old_toom_m : 9999); + mp_set_TOOM_SQR_CUTOFF((ix == 2) ? old_toom_s : 9999); +#else KARATSUBA_MUL_CUTOFF = (ix == 1) ? old_kara_m : 9999; KARATSUBA_SQR_CUTOFF = (ix == 1) ? old_kara_s : 9999; TOOM_MUL_CUTOFF = (ix == 2) ? old_toom_m : 9999; TOOM_SQR_CUTOFF = (ix == 2) ? old_toom_s : 9999; - +#endif log = FOPEN((ix == 0) ? "logs/mult.log" : (ix == 1) ? "logs/mult_kara.log" : "logs/mult_toom.log", "w"); for (cnt = 4; cnt <= (10240 / DIGIT_BIT); cnt += 2) { SLEEP; diff --git a/doc/bn.tex b/doc/bn.tex index ac84ed34a..78635cab7 100644 --- a/doc/bn.tex +++ b/doc/bn.tex @@ -127,6 +127,19 @@ \subsection{Shared Libraries} There is limited support for making a ``DLL'' in windows via the ``makefile.cygwin\_dll'' makefile. It requires Cygwin to work with since it requires the auto-export/import functionality. The resulting DLL and import library ``libtommath.dll.a'' can be used to link LibTomMath dynamically to any Windows program using Cygwin. +\subsubsection{Shared Library for X32} +It is possible to build a shared library for the x32 architecture with some restrictions. It needs GCC 4.8.0 or later and restricts the availability of some symbols. Those symbols are: +\begin{itemize} +\item[] \texttt{KARATSUBA\_MUL\_CUTOFF} +\item[] \texttt{KARATSUBA\_SQR\_CUTOFF} +\item[] \texttt{TOOM\_MUL\_CUTOFF} +\item[] \texttt{TOOM\_SQR\_CUTOFF} +\item[] \texttt{ltm\_prime\_tab} +\item[] \texttt{mp\_s\_rmap} +\item[] \texttt{mp\_s\_rmap\_reverse} +\item[] \texttt{mp\_s\_rmap\_reverse\_sz} +\end{itemize} +Getters and setters for the four Karatsuba and Toom-Cook cut-offs are available if the macro \texttt{LTM\_EXPORT\_CUTOFFS} is defined. See section \ref{benchmark} for some details.
\subsection{Testing} To build the library and the test harness type @@ -243,6 +256,25 @@ \subsubsection{Operand Size Related} \end{center} \end{small} +\subsection{Automatic Evaluation of the Cut-Off Points}\label{benchmark} +There is a small program in \texttt{demo/timing.c} to find the ideal cut-off points for the Karatsuba and Toom-Cook algorithms. You can build it with +\begin{alltt} +make timing +\end{alltt} +and run it with +\begin{alltt} +./timing +\end{alltt} + +The results will show up in the directory \texttt{logs}. There is a small complication if you want to benchmark a shared library on the x32 architecture, because of the peculiarities of that architecture. The small makefile \texttt{makefile.shared.timing} is included for that purpose. It will build a slightly modified version of the shared library that includes getters and setters for the otherwise hidden cut-off variables; this version is meant for the evaluation of the cut-offs only. Build it with +\begin{alltt} +make -f makefile.shared.timing timing +\end{alltt} +and run it with +\begin{alltt} +./timing +\end{alltt} + \section{Purpose of LibTomMath} Unlike GNU MP (GMP) Library, LIP, OpenSSL or various other commercial kits (Miracl), LibTomMath was not written with diff --git a/makefile.shared.timing b/makefile.shared.timing new file mode 100644 index 000000000..433bf9ec1 --- /dev/null +++ b/makefile.shared.timing @@ -0,0 +1,71 @@ +#Makefile for GCC +# +#Tom St Denis + +#default files to install +ifndef LIBNAME + LIBNAME=libtommath.la +endif + +include makefile_include.mk + + +ifndef LIBTOOL + ifeq ($(PLATFORM), Darwin) + LIBTOOL:=glibtool + else + LIBTOOL:=libtool + endif +endif +LTCOMPILE = $(LIBTOOL) --mode=compile --tag=CC $(CC) +LTLINK = $(LIBTOOL) --mode=link --tag=CC $(CC) + +LCOV_ARGS=--directory .libs --directory .
+ +#START_INS +OBJECTS=bn_error.o bn_fast_mp_invmod.o bn_fast_mp_montgomery_reduce.o bn_fast_s_mp_mul_digs.o \ +bn_fast_s_mp_mul_high_digs.o bn_fast_s_mp_sqr.o bn_mp_2expt.o bn_mp_abs.o bn_mp_add.o bn_mp_add_d.o \ +bn_mp_addmod.o bn_mp_and.o bn_mp_clamp.o bn_mp_clear.o bn_mp_clear_multi.o bn_mp_cmp.o bn_mp_cmp_d.o \ +bn_mp_cmp_mag.o bn_mp_cnt_lsb.o bn_mp_complement.o bn_mp_copy.o bn_mp_count_bits.o bn_mp_div.o \ +bn_mp_div_2.o bn_mp_div_2d.o bn_mp_div_3.o bn_mp_div_d.o bn_mp_dr_is_modulus.o bn_mp_dr_reduce.o \ +bn_mp_dr_setup.o bn_mp_exch.o bn_mp_export.o bn_mp_expt_d.o bn_mp_expt_d_ex.o bn_mp_exptmod.o \ +bn_mp_exptmod_fast.o bn_mp_exteuclid.o bn_mp_fread.o bn_mp_fwrite.o bn_mp_gcd.o bn_mp_get_bit.o \ +bn_mp_get_double.o bn_mp_get_int.o bn_mp_get_long.o bn_mp_get_long_long.o bn_mp_grow.o bn_mp_import.o \ +bn_mp_init.o bn_mp_init_copy.o bn_mp_init_multi.o bn_mp_init_set.o bn_mp_init_set_int.o bn_mp_init_size.o \ +bn_mp_invmod.o bn_mp_invmod_slow.o bn_mp_is_square.o bn_mp_jacobi.o bn_mp_karatsuba_mul.o \ +bn_mp_karatsuba_sqr.o bn_mp_kronecker.o bn_mp_lcm.o bn_mp_lshd.o bn_mp_mod.o bn_mp_mod_2d.o bn_mp_mod_d.o \ +bn_mp_montgomery_calc_normalization.o bn_mp_montgomery_reduce.o bn_mp_montgomery_setup.o bn_mp_mul.o \ +bn_mp_mul_2.o bn_mp_mul_2d.o bn_mp_mul_d.o bn_mp_mulmod.o bn_mp_n_root.o bn_mp_n_root_ex.o bn_mp_neg.o \ +bn_mp_or.o bn_mp_prime_fermat.o bn_mp_prime_frobenius_underwood.o bn_mp_prime_is_divisible.o \ +bn_mp_prime_is_prime.o bn_mp_prime_miller_rabin.o bn_mp_prime_next_prime.o \ +bn_mp_prime_rabin_miller_trials.o bn_mp_prime_random_ex.o bn_mp_prime_strong_lucas_selfridge.o \ +bn_mp_radix_size.o bn_mp_radix_smap.o bn_mp_rand.o bn_mp_read_radix.o bn_mp_read_signed_bin.o \ +bn_mp_read_unsigned_bin.o bn_mp_reduce.o bn_mp_reduce_2k.o bn_mp_reduce_2k_l.o bn_mp_reduce_2k_setup.o \ +bn_mp_reduce_2k_setup_l.o bn_mp_reduce_is_2k.o bn_mp_reduce_is_2k_l.o bn_mp_reduce_setup.o bn_mp_rshd.o \ +bn_mp_set.o bn_mp_set_double.o bn_mp_set_int.o bn_mp_set_long.o 
bn_mp_set_long_long.o bn_mp_shrink.o \ +bn_mp_signed_bin_size.o bn_mp_sqr.o bn_mp_sqrmod.o bn_mp_sqrt.o bn_mp_sqrtmod_prime.o bn_mp_sub.o \ +bn_mp_sub_d.o bn_mp_submod.o bn_mp_tc_and.o bn_mp_tc_div_2d.o bn_mp_tc_or.o bn_mp_tc_xor.o \ +bn_mp_to_signed_bin.o bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin.o bn_mp_to_unsigned_bin_n.o \ +bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_toradix.o bn_mp_toradix_n.o bn_mp_unsigned_bin_size.o bn_mp_xor.o \ +bn_mp_zero.o bn_prime_tab.o bn_reverse.o bn_s_mp_add.o bn_s_mp_exptmod.o bn_s_mp_mul_digs.o \ +bn_s_mp_mul_high_digs.o bn_s_mp_sqr.o bn_s_mp_sub.o bncore.o + +#END_INS + +objs: $(OBJECTS) + +.c.o: + $(LTCOMPILE) $(CFLAGS) -DLTM_EXPORT_CUTOFFS $(LDFLAGS) -o $@ -c $< + +LOBJECTS = $(OBJECTS:.o=.lo) + +$(LIBNAME): $(OBJECTS) + $(LTLINK) $(LDFLAGS) $(LOBJECTS) -DLTM_EXPORT_CUTOFFS -o $(LIBNAME) -rpath $(LIBPATH) -version-info $(VERSION_SO) $(LIBTOOLFLAGS) + +timing: $(LIBNAME) demo/timing.c + $(LTLINK) $(CFLAGS) $(LDFLAGS) -DLTM_EXPORT_CUTOFFS -DTIMER demo/timing.c $(LIBNAME) -o timing + @echo "" + @echo " This makefile builds a special dynamic library for timing" + @echo " purpose only!" + @echo " Please do a make clean && make -f makefile.shared to make" + @echo " the production version of the dynamic LibTomMath." diff --git a/tommath.h b/tommath.h index ee5da86e7..bc9191db2 100644 --- a/tommath.h +++ b/tommath.h @@ -19,6 +19,35 @@ #include "tommath_class.h" + +#ifdef __GNUC__ +#define LTM_GNU_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + + __GNUC_PATCHLEVEL__) +#endif + +/* + __attribute__((visibility ("hidden"))) is in GCC since 3.3.x but the exact patchlevel + is unknown to the author. The recommended minimum GCC version is 4.8.0 according to + https://sites.google.com/site/x32abi/ . 
+ */ +/* TODO: __attribute__((visibility ("hidden"))) is also supported by the Intel compiler */ +#if (LTM_GNU_VERSION >= 40800) +/* Workaround for x32 relocation problems */ +# if ((defined __x86_64__ ) && (defined __ILP32__)) +# define LTM_VISIBILITY_HIDDEN __attribute__((visibility ("hidden"))) +/* + Tuning the cut-offs (e.g. for the Toom-Cook algorithms) requires changing these otherwise hidden variables. + Define LTM_EXPORT_CUTOFFS to get getter and setter functions as a workaround. + */ +# define LTM_NEED_EXPLICIT_EXPORT +# else +# define LTM_VISIBILITY_HIDDEN +# endif +#else +# define LTM_VISIBILITY_HIDDEN +#endif + #ifdef __cplusplus extern "C" { #endif @@ -127,10 +156,21 @@ typedef mp_digit mp_min_u32; typedef int mp_err; /* you'll have to tune these... */ -extern int KARATSUBA_MUL_CUTOFF, - KARATSUBA_SQR_CUTOFF, - TOOM_MUL_CUTOFF, - TOOM_SQR_CUTOFF; +#if ((defined LTM_NEED_EXPLICIT_EXPORT) && (defined LTM_EXPORT_CUTOFFS)) +int mp_get_KARATSUBA_MUL_CUTOFF(void); +void mp_set_KARATSUBA_MUL_CUTOFF(int cutoff); +int mp_get_KARATSUBA_SQR_CUTOFF(void); +void mp_set_KARATSUBA_SQR_CUTOFF(int cutoff); +int mp_get_TOOM_MUL_CUTOFF(void); +void mp_set_TOOM_MUL_CUTOFF(int cutoff); +int mp_get_TOOM_SQR_CUTOFF(void); +void mp_set_TOOM_SQR_CUTOFF(int cutoff); +#endif +extern int LTM_VISIBILITY_HIDDEN KARATSUBA_MUL_CUTOFF; +extern int LTM_VISIBILITY_HIDDEN KARATSUBA_SQR_CUTOFF; +extern int LTM_VISIBILITY_HIDDEN TOOM_MUL_CUTOFF; +extern int LTM_VISIBILITY_HIDDEN TOOM_SQR_CUTOFF; + /* define this to use lower memory usage routines (exptmods mostly) */ /* #define MP_LOW_MEM */ @@ -489,7 +529,7 @@ int mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y); #endif /* table of first PRIME_SIZE primes */ -extern const mp_digit ltm_prime_tab[PRIME_SIZE]; +extern const mp_digit LTM_VISIBILITY_HIDDEN ltm_prime_tab[PRIME_SIZE]; /* result=1 if a is divisible by one of the first PRIME_SIZE primes */ int mp_prime_is_divisible(const mp_int *a, int *result); diff --git a/tommath_private.h
b/tommath_private.h index 35463702f..6e230c990 100644 --- a/tommath_private.h +++ b/tommath_private.h @@ -72,9 +72,9 @@ int mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y int s_mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode); void bn_reverse(unsigned char *s, int len); -extern const char *const mp_s_rmap; -extern const uint8_t mp_s_rmap_reverse[]; -extern const size_t mp_s_rmap_reverse_sz; +extern const char LTM_VISIBILITY_HIDDEN *const mp_s_rmap; +extern const uint8_t LTM_VISIBILITY_HIDDEN mp_s_rmap_reverse[]; +extern const size_t LTM_VISIBILITY_HIDDEN mp_s_rmap_reverse_sz; /* Fancy macro to set an MPI from another type. * There are several things assumed: