Skip to content
28 changes: 28 additions & 0 deletions bncore.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,34 @@ int KARATSUBA_MUL_CUTOFF = 80, /* Min. number of digits before Karatsub

TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */
TOOM_SQR_CUTOFF = 400;

#if ((defined LTM_NEED_EXPLICIT_EXPORT) && (defined LTM_EXPORT_CUTOFFS))
/* Getter: returns the current value of the global KARATSUBA_MUL_CUTOFF. */
int mp_get_KARATSUBA_MUL_CUTOFF(void)
{
   const int current = KARATSUBA_MUL_CUTOFF;

   return current;
}
/* Setter: stores `cutoff` in the global KARATSUBA_MUL_CUTOFF (no range check is done). */
void mp_set_KARATSUBA_MUL_CUTOFF(int cutoff)
{
   KARATSUBA_MUL_CUTOFF = cutoff;
}
/* Getter: returns the current value of the global KARATSUBA_SQR_CUTOFF. */
int mp_get_KARATSUBA_SQR_CUTOFF(void)
{
   const int current = KARATSUBA_SQR_CUTOFF;

   return current;
}
/* Setter: stores `cutoff` in the global KARATSUBA_SQR_CUTOFF (no range check is done). */
void mp_set_KARATSUBA_SQR_CUTOFF(int cutoff)
{
   KARATSUBA_SQR_CUTOFF = cutoff;
}
/* Getter: returns the current value of the global TOOM_MUL_CUTOFF. */
int mp_get_TOOM_MUL_CUTOFF(void)
{
   const int current = TOOM_MUL_CUTOFF;

   return current;
}
/* Setter: stores `cutoff` in the global TOOM_MUL_CUTOFF (no range check is done). */
void mp_set_TOOM_MUL_CUTOFF(int cutoff)
{
   TOOM_MUL_CUTOFF = cutoff;
}
/* Getter: returns the current value of the global TOOM_SQR_CUTOFF. */
int mp_get_TOOM_SQR_CUTOFF(void)
{
   const int current = TOOM_SQR_CUTOFF;

   return current;
}
/* Setter: stores `cutoff` in the global TOOM_SQR_CUTOFF (no range check is done). */
void mp_set_TOOM_SQR_CUTOFF(int cutoff)
{
   TOOM_SQR_CUTOFF = cutoff;
}
#endif

#endif

/* ref: $Format:%D$ */
Expand Down
14 changes: 12 additions & 2 deletions demo/timing.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,19 +205,29 @@ int main(void)
FCLOSE(log);

/* do mult/square twice, first without karatsuba and second with */
#if ((defined LTM_NEED_EXPLICIT_EXPORT) && (defined LTM_EXPORT_CUTOFFS))
old_kara_m = mp_get_KARATSUBA_MUL_CUTOFF();
old_kara_s = mp_get_KARATSUBA_SQR_CUTOFF();
#else
old_kara_m = KARATSUBA_MUL_CUTOFF;
old_kara_s = KARATSUBA_SQR_CUTOFF;
#endif
/* currently toom-cook cut-off is too high to kick in, so we just use the karatsuba values */
old_toom_m = old_kara_m;
old_toom_s = old_kara_m;
for (ix = 0; ix < 3; ix++) {
printf("With%s Karatsuba, With%s Toom\n", (ix == 0) ? "out" : "", (ix == 1) ? "out" : "");

#if ((defined LTM_NEED_EXPLICIT_EXPORT) && (defined LTM_EXPORT_CUTOFFS))
mp_set_KARATSUBA_MUL_CUTOFF((ix == 1) ? old_kara_m : 9999);
mp_set_KARATSUBA_SQR_CUTOFF((ix == 1) ? old_kara_s : 9999);
mp_set_TOOM_MUL_CUTOFF((ix == 2) ? old_toom_m : 9999);
mp_set_TOOM_SQR_CUTOFF((ix == 2) ? old_toom_s : 9999);
#else
KARATSUBA_MUL_CUTOFF = (ix == 1) ? old_kara_m : 9999;
KARATSUBA_SQR_CUTOFF = (ix == 1) ? old_kara_s : 9999;
TOOM_MUL_CUTOFF = (ix == 2) ? old_toom_m : 9999;
TOOM_SQR_CUTOFF = (ix == 2) ? old_toom_s : 9999;

#endif
log = FOPEN((ix == 0) ? "logs/mult.log" : (ix == 1) ? "logs/mult_kara.log" : "logs/mult_toom.log", "w");
for (cnt = 4; cnt <= (10240 / DIGIT_BIT); cnt += 2) {
SLEEP;
Expand Down
32 changes: 32 additions & 0 deletions doc/bn.tex
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,19 @@ \subsection{Shared Libraries}
There is limited support for making a ``DLL'' in windows via the ``makefile.cygwin\_dll'' makefile. It requires
Cygwin to work with since it requires the auto-export/import functionality. The resulting DLL and import library
``libtommath.dll.a'' can be used to link LibTomMath dynamically to any Windows program using Cygwin.
\subsubsection{Shared Library for X32}
It is possible to build a shared library for the x32 architecture with some restrictions. It needs GCC 4.8.0 or later and restricts the availability of some symbols. Those symbols are:
\begin{itemize}
\item[] \texttt{KARATSUBA\_MUL\_CUTOFF}
\item[] \texttt{KARATSUBA\_SQR\_CUTOFF}
\item[] \texttt{TOOM\_MUL\_CUTOFF}
\item[] \texttt{TOOM\_SQR\_CUTOFF}
\item[] \texttt{ltm\_prime\_tab}
\item[] \texttt{mp\_s\_rmap}
\item[] \texttt{mp\_s\_rmap\_reverse}
\item[] \texttt{mp\_s\_rmap\_reverse\_sz}
\end{itemize}
There are getters and setters implemented for the four Karatsuba and Toom-Cook cut-offs if the macro \texttt{LTM\_EXPORT\_CUTOFFS} is defined. See section \ref{benchmark} for some details.

\subsection{Testing}
To build the library and the test harness type
Expand Down Expand Up @@ -243,6 +256,25 @@ \subsubsection{Operand Size Related}
\end{center}
\end{small}

\subsection{Automatic Evaluation of the Cut-Off Points}\label{benchmark}
There is a small program in \texttt{demo/timing.c} to find the ideal cut-off points for the Toom-Cook algorithms. You can build it with
\begin{alltt}
make timing
\end{alltt}
and run it with
\begin{alltt}
./timing
\end{alltt}

The results will show up in the directory \texttt{logs}. There is a small complication on the x32 architecture if you want to benchmark a shared library, because of the peculiarities of that architecture. The small makefile \texttt{makefile.shared.timing} is included for that purpose. It will build a slightly modified version of the shared library that includes getters and setters for the otherwise hidden cut-off variables; this version is meant for the evaluation of the cut-offs only. Build it with
\begin{alltt}
make -f makefile.shared.timing timing
\end{alltt}
and run it with
\begin{alltt}
./timing
\end{alltt}


\section{Purpose of LibTomMath}
Unlike GNU MP (GMP) Library, LIP, OpenSSL or various other commercial kits (Miracl), LibTomMath was not written with
Expand Down
71 changes: 71 additions & 0 deletions makefile.shared.timing
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#Makefile for GCC
#
#Tom St Denis

# Name of the libtool library to build.
# Only set here when the caller has not already defined LIBNAME.
ifndef LIBNAME
LIBNAME=libtommath.la
endif

include makefile_include.mk


# Choose the libtool executable unless the caller already set LIBTOOL:
# "glibtool" when PLATFORM is Darwin, "libtool" on every other platform.
ifndef LIBTOOL
ifeq ($(PLATFORM), Darwin)
LIBTOOL:=glibtool
else
LIBTOOL:=libtool
endif
endif
# Command prefixes that run $(CC) through libtool in compile and in link mode.
LTCOMPILE = $(LIBTOOL) --mode=compile --tag=CC $(CC)
LTLINK = $(LIBTOOL) --mode=link --tag=CC $(CC)

LCOV_ARGS=--directory .libs --directory .

# Object files that make up the library. They are the prerequisites of the
# "objs" target and of $(LIBNAME); LOBJECTS is derived from this list.
#START_INS
OBJECTS=bn_error.o bn_fast_mp_invmod.o bn_fast_mp_montgomery_reduce.o bn_fast_s_mp_mul_digs.o \
bn_fast_s_mp_mul_high_digs.o bn_fast_s_mp_sqr.o bn_mp_2expt.o bn_mp_abs.o bn_mp_add.o bn_mp_add_d.o \
bn_mp_addmod.o bn_mp_and.o bn_mp_clamp.o bn_mp_clear.o bn_mp_clear_multi.o bn_mp_cmp.o bn_mp_cmp_d.o \
bn_mp_cmp_mag.o bn_mp_cnt_lsb.o bn_mp_complement.o bn_mp_copy.o bn_mp_count_bits.o bn_mp_div.o \
bn_mp_div_2.o bn_mp_div_2d.o bn_mp_div_3.o bn_mp_div_d.o bn_mp_dr_is_modulus.o bn_mp_dr_reduce.o \
bn_mp_dr_setup.o bn_mp_exch.o bn_mp_export.o bn_mp_expt_d.o bn_mp_expt_d_ex.o bn_mp_exptmod.o \
bn_mp_exptmod_fast.o bn_mp_exteuclid.o bn_mp_fread.o bn_mp_fwrite.o bn_mp_gcd.o bn_mp_get_bit.o \
bn_mp_get_double.o bn_mp_get_int.o bn_mp_get_long.o bn_mp_get_long_long.o bn_mp_grow.o bn_mp_import.o \
bn_mp_init.o bn_mp_init_copy.o bn_mp_init_multi.o bn_mp_init_set.o bn_mp_init_set_int.o bn_mp_init_size.o \
bn_mp_invmod.o bn_mp_invmod_slow.o bn_mp_is_square.o bn_mp_jacobi.o bn_mp_karatsuba_mul.o \
bn_mp_karatsuba_sqr.o bn_mp_kronecker.o bn_mp_lcm.o bn_mp_lshd.o bn_mp_mod.o bn_mp_mod_2d.o bn_mp_mod_d.o \
bn_mp_montgomery_calc_normalization.o bn_mp_montgomery_reduce.o bn_mp_montgomery_setup.o bn_mp_mul.o \
bn_mp_mul_2.o bn_mp_mul_2d.o bn_mp_mul_d.o bn_mp_mulmod.o bn_mp_n_root.o bn_mp_n_root_ex.o bn_mp_neg.o \
bn_mp_or.o bn_mp_prime_fermat.o bn_mp_prime_frobenius_underwood.o bn_mp_prime_is_divisible.o \
bn_mp_prime_is_prime.o bn_mp_prime_miller_rabin.o bn_mp_prime_next_prime.o \
bn_mp_prime_rabin_miller_trials.o bn_mp_prime_random_ex.o bn_mp_prime_strong_lucas_selfridge.o \
bn_mp_radix_size.o bn_mp_radix_smap.o bn_mp_rand.o bn_mp_read_radix.o bn_mp_read_signed_bin.o \
bn_mp_read_unsigned_bin.o bn_mp_reduce.o bn_mp_reduce_2k.o bn_mp_reduce_2k_l.o bn_mp_reduce_2k_setup.o \
bn_mp_reduce_2k_setup_l.o bn_mp_reduce_is_2k.o bn_mp_reduce_is_2k_l.o bn_mp_reduce_setup.o bn_mp_rshd.o \
bn_mp_set.o bn_mp_set_double.o bn_mp_set_int.o bn_mp_set_long.o bn_mp_set_long_long.o bn_mp_shrink.o \
bn_mp_signed_bin_size.o bn_mp_sqr.o bn_mp_sqrmod.o bn_mp_sqrt.o bn_mp_sqrtmod_prime.o bn_mp_sub.o \
bn_mp_sub_d.o bn_mp_submod.o bn_mp_tc_and.o bn_mp_tc_div_2d.o bn_mp_tc_or.o bn_mp_tc_xor.o \
bn_mp_to_signed_bin.o bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin.o bn_mp_to_unsigned_bin_n.o \
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_toradix.o bn_mp_toradix_n.o bn_mp_unsigned_bin_size.o bn_mp_xor.o \
bn_mp_zero.o bn_prime_tab.o bn_reverse.o bn_s_mp_add.o bn_s_mp_exptmod.o bn_s_mp_mul_digs.o \
bn_s_mp_mul_high_digs.o bn_s_mp_sqr.o bn_s_mp_sub.o bncore.o

#END_INS

# Convenience target: build every object file listed in OBJECTS.
objs: $(OBJECTS)

# Suffix rule: compile each .c file through libtool.
# -DLTM_EXPORT_CUTOFFS is passed to every translation unit of this special build.
.c.o:
$(LTCOMPILE) $(CFLAGS) -DLTM_EXPORT_CUTOFFS $(LDFLAGS) -o $@ -c $<

# The names in OBJECTS with the .o suffix replaced by .lo; these are what gets linked.
LOBJECTS = $(OBJECTS:.o=.lo)

# Link the .lo objects into the libtool library $(LIBNAME).
$(LIBNAME): $(OBJECTS)
$(LTLINK) $(LDFLAGS) $(LOBJECTS) -DLTM_EXPORT_CUTOFFS -o $(LIBNAME) -rpath $(LIBPATH) -version-info $(VERSION_SO) $(LIBTOOLFLAGS)

# Build the timing demo (demo/timing.c, compiled with -DTIMER and
# -DLTM_EXPORT_CUTOFFS) against $(LIBNAME), then print a reminder that this
# library build is for timing only and how to get the production build.
timing: $(LIBNAME) demo/timing.c
$(LTLINK) $(CFLAGS) $(LDFLAGS) -DLTM_EXPORT_CUTOFFS -DTIMER demo/timing.c $(LIBNAME) -o timing
@echo ""
@echo " This makefile builds a special dynamic library for timing"
@echo " purpose only!"
@echo " Please do a make clean && make -f makefile.shared to make"
@echo " the production version of the dynamic LibTomMath."
50 changes: 45 additions & 5 deletions tommath.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,35 @@

#include "tommath_class.h"


/*
   Encode the GCC version as one number (major * 10000 + minor * 100 + patchlevel)
   so that it can be compared in a single preprocessor expression.
   LTM_GNU_VERSION stays undefined for compilers that do not define __GNUC__.
 */
#ifdef __GNUC__
#define LTM_GNU_VERSION (__GNUC__ * 10000 \
                         + __GNUC_MINOR__ * 100 \
                         + __GNUC_PATCHLEVEL__)
#endif

/*
   __attribute__((visibility ("hidden"))) is in GCC since 3.3.x but the exact patchlevel
   is unknown to the author. The recommended minimum GCC version is 4.8.0 according to
   https://sites.google.com/site/x32abi/ .

   LTM_VISIBILITY_HIDDEN is always defined after this block: it expands to the
   attribute on x32 builds (x86-64 with ILP32) with GCC >= 4.8.0 and to nothing
   everywhere else.
 */
/* TODO: __attribute__((visibility ("hidden"))) is also supported by the Intel compiler */
/* The "defined" test keeps LTM_GNU_VERSION from being evaluated while undefined (-Wundef). */
#if (defined LTM_GNU_VERSION) && (LTM_GNU_VERSION >= 40800)
/* Workaround for x32 relocation problems */
#  if ((defined __x86_64__ ) && (defined __ILP32__))
#     define LTM_VISIBILITY_HIDDEN __attribute__((visibility ("hidden")))
/*
   Tuning the cut-offs (e.g. for the Toom-Cook algorithms) requires changing
   variables that are hidden in this configuration. LTM_NEED_EXPLICIT_EXPORT
   marks that situation; additionally define LTM_EXPORT_CUTOFFS to get getter
   and setter functions as a workaround.
 */
#     define LTM_NEED_EXPLICIT_EXPORT
#  else
#     define LTM_VISIBILITY_HIDDEN
#  endif
#else
#  define LTM_VISIBILITY_HIDDEN
#endif

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -127,10 +156,21 @@ typedef mp_digit mp_min_u32;
typedef int mp_err;

/* you'll have to tune these... */
extern int KARATSUBA_MUL_CUTOFF,
KARATSUBA_SQR_CUTOFF,
TOOM_MUL_CUTOFF,
TOOM_SQR_CUTOFF;
/*
   Getter/setter pairs for the four cut-off variables declared below.
   They are only declared when both LTM_NEED_EXPLICIT_EXPORT and
   LTM_EXPORT_CUTOFFS are defined. Each getter returns the current value of
   the variable named in it, each setter takes the new value as `cutoff`.
 */
#if ((defined LTM_NEED_EXPLICIT_EXPORT) && (defined LTM_EXPORT_CUTOFFS))
int mp_get_KARATSUBA_MUL_CUTOFF(void);
void mp_set_KARATSUBA_MUL_CUTOFF(int cutoff);
int mp_get_KARATSUBA_SQR_CUTOFF(void);
void mp_set_KARATSUBA_SQR_CUTOFF(int cutoff);
int mp_get_TOOM_MUL_CUTOFF(void);
void mp_set_TOOM_MUL_CUTOFF(int cutoff);
int mp_get_TOOM_SQR_CUTOFF(void);
void mp_set_TOOM_SQR_CUTOFF(int cutoff);
#endif
/*
   The cut-off variables themselves. LTM_VISIBILITY_HIDDEN expands either to
   nothing or to a hidden-visibility attribute, depending on compiler and target.
 */
extern int LTM_VISIBILITY_HIDDEN KARATSUBA_MUL_CUTOFF;
extern int LTM_VISIBILITY_HIDDEN KARATSUBA_SQR_CUTOFF;
extern int LTM_VISIBILITY_HIDDEN TOOM_MUL_CUTOFF;
extern int LTM_VISIBILITY_HIDDEN TOOM_SQR_CUTOFF;


/* define this to use lower memory usage routines (exptmods mostly) */
/* #define MP_LOW_MEM */
Expand Down Expand Up @@ -489,7 +529,7 @@ int mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y);
#endif

/* table of first PRIME_SIZE primes */
extern const mp_digit ltm_prime_tab[PRIME_SIZE];
extern const mp_digit LTM_VISIBILITY_HIDDEN ltm_prime_tab[PRIME_SIZE];

/* result=1 if a is divisible by one of the first PRIME_SIZE primes */
int mp_prime_is_divisible(const mp_int *a, int *result);
Expand Down
6 changes: 3 additions & 3 deletions tommath_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ int mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y
int s_mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode);
void bn_reverse(unsigned char *s, int len);

extern const char *const mp_s_rmap;
extern const uint8_t mp_s_rmap_reverse[];
extern const size_t mp_s_rmap_reverse_sz;
/*
   Declared with LTM_VISIBILITY_HIDDEN: when that macro expands to the
   hidden-visibility attribute, these symbols are not available outside a
   shared library build.
 */
extern const char LTM_VISIBILITY_HIDDEN *const mp_s_rmap;
extern const uint8_t LTM_VISIBILITY_HIDDEN mp_s_rmap_reverse[];
extern const size_t LTM_VISIBILITY_HIDDEN mp_s_rmap_reverse_sz;

/* Fancy macro to set an MPI from another type.
* There are several things assumed:
Expand Down