diff --git a/bn.ilg b/bn.ilg index 89badbcfe..3c859f034 100644 --- a/bn.ilg +++ b/bn.ilg @@ -1,6 +1,6 @@ This is makeindex, version 2.14 [02-Oct-2002] (kpathsea + Thai support). -Scanning input file bn.idx....done (57 entries accepted, 0 rejected). -Sorting entries....done (342 comparisons). -Generating output file bn.ind....done (60 lines written, 0 warnings). +Scanning input file bn.idx....done (79 entries accepted, 0 rejected). +Sorting entries....done (511 comparisons). +Generating output file bn.ind....done (82 lines written, 0 warnings). Output written in bn.ind. Transcript written in bn.ilg. diff --git a/bn.ind b/bn.ind index 451128cea..ae1dcdeab 100644 --- a/bn.ind +++ b/bn.ind @@ -1,60 +1,82 @@ \begin{theindex} - \item mp\_add, \hyperpage{23} - \item mp\_and, \hyperpage{23} + \item mp\_add, \hyperpage{25} + \item mp\_add\_d, \hyperpage{48} + \item mp\_and, \hyperpage{25} \item mp\_clear, \hyperpage{7} \item mp\_clear\_multi, \hyperpage{8} - \item mp\_cmp, \hyperpage{18} - \item mp\_cmp\_d, \hyperpage{20} - \item mp\_cmp\_mag, \hyperpage{17} - \item mp\_div, \hyperpage{29} - \item mp\_div\_2, \hyperpage{21} - \item mp\_div\_2d, \hyperpage{22} - \item MP\_EQ, \hyperpage{17} + \item mp\_cmp, \hyperpage{20} + \item mp\_cmp\_d, \hyperpage{21} + \item mp\_cmp\_mag, \hyperpage{19} + \item mp\_div, \hyperpage{26} + \item mp\_div\_2, \hyperpage{22} + \item mp\_div\_2d, \hyperpage{24} + \item mp\_div\_d, \hyperpage{48} + \item mp\_dr\_reduce, \hyperpage{36} + \item mp\_dr\_setup, \hyperpage{36} + \item MP\_EQ, \hyperpage{18} \item mp\_error\_to\_string, \hyperpage{6} - \item mp\_expt\_d, \hyperpage{31} - \item mp\_exptmod, \hyperpage{31} - \item mp\_exteuclid, \hyperpage{39} - \item mp\_gcd, \hyperpage{39} + \item mp\_expt\_d, \hyperpage{39} + \item mp\_exptmod, \hyperpage{39} + \item mp\_exteuclid, \hyperpage{47} + \item mp\_gcd, \hyperpage{47} + \item mp\_get\_int, \hyperpage{16} \item mp\_grow, \hyperpage{12} - \item MP\_GT, \hyperpage{17} + \item MP\_GT, \hyperpage{18} \item mp\_init, \hyperpage{7} \item mp\_init\_copy, \hyperpage{9} \item mp\_init\_multi, \hyperpage{8} + \item mp\_init\_set, \hyperpage{17} + \item mp\_init\_set\_int, \hyperpage{17} \item mp\_init\_size, \hyperpage{10} \item mp\_int, \hyperpage{6} - \item mp\_invmod, \hyperpage{40} - \item mp\_jacobi, \hyperpage{40} - \item mp\_lcm, \hyperpage{39} - \item mp\_lshd, \hyperpage{23} - \item MP\_LT, \hyperpage{17} + \item mp\_invmod, \hyperpage{48} + \item mp\_jacobi, \hyperpage{48} + \item mp\_lcm, \hyperpage{47} + \item mp\_lshd, \hyperpage{24} + \item MP\_LT, \hyperpage{18} \item MP\_MEM, \hyperpage{5} - \item mp\_mul, \hyperpage{25} - \item mp\_mul\_2, \hyperpage{21} - \item mp\_mul\_2d, \hyperpage{22} - \item mp\_n\_root, \hyperpage{31} - \item mp\_neg, \hyperpage{24} + \item mp\_mod, \hyperpage{31} + \item mp\_mod\_d, \hyperpage{48} + \item mp\_montgomery\_calc\_normalization, \hyperpage{34} + \item mp\_montgomery\_reduce, \hyperpage{33} + \item mp\_montgomery\_setup, \hyperpage{33} + \item mp\_mul, \hyperpage{27} + \item mp\_mul\_2, \hyperpage{22} + \item mp\_mul\_2d, \hyperpage{24} + \item mp\_mul\_d, \hyperpage{48} + \item mp\_n\_root, \hyperpage{40} + \item mp\_neg, \hyperpage{25} \item MP\_NO, \hyperpage{5} \item MP\_OKAY, \hyperpage{5} - \item mp\_or, \hyperpage{23} - \item mp\_prime\_fermat, \hyperpage{33} - \item mp\_prime\_is\_divisible, \hyperpage{33} - \item mp\_prime\_is\_prime, \hyperpage{34} - \item mp\_prime\_miller\_rabin, \hyperpage{33} - \item mp\_prime\_next\_prime, \hyperpage{34} - \item mp\_prime\_rabin\_miller\_trials, \hyperpage{34} - \item mp\_prime\_random, \hyperpage{35} - \item mp\_radix\_size, \hyperpage{37} - \item mp\_read\_radix, \hyperpage{37} - \item mp\_rshd, \hyperpage{23} + \item mp\_or, \hyperpage{25} + \item mp\_prime\_fermat, \hyperpage{41} + \item mp\_prime\_is\_divisible, \hyperpage{41} + \item mp\_prime\_is\_prime, \hyperpage{42} + \item mp\_prime\_miller\_rabin, \hyperpage{41} + \item mp\_prime\_next\_prime, \hyperpage{42} + \item mp\_prime\_rabin\_miller\_trials, \hyperpage{42} + \item mp\_prime\_random, \hyperpage{43} + \item mp\_prime\_random\_ex, \hyperpage{43} + \item mp\_radix\_size, \hyperpage{45} + \item mp\_read\_radix, \hyperpage{45} + \item mp\_read\_unsigned\_bin, \hyperpage{46} + \item mp\_reduce, \hyperpage{32} + \item mp\_reduce\_2k, \hyperpage{37} + \item mp\_reduce\_2k\_setup, \hyperpage{37} + \item mp\_reduce\_setup, \hyperpage{32} + \item mp\_rshd, \hyperpage{24} \item mp\_set, \hyperpage{15} \item mp\_set\_int, \hyperpage{16} \item mp\_shrink, \hyperpage{11} - \item mp\_sqr, \hyperpage{25} - \item mp\_sub, \hyperpage{23} - \item mp\_toradix, \hyperpage{37} + \item mp\_sqr, \hyperpage{29} + \item mp\_sub, \hyperpage{25} + \item mp\_sub\_d, \hyperpage{48} + \item mp\_to\_unsigned\_bin, \hyperpage{46} + \item mp\_toradix, \hyperpage{45} + \item mp\_unsigned\_bin\_size, \hyperpage{46} \item MP\_VAL, \hyperpage{5} - \item mp\_xor, \hyperpage{23} + \item mp\_xor, \hyperpage{25} \item MP\_YES, \hyperpage{5} \end{theindex} diff --git a/bn.pdf b/bn.pdf index 90e799b13..52421fd59 100644 Binary files a/bn.pdf and b/bn.pdf differ diff --git a/bn.tex b/bn.tex index bed0b2608..a25458625 100644 --- a/bn.tex +++ b/bn.tex @@ -49,7 +49,7 @@ \begin{document} \frontmatter \pagestyle{empty} -\title{LibTomMath User Manual \\ v0.28} +\title{LibTomMath User Manual \\ v0.30} \author{Tom St Denis \\ tomstdenis@iahu.ca} \maketitle This text, the library and the accompanying textbook are all hereby placed in the public domain. This book has been @@ -87,7 +87,7 @@ \section{License} release as well. This textbook is meant to compliment the project by providing a more solid walkthrough of the development algorithms used in the library. -Since both\footnote{Note that the MPI files under mtest/ are copyrighted by Michael Fromberger.} are in the +Since both\footnote{Note that the MPI files under mtest/ are copyrighted by Michael Fromberger. They are not required to use LibTomMath.} are in the public domain everyone is entitled to do with them as they see fit. \section{Building LibTomMath} @@ -114,7 +114,7 @@ \section{Building LibTomMath} This will build the library and archive the object files in ``tommath.lib''. This has been tested with MSVC version 6.00 with service pack 5. -Tbere is limited support for making a ``DLL'' in windows via the ``makefile.cygwin\_dll'' makefile. It requires Cygwin +There is limited support for making a ``DLL'' in windows via the ``makefile.cygwin\_dll'' makefile. It requires Cygwin to work with since it requires the auto-export/import functionality. The resulting DLL and imprt library ``libtomcrypt.dll.a'' can be used to link LibTomMath dynamically to any Windows program using Cygwin. @@ -385,7 +385,7 @@ \subsection{Other Initializers} int mp_init_copy (mp_int * a, mp_int * b); \end{alltt} -This function will initialize ``a'' and make it a copy of ``b'' if all goes well. +This function will initialize $a$ and make it a copy of $b$ if all goes well. \begin{small} \begin{alltt} int main(void) @@ -420,8 +420,8 @@ \subsection{Other Initializers} int mp_init_size (mp_int * a, int size); \end{alltt} -The ``size'' parameter must be greater than zero. If the function succeeds the mp\_int ``a'' will be initialized -to have ``size'' digits (which are all initially zero). +The $size$ parameter must be greater than zero. If the function succeeds the mp\_int $a$ will be initialized +to have $size$ digits (which are all initially zero). \begin{small} \begin{alltt} int main(void) @@ -453,7 +453,7 @@ \subsection{Reducing Memory Usage} int mp_shrink (mp_int * a); \end{alltt} -This will remove excess digits of the mp\_int ``a''. If the operation fails the mp\_int should be intact without the +This will remove excess digits of the mp\_int $a$. If the operation fails the mp\_int should be intact without the excess digits being removed. Note that you can use a shrunk mp\_int in further computations, however, such operations will require heap operations which can be slow. It is not ideal to shrink mp\_int variables that you will further modify in the system (unless you are seriously low on memory). @@ -502,8 +502,8 @@ \subsection{Adding additional digits} int mp_grow (mp_int * a, int size); \end{alltt} -This will grow the array of digits of ``a'' to ``size''. If the \textit{alloc} parameter is already bigger than -``size'' the function will not do anything. +This will grow the array of digits of $a$ to $size$. If the \textit{alloc} parameter is already bigger than +$size$ the function will not do anything. \begin{small} \begin{alltt} int main(void) @@ -552,7 +552,7 @@ \subsection{Single Digit} void mp_set (mp_int * a, mp_digit b); \end{alltt} -This will zero the contents of ``a'' and make it represent an integer equal to the value of ``b''. Note that this +This will zero the contents of $a$ and make it represent an integer equal to the value of $b$. Note that this function has a return type of \textbf{void}. It cannot cause an error so it is safe to assume the function succeeded. @@ -578,20 +578,29 @@ \subsection{Single Digit} \} \end{alltt} \end{small} -\subsection{Long Constant} +\subsection{Long Constants} -When you want to set a constant that is the size of an ISO C ``unsigned long'' and larger than a single -digit the following function is provided. +To set a constant that is the size of an ISO C ``unsigned long'' and larger than a single digit the following function +can be used. \index{mp\_set\_int} \begin{alltt} int mp_set_int (mp_int * a, unsigned long b); \end{alltt} -This will assign the value of the 32-bit variable ``b'' to the mp\_int ``a''. Unlike mp\_set() this function will always +This will assign the value of the 32-bit variable $b$ to the mp\_int $a$. Unlike mp\_set() this function will always accept a 32-bit input regardless of the size of a single digit. However, since the value may span several digits this function can fail if it runs out of heap memory. +To get the ``unsigned long'' copy of an mp\_int the following function can be used. + +\index{mp\_get\_int} +\begin{alltt} +unsigned long mp_get_int (mp_int * a); +\end{alltt} + +This will return the 32 least significant bits of the mp\_int $a$. + \begin{small} \begin{alltt} int main(void) \{ @@ -610,6 +619,9 @@ \subsection{Long Constant} mp_error_to_string(result)); return EXIT_FAILURE; \} + + printf("number == \%lu", mp_get_int(&number)); + /* we're done with it. */ mp_clear(&number); @@ -617,6 +629,58 @@ \subsection{Long Constant} \} \end{alltt} \end{small} +This should output the following if the program succeeds. + +\begin{alltt} +number == 654321 +\end{alltt} + +\subsection{Initialize and Setting Constants} +To both initialize and set small constants the following two functions are available. +\index{mp\_init\_set} \index{mp\_init\_set\_int} +\begin{alltt} +int mp_init_set (mp_int * a, mp_digit b); +int mp_init_set_int (mp_int * a, unsigned long b); +\end{alltt} + +Both functions work like the previous counterparts except they first mp\_init $a$ before setting the values. + +\begin{alltt} +int main(void) +\{ + mp_int number1, number2; + int result; + + /* initialize and set a single digit */ + if ((result = mp_init_set(&number1, 100)) != MP_OKAY) \{ + printf("Error setting number1: \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* initialize and set a long */ + if ((result = mp_init_set_int(&number2, 1023)) != MP_OKAY) \{ + printf("Error setting number2: \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* display */ + printf("Number1, Number2 == \%lu, \%lu", + mp_get_int(&number1), mp_get_int(&number2)); + + /* clear */ + mp_clear_multi(&number1, &number2, NULL); + + return EXIT_SUCCESS; +\} +\end{alltt} + +If this program succeeds it shall output. +\begin{alltt} +Number1, Number2 == 100, 1023 +\end{alltt} + \section{Comparisons} Comparisons in LibTomMath are always performed in a ``left to right'' fashion. There are three possible return codes @@ -644,13 +708,13 @@ \subsection{Unsigned comparison} An unsigned comparison considers only the digits themselves and not the associated \textit{sign} flag of the mp\_int structures. This is analogous to an absolute comparison. The function mp\_cmp\_mag() will compare two -mp\_int variables based on their digits only. +mp\_int variables based on their digits only. \index{mp\_cmp\_mag} \begin{alltt} int mp_cmp(mp_int * a, mp_int * b); \end{alltt} -This will compare ``a'' to ``b'' placing ``a'' to the left of ``b''. This function cannot fail and will return one of the +This will compare $a$ to $b$ placing $a$ to the left of $b$. This function cannot fail and will return one of the three compare codes listed in figure \ref{fig:CMP}. \begin{small} \begin{alltt} @@ -707,7 +771,7 @@ \subsection{Signed comparison} int mp_cmp(mp_int * a, mp_int * b); \end{alltt} -This will compare ``a'' to the left of ``b''. It will first compare the signs of the two mp\_int variables. If they +This will compare $a$ to the left of $b$. It will first compare the signs of the two mp\_int variables. If they differ it will return immediately based on their signs. If the signs are equal then it will compare the digits individually. This function will return one of the compare conditions codes listed in figure \ref{fig:CMP}. @@ -763,7 +827,7 @@ \subsection{Single Digit} int mp_cmp_d(mp_int * a, mp_digit b); \end{alltt} -This will compare ``a'' to the left of ``b'' using a signed comparison. Note that it will always treat ``b'' as +This will compare $a$ to the left of $b$ using a signed comparison. Note that it will always treat $b$ as positive. This function is rather handy when you have to compare against small values such as $1$ (which often comes up in cryptography). The function cannot fail and will return one of the tree compare condition codes listed in figure \ref{fig:CMP}. @@ -820,7 +884,7 @@ \subsection{Multiplication by two} int mp_div_2(mp_int * a, mp_int * b); \end{alltt} -The former will assign twice ``a'' to ``b'' while the latter will assign half ``a'' to ``b''. These functions are fast +The former will assign twice $a$ to $b$ while the latter will assign half $a$ to $b$. These functions are fast since the shift counts and maskes are hardcoded into the routines. \begin{small} \begin{alltt} @@ -883,8 +947,8 @@ \subsection{Multiplication by two} int mp_mul_2d(mp_int * a, int b, mp_int * c); \end{alltt} -This will multiply ``a'' by $2^b$ and store the result in ``c''. If the value of $b$ is less than or equal to -zero the function will copy ``a'' to ``c'' without performing any further actions. +This will multiply $a$ by $2^b$ and store the result in ``c''. If the value of $b$ is less than or equal to +zero the function will copy $a$ to ``c'' without performing any further actions. To divide by a power of two use the following. @@ -892,8 +956,8 @@ \subsection{Multiplication by two} \begin{alltt} int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d); \end{alltt} -Which will divide ``a'' by $2^b$, store the quotient in ``c'' and the remainder in ``d'. If $b \le 0$ then the -function simply copies ``a'' over to ``c'' and zeroes ``d''. The variable ``d'' may be passed as a \textbf{NULL} +Which will divide $a$ by $2^b$, store the quotient in ``c'' and the remainder in ``d'. If $b \le 0$ then the +function simply copies $a$ over to ``c'' and zeroes $d$. The variable $d$ may be passed as a \textbf{NULL} value to signal that the remainder is not desired. \subsection{Polynomial Basis Operations} @@ -911,14 +975,14 @@ \subsection{Polynomial Basis Operations} int mp_lshd (mp_int * a, int b); \end{alltt} -This will multiply ``a'' in place by $x^b$ which is equivalent to shifting the digits left $b$ places and inserting zeroes +This will multiply $a$ in place by $x^b$ which is equivalent to shifting the digits left $b$ places and inserting zeroes in the least significant digits. Similarly to divide by a power of $x$ the following function is provided. \index{mp\_rshd} \begin{alltt} void mp_rshd (mp_int * a, int b) \end{alltt} -This will divide ``a'' in place by $x^b$ and discard the remainder. This function cannot fail as it performs the operations +This will divide $a$ in place by $x^b$ and discard the remainder. This function cannot fail as it performs the operations in place and no new digits are required to complete it. \subsection{AND, OR and XOR Operations} @@ -948,7 +1012,6 @@ \section{Addition and Subtraction} Which perform $c = a \odot b$ where $\odot$ is one of signed addition or subtraction. The operations are fully sign aware. - \section{Sign Manipulation} \subsection{Negation} \label{sec:NEG} @@ -959,7 +1022,7 @@ \subsection{Negation} int mp_neg (mp_int * a, mp_int * b); \end{alltt} -Which assigns $-b$ to $a$. +Which assigns $-a$ to $b$. \subsection{Absolute} Simple integer absolutes can be performed with the following. @@ -969,7 +1032,20 @@ \subsection{Absolute} int mp_abs (mp_int * a, mp_int * b); \end{alltt} -Which assigns $\vert b \vert$ to $a$. +Which assigns $\vert a \vert$ to $b$. + +\section{Integer Division and Remainder} +To perform a complete and general integer division with remainder use the following function. + +\index{mp\_div} +\begin{alltt} +int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d); +\end{alltt} + +This divides $a$ by $b$ and stores the quotient in $c$ and $d$. The signed quotient is computed such that +$bc + d = a$. Note that either of $c$ or $d$ can be set to \textbf{NULL} if their value is not required. If +$b$ is zero the function returns \textbf{MP\_VAL}. + \chapter{Multiplication and Squaring} \section{Multiplication} @@ -986,6 +1062,57 @@ \section{Multiplication} Fortunately for the developer you don't really need to know this unless you really want to fine tune the system. mp\_mul() will determine on its own\footnote{Some tweaking may be required.} what routine to use automatically when it is called. +\begin{alltt} +int main(void) +\{ + mp_int number1, number2; + int result; + + /* Initialize the numbers */ + if ((result = mp_init_multi(&number1, + &number2, NULL)) != MP_OKAY) \{ + printf("Error initializing the numbers. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* set the terms */ + if ((result = mp_set_int(&number, 257)) != MP_OKAY) \{ + printf("Error setting number1. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + if ((result = mp_set_int(&number2, 1023)) != MP_OKAY) \{ + printf("Error setting number2. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* multiply them */ + if ((result = mp_mul(&number1, &number2, + &number1)) != MP_OKAY) \{ + printf("Error multiplying terms. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* display */ + printf("number1 * number2 == \%lu", mp_get_int(&number1)); + + /* free terms and return */ + mp_clear_multi(&number1, &number2, NULL); + + return EXIT_SUCCESS; +\} +\end{alltt} + +If this program succeeds it shall output the following. + +\begin{alltt} +number1 * number2 == 262911 +\end{alltt} + \section{Squaring} Since squaring can be performed faster than multiplication it is performed it's own function instead of just using mp\_mul(). @@ -995,12 +1122,12 @@ \section{Squaring} int mp_sqr (mp_int * a, mp_int * b); \end{alltt} -Will square ``a'' and store it in ``b''. Like the case of multiplication there are four different squaring -algorithms all which can be called from mp\_sqr(). +Will square $a$ and store it in $b$. Like the case of multiplication there are four different squaring +algorithms all which can be called from mp\_sqr(). It is ideal to use mp\_sqr over mp\_mul when squaring terms. \section{Tuning Polynomial Basis Routines} -Both Toom-Cook and Karatsuba multiplication algorithms are faster than the traditional $O(n^2)$ approach that +Both of the Toom-Cook and Karatsuba multiplication algorithms are faster than the traditional $O(n^2)$ approach that the Comba and baseline algorithms use. At $O(n^{1.464973})$ and $O(n^{1.584962})$ running times respectfully they require considerably less work. For example, a 10000-digit multiplication would take roughly 724,000 single precision multiplications with Toom-Cook or 100,000,000 single precision multiplications with the standard Comba (a factor @@ -1044,30 +1171,286 @@ \section{Tuning Polynomial Basis Routines} tuning takes a very long time as the cutoff points are likely to be very high. \chapter{Modular Reduction} -\section{Integer Division and Remainder} -To perform a complete and general integer division with remainder use the following function. -\index{mp\_div} +Modular reduction is process of taking the remainder of one quantity divided by another. Expressed +as (\ref{eqn:mod}) the modular reduction is equivalent to the remainder of $b$ divided by $c$. + +\begin{equation} +a \equiv b \mbox{ (mod }c\mbox{)} +\label{eqn:mod} +\end{equation} + +Of particular interest to cryptography are reductions where $b$ is limited to the range $0 \le b < c^2$ since particularly +fast reduction algorithms can be written for the limited range. + +Note that one of the four optimized reduction algorithms are automatically chosen in the modular exponentiation +algorithm mp\_exptmod when an appropriate modulus is detected. + +\section{Straight Division} +In order to effect an arbitrary modular reduction the following algorithm is provided. + +\index{mp\_mod} \begin{alltt} -int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d); +int mp_mod(mp_int *a, mp_int *b, mp_int *c); \end{alltt} -This divides ``a'' by ``b'' and stores the quotient in ``c'' and ``d''. The signed quotient is computed such that -$bc + d = a$. Note that either of ``c'' or ``d'' can be set to \textbf{NULL} if their value is not required. +This reduces $a$ modulo $b$ and stores the result in $c$. The sign of $c$ shall agree with the sign +of $b$. This algorithm accepts an input $a$ of any range and is not limited by $0 \le a < b^2$. \section{Barrett Reduction} + +Barrett reduction is a generic optimized reduction algorithm that requires pre--computation to achieve +a decent speedup over straight division. First a $mu$ value must be precomputed with the following function. + +\index{mp\_reduce\_setup} +\begin{alltt} +int mp_reduce_setup(mp_int *a, mp_int *b); +\end{alltt} + +Given a modulus in $b$ this produces the required $mu$ value in $a$. For any given modulus this only has to +be computed once. Modular reduction can now be performed with the following. + +\index{mp\_reduce} +\begin{alltt} +int mp_reduce(mp_int *a, mp_int *b, mp_int *c); +\end{alltt} + +This will reduce $a$ in place modulo $b$ with the precomputed $mu$ value in $c$. $a$ must be in the range +$0 \le a < b^2$. + +\begin{alltt} +int main(void) +\{ + mp_int a, b, c, mu; + int result; + + /* initialize a,b to desired values, mp_init mu, + * c and set c to 1...we want to compute a^3 mod b + */ + + /* get mu value */ + if ((result = mp_reduce_setup(&mu, b)) != MP_OKAY) \{ + printf("Error getting mu. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* square a to get c = a^2 */ + if ((result = mp_sqr(&a, &c)) != MP_OKAY) \{ + printf("Error squaring. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* now reduce `c' modulo b */ + if ((result = mp_reduce(&c, &b, &mu)) != MP_OKAY) \{ + printf("Error reducing. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* multiply a to get c = a^3 */ + if ((result = mp_mul(&a, &c, &c)) != MP_OKAY) \{ + printf("Error reducing. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* now reduce `c' modulo b */ + if ((result = mp_reduce(&c, &b, &mu)) != MP_OKAY) \{ + printf("Error reducing. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* c now equals a^3 mod b */ + + return EXIT_SUCCESS; +\} +\end{alltt} + +This program will calculate $a^3 \mbox{ mod }b$ if all the functions succeed. + \section{Montgomery Reduction} + +Montgomery is a specialized reduction algorithm for any odd moduli. Like Barrett reduction a pre--computation +step is required. This is accomplished with the following. + +\index{mp\_montgomery\_setup} +\begin{alltt} +int mp_montgomery_setup(mp_int *a, mp_digit *mp); +\end{alltt} + +For the given odd moduli $a$ the precomputation value is placed in $mp$. The reduction is computed with the +following. + +\index{mp\_montgomery\_reduce} +\begin{alltt} +int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp); +\end{alltt} +This reduces $a$ in place modulo $m$ with the pre--computed value $mp$. $a$ must be in the range +$0 \le a < b^2$. + +Montgomery reduction is faster than Barrett reduction for moduli smaller than the ``comba'' limit. With the default +setup for instance, the limit is $127$ digits ($3556$--bits). Note that this function is not limited to +$127$ digits just that it falls back to a baseline algorithm after that point. + +An important observation is that this reduction does not return $a \mbox{ mod }m$ but $aR^{-1} \mbox{ mod }m$ +where $R = \beta^n$, $n$ is the n number of digits in $m$ and $\beta$ is radix used (default is $2^{28}$). + +To quickly calculate $R$ the following function was provided. + +\index{mp\_montgomery\_calc\_normalization} +\begin{alltt} +int mp_montgomery_calc_normalization(mp_int *a, mp_int *b); +\end{alltt} +Which calculates $a = R$ for the odd moduli $b$ without using multiplication or division. + +The normal modus operandi for Montgomery reductions is to normalize the integers before entering the system. For +example, to calculate $a^3 \mbox { mod }b$ using Montgomery reduction the value of $a$ can be normalized by +multiplying it by $R$. Consider the following code snippet. + +\begin{alltt} +int main(void) +\{ + mp_int a, b, c, R; + mp_digit mp; + int result; + + /* initialize a,b to desired values, + * mp_init R, c and set c to 1.... + */ + + /* get normalization */ + if ((result = mp_montgomery_calc_normalization(&R, b)) != MP_OKAY) \{ + printf("Error getting norm. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* get mp value */ + if ((result = mp_montgomery_setup(&c, &mp)) != MP_OKAY) \{ + printf("Error setting up montgomery. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* normalize `a' so now a is equal to aR */ + if ((result = mp_mulmod(&a, &R, &b, &a)) != MP_OKAY) \{ + printf("Error computing aR. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* square a to get c = a^2R^2 */ + if ((result = mp_sqr(&a, &c)) != MP_OKAY) \{ + printf("Error squaring. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* now reduce `c' back down to c = a^2R^2 * R^-1 == a^2R */ + if ((result = mp_montgomery_reduce(&c, &b, mp)) != MP_OKAY) \{ + printf("Error reducing. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* multiply a to get c = a^3R^2 */ + if ((result = mp_mul(&a, &c, &c)) != MP_OKAY) \{ + printf("Error reducing. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* now reduce `c' back down to c = a^3R^2 * R^-1 == a^3R */ + if ((result = mp_montgomery_reduce(&c, &b, mp)) != MP_OKAY) \{ + printf("Error reducing. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* now reduce (again) `c' back down to c = a^3R * R^-1 == a^3 */ + if ((result = mp_montgomery_reduce(&c, &b, mp)) != MP_OKAY) \{ + printf("Error reducing. \%s", + mp_error_to_string(result)); + return EXIT_FAILURE; + \} + + /* c now equals a^3 mod b */ + + return EXIT_SUCCESS; +\} +\end{alltt} + +This particular example does not look too efficient but it demonstrates the point of the algorithm. By +normalizing the inputs the reduced results are always of the form $aR$ for some variable $a$. This allows +a single final reduction to correct for the normalization and the fast reduction used within the algorithm. + +For more details consider examining the file \textit{bn\_mp\_exptmod\_fast.c}. + \section{Restricted Dimminished Radix} + +``Dimminished Radix'' reduction refers to reduction with respect to moduli that are ameniable to simple +digit shifting and small multiplications. In this case the ``restricted'' variant refers to moduli of the +form $\beta^k - p$ for some $k \ge 0$ and $0 < p < \beta$ where $\beta$ is the radix (default to $2^{28}$). + +As in the case of Montgomery reduction there is a pre--computation phase required for a given modulus. + +\index{mp\_dr\_setup} +\begin{alltt} +void mp_dr_setup(mp_int *a, mp_digit *d); +\end{alltt} + +This computes the value required for the modulus $a$ and stores it in $d$. This function cannot fail +and does not return any error codes. After the pre--computation a reduction can be performed with the +following. + +\index{mp\_dr\_reduce} +\begin{alltt} +int mp_dr_reduce(mp_int *a, mp_int *b, mp_digit mp); +\end{alltt} + +This reduces $a$ in place modulo $b$ with the pre--computed value $mp$. $b$ must be of a restricted +dimminished radix form and $a$ must be in the range $0 \le a < b^2$. Dimminished radix reductions are +much faster than both Barrett and Montgomery reductions as they have a much lower asymtotic running time. + +Since the moduli are restricted this algorithm is not particularly useful for something like Rabin, RSA or +BBS cryptographic purposes. This reduction algorithm is useful for Diffie-Hellman and ECC where fixed +primes are acceptable. + +Note that unlike Montgomery reduction there is no normalization process. The result of this function is +equal to the correct residue. + \section{Unrestricted Dimminshed Radix} +Unrestricted reductions work much like the restricted counterparts except in this case the moduli is of the +form $2^k - p$ for $0 < p < \beta$. In this sense the unrestricted reductions are more flexible as they +can be applied to a wider range of numbers. + +\index{mp\_reduce\_2k\_setup} +\begin{alltt} +int mp_reduce_2k_setup(mp_int *a, mp_digit *d); +\end{alltt} + +This will compute the required $d$ value for the given moduli $a$. + +\index{mp\_reduce\_2k} +\begin{alltt} +int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d); +\end{alltt} + +This will reduce $a$ in place modulo $n$ with the pre--computed value $d$. From my experience this routine is +slower than mp\_dr\_reduce but faster for most moduli sizes than the Montgomery reduction. + \chapter{Exponentiation} \section{Single Digit Exponentiation} \index{mp\_expt\_d} \begin{alltt} int mp_expt_d (mp_int * a, mp_digit b, mp_int * c) \end{alltt} -This computes $c = a^b$ using a simple binary left-to-write algorithm. It is faster than repeated multiplications for -all values of $b$ greater than three. +This computes $c = a^b$ using a simple binary left-to-right algorithm. It is faster than repeated multiplications by +$a$ for all values of $b$ greater than three. \section{Modular Exponentiation} \index{mp\_exptmod} @@ -1077,7 +1460,12 @@ \section{Modular Exponentiation} This computes $Y \equiv G^X \mbox{ (mod }P\mbox{)}$ using a variable width sliding window algorithm. This function will automatically detect the fastest modular reduction technique to use during the operation. For negative values of $X$ the operation is performed as $Y \equiv (G^{-1} \mbox{ mod }P)^{\vert X \vert} \mbox{ (mod }P\mbox{)}$ provided that -$gcd(G, P) = 1$. +$gcd(G, P) = 1$. + +This function is actually a shell around the two internal exponentiation functions. This routine will automatically +detect when Barrett, Montgomery, Restricted and Unrestricted Dimminished Radix based exponentiation can be used. Generally +moduli of the a ``restricted dimminished radix'' form lead to the fastest modular exponentiations. Followed by Montgomery +and the other two algorithms. \section{Root Finding} \index{mp\_n\_root} @@ -1090,6 +1478,11 @@ \section{Root Finding} a root with the sign of the input for odd roots. For example, performing $4^{1/2}$ will return $2$ whereas $(-8)^{1/3}$ will return $-2$. +This algorithm uses the ``Newton Approximation'' method and will converge on the correct root fairly quickly. Since +the algorithm requires raising $a$ to the power of $b$ it is not ideal to attempt to find roots for large +values of $b$. If particularly large roots are required then a factor method could be used instead. For example, +$a^{1/16}$ is equivalent to $\left (a^{1/4} \right)^{1/4}$. + \chapter{Prime Numbers} \section{Trial Division} \index{mp\_prime\_is\_divisible} @@ -1168,10 +1561,44 @@ \section{Random Primes} \end{alltt} Which is a function that must read $len$ bytes (and return the amount stored) into $dst$. The $dat$ variable is simply -copied from the original input. It can be used to pass RNG context data to the callback. +copied from the original input. It can be used to pass RNG context data to the callback. The function +mp\_prime\_random() is more suitable for generating primes which must be secret (as in the case of RSA) since there +is no skew on the least significant bits. -The function mp\_prime\_random() is more suitable for generating primes which must be secret (as in the case of RSA) since -there is no skew on the least significant bits. +\textit{Note:} As of v0.30 of the LibTomMath library this function has been deprecated. It is still available +but users are encouraged to use the new mp\_prime\_random\_ex() function instead. + +\subsection{Extended Generation} +\index{mp\_prime\_random\_ex} +\begin{alltt} +int mp_prime_random_ex(mp_int *a, int t, + int size, int flags, + ltm_prime_callback cb, void *dat); +\end{alltt} +This will generate a prime in $a$ using $t$ tests of the primality testing algorithms. The variable $size$ +specifies the bit length of the prime desired. The variable $flags$ specifies one of several options available +(see fig. \ref{fig:primeopts}) which can be OR'ed together. The callback parameters are used as in +mp\_prime\_random(). + +\begin{figure}[here] +\begin{center} +\begin{small} +\begin{tabular}{|r|l|} +\hline \textbf{Flag} & \textbf{Meaning} \\ +\hline LTM\_PRIME\_BBS & Make the prime congruent to $3$ modulo $4$ \\ +\hline LTM\_PRIME\_SAFE & Make a prime $p$ such that $(p - 1)/2$ is also prime. \\ + & This option implies LTM\_PRIME\_BBS as well. \\ +\hline LTM\_PRIME\_2MSB\_OFF & Makes sure that the bit adjacent to the most significant bit \\ + & Is forced to zero. \\ +\hline LTM\_PRIME\_2MSB\_ON & Makes sure that the bit adjacent to the most significant bit \\ + & Is forced to one. \\ +\hline +\end{tabular} +\end{small} +\end{center} +\caption{Primality Generation Options} +\label{fig:primeopts} +\end{figure} \chapter{Input and Output} \section{ASCII Conversions} @@ -1180,7 +1607,7 @@ \subsection{To ASCII} \begin{alltt} int mp_toradix (mp_int * a, char *str, int radix); \end{alltt} -This still store ``a'' in ``str'' as a base-``radix'' string of ASCII chars. This function appends a NUL character +This still store $a$ in ``str'' as a base-``radix'' string of ASCII chars. This function appends a NUL character to terminate the string. Valid values of ``radix'' line in the range $[2, 64]$. To determine the size (exact) required by the conversion before storing any data use the following function. @@ -1196,12 +1623,46 @@ \subsection{From ASCII} \begin{alltt} int mp_read_radix (mp_int * a, char *str, int radix); \end{alltt} -This will read the base-``radix'' NUL terminated string from ``str'' into ``a''. It will stop reading when it reads a +This will read the base-``radix'' NUL terminated string from ``str'' into $a$. It will stop reading when it reads a character it does not recognize (which happens to include th NUL char... imagine that...). A single leading $-$ sign can be used to denote a negative number. \section{Binary Conversions} -\section{Stream Functions} + +Converting an mp\_int to and from binary is another keen idea. + +\index{mp\_unsigned\_bin\_size} +\begin{alltt} +int mp_unsigned_bin_size(mp_int *a); +\end{alltt} + +This will return the number of bytes (octets) required to store the unsigned copy of the integer $a$. + +\index{mp\_to\_unsigned\_bin} +\begin{alltt} +int mp_to_unsigned_bin(mp_int *a, unsigned char *b); +\end{alltt} +This will store $a$ into the buffer $b$ in big--endian format. Fortunately this is exactly what DER (or is it ASN?) +requires. It does not store the sign of the integer. + +\index{mp\_read\_unsigned\_bin} +\begin{alltt} +int mp_read_unsigned_bin(mp_int *a, unsigned char *b, int c); +\end{alltt} +This will read in an unsigned big--endian array of bytes (octets) from $b$ of length $c$ into $a$. The resulting +integer $a$ will always be positive. + +For those who acknowledge the existence of negative numbers (heretic!) there are ``signed'' versions of the +previous functions. + +\begin{alltt} +int mp_signed_bin_size(mp_int *a); +int mp_read_signed_bin(mp_int *a, unsigned char *b, int c); +int mp_to_signed_bin(mp_int *a, unsigned char *b); +\end{alltt} +They operate essentially the same as the unsigned copies except they prefix the data with zero or non--zero +byte depending on the sign. If the sign is zpos (e.g. not negative) the prefix is zero, otherwise the prefix +is non--zero. \chapter{Algebraic Functions} \section{Extended Euclidean Algorithm} @@ -1217,6 +1678,8 @@ \section{Extended Euclidean Algorithm} a \cdot U1 + b \cdot U2 = U3 \end{equation} +Any of the U1/U2/U3 paramters can be set to \textbf{NULL} if they are not desired. + \section{Greatest Common Divisor} \index{mp\_gcd} \begin{alltt} @@ -1248,9 +1711,22 @@ \section{Modular Inverse} \end{alltt} Computes the multiplicative inverse of $a$ modulo $b$ and stores the result in $c$ such that $ac \equiv 1 \mbox{ (mod }b\mbox{)}$. +\section{Single Digit Functions} +For those using small numbers (\textit{snicker snicker}) there are several ``helper'' functions -\section{Single Digit Functions} +\index{mp\_add\_d} \index{mp\_sub\_d} \index{mp\_mul\_d} \index{mp\_div\_d} \index{mp\_mod\_d} +\begin{alltt} +int mp_add_d(mp_int *a, mp_digit b, mp_int *c); +int mp_sub_d(mp_int *a, mp_digit b, mp_int *c); +int mp_mul_d(mp_int *a, mp_digit b, mp_int *c); +int mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d); +int mp_mod_d(mp_int *a, mp_digit b, mp_digit *c); +\end{alltt} + +These work like the full mp\_int capable variants except the second parameter $b$ is a mp\_digit. These +functions fairly handy if you have to work with relatively small numbers since you will not have to allocate +an entire mp\_int to store a number like $1$ or $2$. \input{bn.ind} diff --git a/bn_fast_s_mp_sqr.c b/bn_fast_s_mp_sqr.c index d5b9787d0..f62ae54e3 100644 --- a/bn_fast_s_mp_sqr.c +++ b/bn_fast_s_mp_sqr.c @@ -31,8 +31,7 @@ * Based on Algorithm 14.16 on pp.597 of HAC. * */ -int -fast_s_mp_sqr (mp_int * a, mp_int * b) +int fast_s_mp_sqr (mp_int * a, mp_int * b) { int olduse, newused, res, ix, pa; mp_word W2[MP_WARRAY], W[MP_WARRAY]; diff --git a/bn_mp_cnt_lsb.c b/bn_mp_cnt_lsb.c index e46161b21..07cb709ca 100644 --- a/bn_mp_cnt_lsb.c +++ b/bn_mp_cnt_lsb.c @@ -14,11 +14,15 @@ */ #include +static const int lnz[16] = { + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +}; + /* Counts the number of lsbs which are zero before the first zero bit */ int mp_cnt_lsb(mp_int *a) { int x; - mp_digit q; + mp_digit q, qq; /* easy out */ if (mp_iszero(a) == 1) { @@ -31,11 +35,13 @@ int mp_cnt_lsb(mp_int *a) x *= DIGIT_BIT; /* now scan this digit until a 1 is found */ - while ((q & 1) == 0) { - q >>= 1; - x += 1; + if ((q & 1) == 0) { + do { + qq = q & 15; + x += lnz[qq]; + q >>= 4; + } while (qq == 0); } - return x; } diff --git a/bn_mp_exteuclid.c b/bn_mp_exteuclid.c index 6875f7222..cb3f78726 100644 --- a/bn_mp_exteuclid.c +++ b/bn_mp_exteuclid.c @@ -1,69 +1,69 @@ -/* LibTomMath, multiple-precision integer library -- Tom St Denis - * - * LibTomMath is a library that provides multiple-precision - * integer arithmetic as well as number theoretic functionality. - * - * The library was designed directly after the MPI library by - * Michael Fromberger but has been written from scratch with - * additional optimizations in place. - * - * The library is free for all purposes without any express - * guarantee it works. - * - * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org - */ -#include - -/* Extended euclidean algorithm of (a, b) produces - a*u1 + b*u2 = u3 - */ -int mp_exteuclid(mp_int *a, mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3) -{ - mp_int u1,u2,u3,v1,v2,v3,t1,t2,t3,q,tmp; - int err; - - if ((err = mp_init_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL)) != MP_OKAY) { - return err; - } - - /* initialize, (u1,u2,u3) = (1,0,a) */ - mp_set(&u1, 1); - if ((err = mp_copy(a, &u3)) != MP_OKAY) { goto _ERR; } - - /* initialize, (v1,v2,v3) = (0,1,b) */ - mp_set(&v2, 1); - if ((err = mp_copy(b, &v3)) != MP_OKAY) { goto _ERR; } - - /* loop while v3 != 0 */ - while (mp_iszero(&v3) == MP_NO) { - /* q = u3/v3 */ - if ((err = mp_div(&u3, &v3, &q, NULL)) != MP_OKAY) { goto _ERR; } - - /* (t1,t2,t3) = (u1,u2,u3) - (v1,v2,v3)q */ - if ((err = mp_mul(&v1, &q, &tmp)) != MP_OKAY) { goto _ERR; } - if ((err = mp_sub(&u1, &tmp, &t1)) != MP_OKAY) { goto _ERR; } - if ((err = mp_mul(&v2, &q, &tmp)) != MP_OKAY) { goto _ERR; } - if ((err = mp_sub(&u2, &tmp, &t2)) != MP_OKAY) { goto _ERR; } - if ((err = mp_mul(&v3, &q, &tmp)) != MP_OKAY) { goto _ERR; } - if ((err = mp_sub(&u3, &tmp, &t3)) != MP_OKAY) { goto _ERR; } - - /* (u1,u2,u3) = (v1,v2,v3) */ - if ((err = mp_copy(&v1, &u1)) != MP_OKAY) { goto _ERR; } - if ((err = mp_copy(&v2, &u2)) != MP_OKAY) { goto _ERR; } - if ((err = mp_copy(&v3, &u3)) != MP_OKAY) { goto _ERR; } - - /* (v1,v2,v3) = (t1,t2,t3) */ - if ((err = mp_copy(&t1, &v1)) != MP_OKAY) { goto _ERR; } - if ((err = mp_copy(&t2, &v2)) != MP_OKAY) { goto _ERR; } - if ((err = mp_copy(&t3, &v3)) != MP_OKAY) { goto _ERR; } - } - - /* copy result out */ - if (U1 != NULL) { mp_exch(U1, &u1); } - if (U2 != NULL) { mp_exch(U2, &u2); } - if (U3 != NULL) { mp_exch(U3, &u3); } - - err = MP_OKAY; -_ERR: mp_clear_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL); - return err; -} +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* Extended euclidean algorithm of (a, b) produces + a*u1 + b*u2 = u3 + */ +int mp_exteuclid(mp_int *a, mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3) +{ + mp_int u1,u2,u3,v1,v2,v3,t1,t2,t3,q,tmp; + int err; + + if ((err = mp_init_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL)) != MP_OKAY) { + return err; + } + + /* initialize, (u1,u2,u3) = (1,0,a) */ + mp_set(&u1, 1); + if ((err = mp_copy(a, &u3)) != MP_OKAY) { goto _ERR; } + + /* initialize, (v1,v2,v3) = (0,1,b) */ + mp_set(&v2, 1); + if ((err = mp_copy(b, &v3)) != MP_OKAY) { goto _ERR; } + + /* loop while v3 != 0 */ + while (mp_iszero(&v3) == MP_NO) { + /* q = u3/v3 */ + if ((err = mp_div(&u3, &v3, &q, NULL)) != MP_OKAY) { goto _ERR; } + + /* (t1,t2,t3) = (u1,u2,u3) - (v1,v2,v3)q */ + if ((err = mp_mul(&v1, &q, &tmp)) != MP_OKAY) { goto _ERR; } + if ((err = mp_sub(&u1, &tmp, &t1)) != MP_OKAY) { goto _ERR; } + if ((err = mp_mul(&v2, &q, &tmp)) != MP_OKAY) { goto _ERR; } + if ((err = mp_sub(&u2, &tmp, &t2)) != MP_OKAY) { goto _ERR; } + if ((err = mp_mul(&v3, &q, &tmp)) != MP_OKAY) { goto _ERR; } + if ((err = mp_sub(&u3, &tmp, &t3)) != MP_OKAY) { goto _ERR; } + + /* (u1,u2,u3) = (v1,v2,v3) */ + if ((err = mp_copy(&v1, &u1)) != MP_OKAY) { goto _ERR; } + if ((err = mp_copy(&v2, &u2)) != MP_OKAY) { goto _ERR; } + if ((err = mp_copy(&v3, &u3)) != MP_OKAY) { goto _ERR; } + + /* (v1,v2,v3) = (t1,t2,t3) */ + if ((err = mp_copy(&t1, &v1)) != MP_OKAY) { goto _ERR; } + if ((err = mp_copy(&t2, &v2)) != MP_OKAY) { goto _ERR; } + if ((err = mp_copy(&t3, &v3)) != MP_OKAY) { goto _ERR; } + } + + /* copy result out */ + if (U1 != NULL) { mp_exch(U1, &u1); } + if (U2 != NULL) { mp_exch(U2, &u2); } + if (U3 != NULL) { mp_exch(U3, &u3); } + + err = MP_OKAY; +_ERR: mp_clear_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL); + return err; +} diff --git a/bn_mp_fwrite.c b/bn_mp_fwrite.c index 61acfc33e..2853ec1b5 100644 --- a/bn_mp_fwrite.c +++ b/bn_mp_fwrite.c @@ -23,7 +23,7 @@ int mp_fwrite(mp_int *a, int radix, FILE *stream) return err; } - buf = XMALLOC (len); + buf = OPT_CAST(char) XMALLOC (len); if (buf == NULL) { return MP_MEM; } diff --git a/bn_mp_get_int.c b/bn_mp_get_int.c new file mode 100644 index 000000000..41df6e116 --- /dev/null +++ b/bn_mp_get_int.c @@ -0,0 +1,39 @@ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* get the lower 32-bits of an mp_int */ +unsigned long mp_get_int(mp_int * a) +{ + int i; + unsigned long res; + + if (a->used == 0) { + return 0; + } + + /* get number of digits of the lsb we have to read */ + i = MIN(a->used,(int)((sizeof(unsigned long)*CHAR_BIT+DIGIT_BIT-1)/DIGIT_BIT))-1; + + /* get most significant digit of result */ + res = DIGIT(a,i); + + while (--i >= 0) { + res = (res << DIGIT_BIT) | DIGIT(a,i); + } + + /* force result to 32-bits always so it is consistent on non 32-bit platforms */ + return res & 0xFFFFFFFFUL; +} diff --git a/bn_mp_grow.c b/bn_mp_grow.c index 157ac3f05..43a30284b 100644 --- a/bn_mp_grow.c +++ b/bn_mp_grow.c @@ -31,7 +31,7 @@ int mp_grow (mp_int * a, int size) * in case the operation failed we don't want * to overwrite the dp member of a. */ - tmp = OPT_CAST XREALLOC (a->dp, sizeof (mp_digit) * size); + tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size); if (tmp == NULL) { /* reallocation failed but "a" is still valid [can be freed] */ return MP_MEM; diff --git a/bn_mp_init.c b/bn_mp_init.c index 993ce648e..5c5c1adf8 100644 --- a/bn_mp_init.c +++ b/bn_mp_init.c @@ -18,7 +18,7 @@ int mp_init (mp_int * a) { /* allocate memory required and clear it */ - a->dp = OPT_CAST XCALLOC (sizeof (mp_digit), MP_PREC); + a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), MP_PREC); if (a->dp == NULL) { return MP_MEM; } diff --git a/bn_mp_init_set.c b/bn_mp_init_set.c new file mode 100644 index 000000000..c8d8bf825 --- /dev/null +++ b/bn_mp_init_set.c @@ -0,0 +1,26 @@ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* initialize and set a digit */ +int mp_init_set (mp_int * a, mp_digit b) +{ + int err; + if ((err = mp_init(a)) != MP_OKAY) { + return err; + } + mp_set(a, b); + return err; +} diff --git a/bn_mp_init_set_int.c b/bn_mp_init_set_int.c new file mode 100644 index 000000000..2d6628d54 --- /dev/null +++ b/bn_mp_init_set_int.c @@ -0,0 +1,25 @@ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* initialize and set a digit */ +int mp_init_set_int (mp_int * a, unsigned long b) +{ + int err; + if ((err = mp_init(a)) != MP_OKAY) { + return err; + } + return mp_set_int(a, b); +} diff --git a/bn_mp_init_size.c b/bn_mp_init_size.c index be27d0768..c763ee0f2 100644 --- a/bn_mp_init_size.c +++ b/bn_mp_init_size.c @@ -21,7 +21,7 @@ int mp_init_size (mp_int * a, int size) size += (MP_PREC * 2) - (size % MP_PREC); /* alloc mem */ - a->dp = OPT_CAST XCALLOC (sizeof (mp_digit), size); + a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), size); if (a->dp == NULL) { return MP_MEM; } diff --git a/bn_mp_is_square.c b/bn_mp_is_square.c new file mode 100644 index 000000000..1f01bca51 --- /dev/null +++ b/bn_mp_is_square.c @@ -0,0 +1,103 @@ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* Check if remainders are possible squares - fast exclude non-squares */ +static const char rem_128[128] = { + 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1 +}; + +static const char rem_105[105] = { + 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, + 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1 +}; + +/* Store non-zero to ret if arg is square, and zero if not */ +int mp_is_square(mp_int *arg,int *ret) +{ + int res; + mp_digit c; + mp_int t; + unsigned long r; + + /* Default to Non-square :) */ + *ret = MP_NO; + + if (arg->sign == MP_NEG) { + return MP_VAL; + } + + /* digits used? (TSD) */ + if (arg->used == 0) { + return MP_OKAY; + } + + /* First check mod 128 (suppose that DIGIT_BIT is at least 7) */ + if (rem_128[127 & DIGIT(arg,0)] == 1) { + return MP_OKAY; + } + + /* Next check mod 105 (3*5*7) */ + if ((res = mp_mod_d(arg,105,&c)) != MP_OKAY) { + return res; + } + if (rem_105[c] == 1) { + return MP_OKAY; + } + + /* product of primes less than 2^31 */ + if ((res = mp_init_set_int(&t,11L*13L*17L*19L*23L*29L*31L)) != MP_OKAY) { + return res; + } + if ((res = mp_mod(arg,&t,&t)) != MP_OKAY) { + goto ERR; + } + r = mp_get_int(&t); + /* Check for other prime modules, note it's not an ERROR but we must + * free "t" so the easiest way is to goto ERR. We know that res + * is already equal to MP_OKAY from the mp_mod call + */ + if ( (1L<<(r%11)) & 0x5C4L ) goto ERR; + if ( (1L<<(r%13)) & 0x9E4L ) goto ERR; + if ( (1L<<(r%17)) & 0x5CE8L ) goto ERR; + if ( (1L<<(r%19)) & 0x4F50CL ) goto ERR; + if ( (1L<<(r%23)) & 0x7ACCA0L ) goto ERR; + if ( (1L<<(r%29)) & 0xC2EDD0CL ) goto ERR; + if ( (1L<<(r%31)) & 0x6DE2B848L ) goto ERR; + + /* Final check - is sqr(sqrt(arg)) == arg ? */ + if ((res = mp_sqrt(arg,&t)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sqr(&t,&t)) != MP_OKAY) { + goto ERR; + } + + *ret = (mp_cmp_mag(&t,arg) == MP_EQ) ? MP_YES : MP_NO; +ERR:mp_clear(&t); + return res; +} diff --git a/bn_mp_karatsuba_mul.c b/bn_mp_karatsuba_mul.c index 95353ea85..169dacf90 100644 --- a/bn_mp_karatsuba_mul.c +++ b/bn_mp_karatsuba_mul.c @@ -43,8 +43,7 @@ * Generally though the overhead of this method doesn't pay off * until a certain size (N ~ 80) is reached. */ -int -mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) +int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) { mp_int x0, x1, y0, y1, t1, x0y0, x1y1; int B, err; @@ -56,7 +55,7 @@ mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) B = MIN (a->used, b->used); /* now divide in two */ - B = B / 2; + B = B >> 1; /* init copy all the temps */ if (mp_init_size (&x0, B) != MP_OKAY) diff --git a/bn_mp_karatsuba_sqr.c b/bn_mp_karatsuba_sqr.c index 04dd286dd..c33561359 100644 --- a/bn_mp_karatsuba_sqr.c +++ b/bn_mp_karatsuba_sqr.c @@ -21,8 +21,7 @@ * is essentially the same algorithm but merely * tuned to perform recursive squarings. */ -int -mp_karatsuba_sqr (mp_int * a, mp_int * b) +int mp_karatsuba_sqr (mp_int * a, mp_int * b) { mp_int x0, x1, t1, t2, x0x0, x1x1; int B, err; @@ -33,7 +32,7 @@ mp_karatsuba_sqr (mp_int * a, mp_int * b) B = a->used; /* now divide in two */ - B = B / 2; + B = B >> 1; /* init copy all the temps */ if (mp_init_size (&x0, B) != MP_OKAY) diff --git a/bn_mp_mod.c b/bn_mp_mod.c index d5ebb1698..ad963a976 100644 --- a/bn_mp_mod.c +++ b/bn_mp_mod.c @@ -21,7 +21,6 @@ mp_mod (mp_int * a, mp_int * b, mp_int * c) mp_int t; int res; - if ((res = mp_init (&t)) != MP_OKAY) { return res; } @@ -31,7 +30,7 @@ mp_mod (mp_int * a, mp_int * b, mp_int * c) return res; } - if (t.sign == MP_NEG) { + if (t.sign != b->sign) { res = mp_add (b, &t, c); } else { res = MP_OKAY; diff --git a/bn_mp_n_root.c b/bn_mp_n_root.c index 755468bac..a79af174e 100644 --- a/bn_mp_n_root.c +++ b/bn_mp_n_root.c @@ -101,7 +101,7 @@ int mp_n_root (mp_int * a, mp_digit b, mp_int * c) if (mp_cmp (&t2, a) == MP_GT) { if ((res = mp_sub_d (&t1, 1, &t1)) != MP_OKAY) { - goto __T3; + goto __T3; } } else { break; diff --git a/bn_mp_neg.c b/bn_mp_neg.c index debdbd8ec..f9de6e434 100644 --- a/bn_mp_neg.c +++ b/bn_mp_neg.c @@ -21,7 +21,7 @@ int mp_neg (mp_int * a, mp_int * b) if ((res = mp_copy (a, b)) != MP_OKAY) { return res; } - if (mp_iszero(b) != 1) { + if (mp_iszero(b) != MP_YES) { b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS; } return MP_OKAY; diff --git a/bn_mp_prime_random.c b/bn_mp_prime_random.c deleted file mode 100644 index c28859bc1..000000000 --- a/bn_mp_prime_random.c +++ /dev/null @@ -1,74 +0,0 @@ -/* LibTomMath, multiple-precision integer library -- Tom St Denis - * - * LibTomMath is a library that provides multiple-precision - * integer arithmetic as well as number theoretic functionality. - * - * The library was designed directly after the MPI library by - * Michael Fromberger but has been written from scratch with - * additional optimizations in place. - * - * The library is free for all purposes without any express - * guarantee it works. - * - * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org - */ -#include - -/* makes a truly random prime of a given size (bytes), - * call with bbs = 1 if you want it to be congruent to 3 mod 4 - * - * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can - * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself - * so it can be NULL - * - * The prime generated will be larger than 2^(8*size). - */ - -/* this sole function may hold the key to enslaving all mankind! */ -int mp_prime_random(mp_int *a, int t, int size, int bbs, ltm_prime_callback cb, void *dat) -{ - unsigned char *tmp; - int res, err; - - /* sanity check the input */ - if (size <= 0) { - return MP_VAL; - } - - /* we need a buffer of size+1 bytes */ - tmp = XMALLOC(size+1); - if (tmp == NULL) { - return MP_MEM; - } - - /* fix MSB */ - tmp[0] = 1; - - do { - /* read the bytes */ - if (cb(tmp+1, size, dat) != size) { - err = MP_VAL; - goto error; - } - - /* fix the LSB */ - tmp[size] |= (bbs ? 3 : 1); - - /* read it in */ - if ((err = mp_read_unsigned_bin(a, tmp, size+1)) != MP_OKAY) { - goto error; - } - - /* is it prime? */ - if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { - goto error; - } - } while (res == MP_NO); - - err = MP_OKAY; -error: - XFREE(tmp); - return err; -} - - diff --git a/bn_mp_prime_random_ex.c b/bn_mp_prime_random_ex.c new file mode 100644 index 000000000..147721bda --- /dev/null +++ b/bn_mp_prime_random_ex.c @@ -0,0 +1,118 @@ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* makes a truly random prime of a given size (bits), + * + * Flags are as follows: + * + * LTM_PRIME_BBS - make prime congruent to 3 mod 4 + * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS) + * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero + * LTM_PRIME_2MSB_ON - make the 2nd highest bit one + * + * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can + * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself + * so it can be NULL + * + */ + +/* This is possibly the mother of all prime generation functions, muahahahahaha! */ +int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat) +{ + unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb; + int res, err, bsize, maskOR_msb_offset; + + /* sanity check the input */ + if (size <= 1 || t <= 0) { + return MP_VAL; + } + + /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */ + if (flags & LTM_PRIME_SAFE) { + flags |= LTM_PRIME_BBS; + } + + /* calc the byte size */ + bsize = (size>>3)+(size&7?1:0); + + /* we need a buffer of bsize bytes */ + tmp = OPT_CAST(unsigned char) XMALLOC(bsize); + if (tmp == NULL) { + return MP_MEM; + } + + /* calc the maskAND value for the MSbyte*/ + maskAND = 0xFF >> (8 - (size & 7)); + + /* calc the maskOR_msb */ + maskOR_msb = 0; + maskOR_msb_offset = (size - 2) >> 3; + if (flags & LTM_PRIME_2MSB_ON) { + maskOR_msb |= 1 << ((size - 2) & 7); + } else if (flags & LTM_PRIME_2MSB_OFF) { + maskAND &= ~(1 << ((size - 2) & 7)); + } + + /* get the maskOR_lsb */ + maskOR_lsb = 0; + if (flags & LTM_PRIME_BBS) { + maskOR_lsb |= 3; + } + + do { + /* read the bytes */ + if (cb(tmp, bsize, dat) != bsize) { + err = MP_VAL; + goto error; + } + + /* work over the MSbyte */ + tmp[0] &= maskAND; + tmp[0] |= 1 << ((size - 1) & 7); + + /* mix in the maskORs */ + tmp[maskOR_msb_offset] |= maskOR_msb; + tmp[bsize-1] |= maskOR_lsb; + + /* read it in */ + if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) { goto error; } + + /* is it prime? */ + if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; } + + if (flags & LTM_PRIME_SAFE) { + /* see if (a-1)/2 is prime */ + if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) { goto error; } + if ((err = mp_div_2(a, a)) != MP_OKAY) { goto error; } + + /* is it prime? */ + if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; } + } + } while (res == MP_NO); + + if (flags & LTM_PRIME_SAFE) { + /* restore a to the original value */ + if ((err = mp_mul_2(a, a)) != MP_OKAY) { goto error; } + if ((err = mp_add_d(a, 1, a)) != MP_OKAY) { goto error; } + } + + err = MP_OKAY; +error: + XFREE(tmp); + return err; +} + + diff --git a/bn_mp_reduce_2k.c b/bn_mp_reduce_2k.c index f73d3b9fa..f6b40cf18 100644 --- a/bn_mp_reduce_2k.c +++ b/bn_mp_reduce_2k.c @@ -14,9 +14,9 @@ */ #include -/* reduces a modulo n where n is of the form 2**p - k */ +/* reduces a modulo n where n is of the form 2**p - d */ int -mp_reduce_2k(mp_int *a, mp_int *n, mp_digit k) +mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d) { mp_int q; int p, res; @@ -32,9 +32,9 @@ mp_reduce_2k(mp_int *a, mp_int *n, mp_digit k) goto ERR; } - if (k != 1) { - /* q = q * k */ - if ((res = mp_mul_d(&q, k, &q)) != MP_OKAY) { + if (d != 1) { + /* q = q * d */ + if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) { goto ERR; } } diff --git a/bn_mp_shrink.c b/bn_mp_shrink.c index a419b6eba..daefed2b4 100644 --- a/bn_mp_shrink.c +++ b/bn_mp_shrink.c @@ -19,7 +19,7 @@ int mp_shrink (mp_int * a) { mp_digit *tmp; if (a->alloc != a->used && a->used > 0) { - if ((tmp = OPT_CAST XREALLOC (a->dp, sizeof (mp_digit) * a->used)) == NULL) { + if ((tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * a->used)) == NULL) { return MP_MEM; } a->dp = tmp; diff --git a/bn_mp_sqrt.c b/bn_mp_sqrt.c new file mode 100644 index 000000000..ec9d1026c --- /dev/null +++ b/bn_mp_sqrt.c @@ -0,0 +1,75 @@ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* this function is less generic than mp_n_root, simpler and faster */ +int mp_sqrt(mp_int *arg, mp_int *ret) +{ + int res; + mp_int t1,t2; + + /* must be positive */ + if (arg->sign == MP_NEG) { + return MP_VAL; + } + + /* easy out */ + if (mp_iszero(arg) == MP_YES) { + mp_zero(ret); + return MP_OKAY; + } + + if ((res = mp_init_copy(&t1, arg)) != MP_OKAY) { + return res; + } + + if ((res = mp_init(&t2)) != MP_OKAY) { + goto E2; + } + + /* First approx. (not very bad for large arg) */ + mp_rshd (&t1,t1.used/2); + + /* t1 > 0 */ + if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) { + goto E1; + } + if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) { + goto E1; + } + if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) { + goto E1; + } + /* And now t1 > sqrt(arg) */ + do { + if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) { + goto E1; + } + if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) { + goto E1; + } + if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) { + goto E1; + } + /* t1 >= sqrt(arg) >= t2 at this point */ + } while (mp_cmp_mag(&t1,&t2) == MP_GT); + + mp_exch(&t1,ret); + +E1: mp_clear(&t2); +E2: mp_clear(&t1); + return res; +} + diff --git a/bn_mp_toom_mul.c b/bn_mp_toom_mul.c index 2ba46242e..50660ffda 100644 --- a/bn_mp_toom_mul.c +++ b/bn_mp_toom_mul.c @@ -15,8 +15,7 @@ #include /* multiplication using the Toom-Cook 3-way algorithm */ -int -mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) +int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) { mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; int res, B; diff --git a/bn_mp_toradix_n.c b/bn_mp_toradix_n.c new file mode 100644 index 000000000..d2f6ec20c --- /dev/null +++ b/bn_mp_toradix_n.c @@ -0,0 +1,83 @@ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* stores a bignum as a ASCII string in a given radix (2..64) + * + * Stores upto maxlen-1 chars and always a NULL byte + */ +int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen) +{ + int res, digs; + mp_int t; + mp_digit d; + char *_s = str; + + /* check range of the maxlen, radix */ + if (maxlen < 3 || radix < 2 || radix > 64) { + return MP_VAL; + } + + /* quick out if its zero */ + if (mp_iszero(a) == 1) { + *str++ = '0'; + *str = '\0'; + return MP_OKAY; + } + + if ((res = mp_init_copy (&t, a)) != MP_OKAY) { + return res; + } + + /* if it is negative output a - */ + if (t.sign == MP_NEG) { + /* we have to reverse our digits later... but not the - sign!! */ + ++_s; + + /* store the flag and mark the number as positive */ + *str++ = '-'; + t.sign = MP_ZPOS; + + /* subtract a char */ + --maxlen; + } + + digs = 0; + while (mp_iszero (&t) == 0) { + if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { + mp_clear (&t); + return res; + } + *str++ = mp_s_rmap[d]; + ++digs; + + if (--maxlen == 1) { + /* no more room */ + break; + } + } + + /* reverse the digits of the string. In this case _s points + * to the first digit [exluding the sign] of the number] + */ + bn_reverse ((unsigned char *)_s, digs); + + /* append a NULL so the string is properly terminated */ + *str = '\0'; + + mp_clear (&t); + return MP_OKAY; +} + diff --git a/bn_s_mp_sqr.c b/bn_s_mp_sqr.c index b28247045..3a00a4ed6 100644 --- a/bn_s_mp_sqr.c +++ b/bn_s_mp_sqr.c @@ -26,8 +26,8 @@ s_mp_sqr (mp_int * a, mp_int * b) pa = a->used; if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) { return res; - } - + } + /* default used is maximum possible size */ t.used = 2*pa + 1; diff --git a/changes.txt b/changes.txt index 3998eb8d5..63e9d617a 100644 --- a/changes.txt +++ b/changes.txt @@ -1,13 +1,32 @@ +April 11th, 2004 +v0.30 -- Added "mp_toradix_n" which stores upto "n-1" least significant digits of an mp_int + -- Johan Lindh sent a patch so MSVC wouldn't whine about redefining malloc [in weird dll modes] + -- Henrik Goldman spotted a missing OPT_CAST in mp_fwrite() + -- Tuned tommath.h so that when MP_LOW_MEM is defined MP_PREC shall be reduced. + [I also allow MP_PREC to be externally defined now] + -- Sped up mp_cnt_lsb() by using a 4x4 table [e.g. 4x speedup] + -- Added mp_prime_random_ex() which is a more versatile prime generator accurate to + exact bit lengths (unlike the deprecated but still available mp_prime_random() which + is only accurate to byte lengths). See the new LTM_PRIME_* flags ;-) + -- Alex Polushin contributed an optimized mp_sqrt() as well as mp_get_int() and mp_is_square(). + I've cleaned them all up to be a little more consistent [along with one bug fix] for this release. + -- Added mp_init_set and mp_init_set_int to initialize and set small constants with one function + call. + -- Removed /etclib directory [um LibTomPoly deprecates this]. + -- Fixed mp_mod() so the sign of the result agrees with the sign of the modulus. + ++ N.B. My semester is almost up so expect updates to the textbook to be posted to the libtomcrypt.org + website. + Jan 25th, 2004 v0.29 ++ Note: "Henrik" from the v0.28 changelog refers to Henrik Goldman ;-) -- Added fix to mp_shrink to prevent a realloc when used == 0 [e.g. realloc zero bytes???] - -- Made the mp_prime_rabin_miller_trials() function internal table smaller and also + -- Made the mp_prime_rabin_miller_trials() function internal table smaller and also set the minimum number of tests to two (sounds a bit safer). -- Added a mp_exteuclid() which computes the extended euclidean algorithm. -- Fixed a memory leak in s_mp_exptmod() [called when Barrett reduction is to be used] which would arise - if a multiplication or subsequent reduction failed [would not free the temp result]. - -- Made an API change to mp_radix_size(). It now returns an error code and stores the required size - through an "int star" passed to it. + if a multiplication or subsequent reduction failed [would not free the temp result]. + -- Made an API change to mp_radix_size(). It now returns an error code and stores the required size + through an "int star" passed to it. Dec 24th, 2003 v0.28 -- Henrik Goldman suggested I add casts to the montomgery code [stores into mu...] so compilers wouldn't @@ -18,17 +37,17 @@ v0.28 -- Henrik Goldman suggested I add casts to the montomgery code [stores in (idea from chat with Niels Ferguson at Crypto'03) -- Picked up a second wind. I'm filled with Gooo. Mission Gooo! -- Removed divisions from mp_reduce_is_2k() - -- Sped up mp_div_d() [general case] to use only one division per digit instead of two. + -- Sped up mp_div_d() [general case] to use only one division per digit instead of two. -- Added the heap macros from LTC to LTM. Now you can easily [by editing four lines of tommath.h] change the name of the heap functions used in LTM [also compatible with LTC via MPI mode] -- Added bn_prime_rabin_miller_trials() which gives the number of Rabin-Miller trials to achieve a failure rate of less than 2^-96 -- fixed bug in fast_mp_invmod(). The initial testing logic was wrong. An invalid input is not when - "a" and "b" are even it's when "b" is even [the algo is for odd moduli only]. + "a" and "b" are even it's when "b" is even [the algo is for odd moduli only]. -- Started a new manual [finally]. It is incomplete and will be finished as time goes on. I had to stop - adding full demos around half way in chapter three so I could at least get a good portion of the + adding full demos around half way in chapter three so I could at least get a good portion of the manual done. If you really need help using the library you can always email me! - -- My Textbook is now included as part of the package [all Public Domain] + -- My Textbook is now included as part of the package [all Public Domain] Sept 19th, 2003 v0.27 -- Removed changes.txt~ which was made by accident since "kate" decided it was @@ -41,7 +60,7 @@ v0.27 -- Removed changes.txt~ which was made by accident since "kate" decided i Aug 29th, 2003 v0.26 -- Fixed typo that caused warning with GCC 3.2 - -- Martin Marcel noticed a bug in mp_neg() that allowed negative zeroes. + -- Martin Marcel noticed a bug in mp_neg() that allowed negative zeroes. Also, Martin is the fellow who noted the bugs in mp_gcd() of 0.24/0.25. -- Martin Marcel noticed an optimization [and slight bug] in mp_lcm(). -- Added fix to mp_read_unsigned_bin to prevent a buffer overflow. @@ -102,18 +121,18 @@ v0.22 -- Fixed up mp_invmod so the result is properly in range now [was always June 19th, 2003 v0.21 -- Fixed bug in mp_mul_d which would not handle sign correctly [would not always forward it] - -- Removed the #line lines from gen.pl [was in violation of ISO C] + -- Removed the #line lines from gen.pl [was in violation of ISO C] June 8th, 2003 -v0.20 -- Removed the book from the package. Added the TDCAL license document. +v0.20 -- Removed the book from the package. Added the TDCAL license document. -- This release is officially pure-bred TDCAL again [last officially TDCAL based release was v0.16] June 6th, 2003 v0.19 -- Fixed a bug in mp_montgomery_reduce() which was introduced when I tweaked mp_rshd() in the previous release. Essentially the digits were not trimmed before the compare which cause a subtraction to occur all the time. - -- Fixed up etc/tune.c a bit to stop testing new cutoffs after 16 failures [to find more optimal points]. + -- Fixed up etc/tune.c a bit to stop testing new cutoffs after 16 failures [to find more optimal points]. Brute force ho! - + May 29th, 2003 v0.18 -- Fixed a bug in s_mp_sqr which would handle carries properly just not very elegantly. @@ -121,7 +140,7 @@ v0.18 -- Fixed a bug in s_mp_sqr which would handle carries properly just not v -- Fixed bug in mp_sqr which still had a 512 constant instead of MP_WARRAY -- Added Toom-Cook multipliers [needs tuning!] -- Added efficient divide by 3 algorithm mp_div_3 - -- Re-wrote mp_div_d to be faster than calling mp_div + -- Re-wrote mp_div_d to be faster than calling mp_div -- Added in a donated BCC makefile and a single page LTM poster (ahalhabsi@sbcglobal.net) -- Added mp_reduce_2k which reduces an input modulo n = 2**p - k for any single digit k -- Made the exptmod system be aware of the 2k reduction algorithms. @@ -134,13 +153,13 @@ v0.17 -- Benjamin Goldberg submitted optimized mp_add and mp_sub routines. A n -- Fixed bug in mp_exptmod that would cause it to fail for odd moduli when DIGIT_BIT != 28 -- mp_exptmod now also returns errors if the modulus is negative and will handle negative exponents -- mp_prime_is_prime will now return true if the input is one of the primes in the prime table - -- Damian M Gryski (dgryski@uwaterloo.ca) found a index out of bounds error in the - mp_fast_s_mp_mul_high_digs function which didn't come up before. (fixed) + -- Damian M Gryski (dgryski@uwaterloo.ca) found a index out of bounds error in the + mp_fast_s_mp_mul_high_digs function which didn't come up before. (fixed) -- Refactored the DR reduction code so there is only one function per file. -- Fixed bug in the mp_mul() which would erroneously avoid the faster multiplier [comba] when it was allowed. The bug would not cause the incorrect value to be produced just less efficient (fixed) -- Fixed similar bug in the Montgomery reduction code. - -- Added tons of (mp_digit) casts so the 7/15/28/31 bit digit code will work flawlessly out of the box. + -- Added tons of (mp_digit) casts so the 7/15/28/31 bit digit code will work flawlessly out of the box. Also added limited support for 64-bit machines with a 60-bit digit. Both thanks to Tom Wu (tom@arcot.com) -- Added new comments here and there, cleaned up some code [style stuff] -- Fixed a lingering typo in mp_exptmod* that would set bitcnt to zero then one. Very silly stuff :-) @@ -148,19 +167,19 @@ v0.17 -- Benjamin Goldberg submitted optimized mp_add and mp_sub routines. A n saves quite a few calls and if statements. -- Added etc/mont.c a test of the Montgomery reduction [assuming all else works :-| ] -- Fixed up etc/tune.c to use a wider test range [more appropriate] also added a x86 based addition which - uses RDTSC for high precision timing. - -- Updated demo/demo.c to remove MPI stuff [won't work anyways], made the tests run for 2 seconds each so its + uses RDTSC for high precision timing. + -- Updated demo/demo.c to remove MPI stuff [won't work anyways], made the tests run for 2 seconds each so its not so insanely slow. Also made the output space delimited [and fixed up various errors] - -- Added logs directory, logs/graph.dem which will use gnuplot to make a series of PNG files - that go with the pre-made index.html. You have to build [via make timing] and run ltmtest first in the + -- Added logs directory, logs/graph.dem which will use gnuplot to make a series of PNG files + that go with the pre-made index.html. You have to build [via make timing] and run ltmtest first in the root of the package. - -- Fixed a bug in mp_sub and mp_add where "-a - -a" or "-a + a" would produce -0 as the result [obviously invalid]. + -- Fixed a bug in mp_sub and mp_add where "-a - -a" or "-a + a" would produce -0 as the result [obviously invalid]. -- Fixed a bug in mp_rshd. If the count == a.used it should zero/return [instead of shifting] -- Fixed a "off-by-one" bug in mp_mul2d. The initial size check on alloc would be off by one if the residue - shifting caused a carry. + shifting caused a carry. -- Fixed a bug where s_mp_mul_digs() would not call the Comba based routine if allowed. This made Barrett reduction slower than it had to be. - + Mar 29th, 2003 v0.16 -- Sped up mp_div by making normalization one shift call -- Sped up mp_mul_2d/mp_div_2d by aliasing pointers :-) @@ -190,9 +209,9 @@ v0.14 -- Tons of manual updates also fixed up timing demo to use a finer granularity for various functions -- fixed up demo/demo.c testing to pause during testing so my Duron won't catch on fire [stays around 31-35C during testing :-)] - + Feb 13th, 2003 -v0.13 -- tons of minor speed-ups in low level add, sub, mul_2 and div_2 which propagate +v0.13 -- tons of minor speed-ups in low level add, sub, mul_2 and div_2 which propagate to other functions like mp_invmod, mp_div, etc... -- Sped up mp_exptmod_fast by using new code to find R mod m [e.g. B^n mod m] -- minor fixes @@ -212,12 +231,12 @@ v0.11 -- More subtle fixes -- Moved to gentoo linux [hurrah!] so made *nix specific fixes to the make process -- Sped up the montgomery reduction code quite a bit -- fixed up demo so when building timing for the x86 it assumes ELF format now - + Jan 9th, 2003 -v0.10 -- Pekka Riikonen suggested fixes to the radix conversion code. +v0.10 -- Pekka Riikonen suggested fixes to the radix conversion code. -- Added baseline montgomery and comba montgomery reductions, sped up exptmods [to a point, see bn.h for MONTGOMERY_EXPT_CUTOFF] - + Jan 6th, 2003 v0.09 -- Updated the manual to reflect recent changes. :-) -- Added Jacobi function (mp_jacobi) to supplement the number theory side of the lib @@ -230,16 +249,16 @@ v0.08 -- Sped up the multipliers by moving the inner loop variables into a smal -- Made "mtest" be able to use /dev/random, /dev/urandom or stdin for RNG data -- Corrected some bugs where error messages were potentially ignored -- add etc/pprime.c program which makes numbers which are provably prime. - + Jan 1st, 2003 v0.07 -- Removed alot of heap operations from core functions to speed them up -- Added a root finding function [and mp_sqrt macro like from MPI] - -- Added more to manual + -- Added more to manual Dec 31st, 2002 v0.06 -- Sped up the s_mp_add, s_mp_sub which inturn sped up mp_invmod, mp_exptmod, etc... -- Cleaned up the header a bit more - + Dec 30th, 2002 v0.05 -- Builds with MSVC out of the box -- Fixed a bug in mp_invmod w.r.t. even moduli @@ -247,19 +266,19 @@ v0.05 -- Builds with MSVC out of the box -- Fixed up exptmod to use fewer multiplications -- Fixed up mp_init_size to use only one heap operation -- Note there is a slight "off-by-one" bug in the library somewhere - without the padding (see the source for comment) the library + without the padding (see the source for comment) the library crashes in libtomcrypt. Anyways a reasonable workaround is to pad the numbers which will always correct it since as the numbers grow the padding will still be beyond the end of the number -- Added more to the manual - + Dec 29th, 2002 v0.04 -- Fixed a memory leak in mp_to_unsigned_bin -- optimized invmod code -- Fixed bug in mp_div -- use exchange instead of copy for results -- added a bit more to the manual - + Dec 27th, 2002 v0.03 -- Sped up s_mp_mul_high_digs by not computing the carries of the lower digits -- Fixed a bug where mp_set_int wouldn't zero the value first and set the used member. @@ -270,7 +289,7 @@ v0.03 -- Sped up s_mp_mul_high_digs by not computing the carries of the lower d -- Many many many many fixes -- Works in LibTomCrypt now :-) -- Added iterations to the timing demos... more accurate. - -- Tom needs a job. + -- Tom needs a job. Dec 26th, 2002 v0.02 -- Fixed a few "slips" in the manual. This is "LibTomMath" afterall :-) @@ -279,7 +298,7 @@ v0.02 -- Fixed a few "slips" in the manual. This is "LibTomMath" afterall :-) Dec 25th,2002 v0.01 -- Initial release. Gimme a break. - -- Todo list, + -- Todo list, add details to manual [e.g. algorithms] more comments in code example programs diff --git a/demo/demo.c b/demo/demo.c index 089b3192b..8014ea82d 100644 --- a/demo/demo.c +++ b/demo/demo.c @@ -1,5 +1,7 @@ #include +#define TESTING + #ifdef IOWNANATHLON #include #define SLEEP sleep(4) @@ -11,8 +13,45 @@ #ifdef TIMER ulong64 _tt; -void reset(void) { _tt = clock(); } -ulong64 rdtsc(void) { return clock() - _tt; } + +#if defined(__i386__) || defined(_M_IX86) || defined(_M_AMD64) +/* RDTSC from Scott Duplichan */ +static ulong64 TIMFUNC (void) + { + #if defined __GNUC__ + #ifdef __i386__ + ulong64 a; + __asm__ __volatile__ ("rdtsc ":"=A" (a)); + return a; + #else /* gcc-IA64 version */ + unsigned long result; + __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory"); + while (__builtin_expect ((int) result == -1, 0)) + __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory"); + return result; + #endif + + // Microsoft and Intel Windows compilers + #elif defined _M_IX86 + __asm rdtsc + #elif defined _M_AMD64 + return __rdtsc (); + #elif defined _M_IA64 + #if defined __INTEL_COMPILER + #include + #endif + return __getReg (3116); + #else + #error need rdtsc function for this build + #endif + } +#else +#define TIMFUNC clock +#endif + +ulong64 rdtsc(void) { return TIMFUNC() - _tt; } +void reset(void) { _tt = TIMFUNC(); } + #endif void ndraw(mp_int *a, char *name) @@ -42,6 +81,13 @@ int lbit(void) } } +int myrng(unsigned char *dst, int len, void *dat) +{ + int x; + for (x = 0; x < len; x++) dst[x] = rand() & 0xFF; + return len; +} + #define DO2(x) x; x; #define DO4(x) DO2(x); DO2(x); @@ -53,13 +99,12 @@ int main(void) { mp_int a, b, c, d, e, f; unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n, - div2_n, mul2_n, add_d_n, sub_d_n; + div2_n, mul2_n, add_d_n, sub_d_n, t; unsigned rr; - int cnt, ix, old_kara_m, old_kara_s; + int i, n, err, cnt, ix, old_kara_m, old_kara_s; #ifdef TIMER - int n; - ulong64 tt; + ulong64 tt, CLK_PER_SEC; FILE *log, *logb, *logc; #endif @@ -72,6 +117,127 @@ int main(void) srand(time(NULL)); +#ifdef TESTING + // test mp_get_int + printf("Testing: mp_get_int\n"); + for(i=0;i<1000;++i) { + t = (unsigned long)rand()*rand()+1; + mp_set_int(&a,t); + if (t!=mp_get_int(&a)) { + printf("mp_get_int() bad result!\n"); + return 1; + } + } + mp_set_int(&a,0); + if (mp_get_int(&a)!=0) + { printf("mp_get_int() bad result!\n"); + return 1; + } + mp_set_int(&a,0xffffffff); + if (mp_get_int(&a)!=0xffffffff) + { printf("mp_get_int() bad result!\n"); + return 1; + } + + // test mp_sqrt + printf("Testing: mp_sqrt\n"); + for (i=0;i<10000;++i) { + printf("%6d\r", i); fflush(stdout); + n = (rand()&15)+1; + mp_rand(&a,n); + if (mp_sqrt(&a,&b) != MP_OKAY) + { printf("mp_sqrt() error!\n"); + return 1; + } + mp_n_root(&a,2,&a); + if (mp_cmp_mag(&b,&a) != MP_EQ) + { printf("mp_sqrt() bad result!\n"); + return 1; + } + } + + printf("\nTesting: mp_is_square\n"); + for (i=0;i<100000;++i) { + printf("%6d\r", i); fflush(stdout); + + /* test mp_is_square false negatives */ + n = (rand()&7)+1; + mp_rand(&a,n); + mp_sqr(&a,&a); + if (mp_is_square(&a,&n)!=MP_OKAY) { + printf("fn:mp_is_square() error!\n"); + return 1; + } + if (n==0) { + printf("fn:mp_is_square() bad result!\n"); + return 1; + } + + /* test for false positives */ + mp_add_d(&a, 1, &a); + if (mp_is_square(&a,&n)!=MP_OKAY) { + printf("fp:mp_is_square() error!\n"); + return 1; + } + if (n==1) { + printf("fp:mp_is_square() bad result!\n"); + return 1; + } + + } + printf("\n\n"); +#endif + +#ifdef TESTING + /* test for size */ + for (ix = 16; ix < 512; ix++) { + printf("Testing (not safe-prime): %9d bits \r", ix); fflush(stdout); + err = mp_prime_random_ex(&a, 8, ix, (rand()&1)?LTM_PRIME_2MSB_OFF:LTM_PRIME_2MSB_ON, myrng, NULL); + if (err != MP_OKAY) { + printf("failed with err code %d\n", err); + return EXIT_FAILURE; + } + if (mp_count_bits(&a) != ix) { + printf("Prime is %d not %d bits!!!\n", mp_count_bits(&a), ix); + return EXIT_FAILURE; + } + } + + for (ix = 16; ix < 512; ix++) { + printf("Testing ( safe-prime): %9d bits \r", ix); fflush(stdout); + err = mp_prime_random_ex(&a, 8, ix, ((rand()&1)?LTM_PRIME_2MSB_OFF:LTM_PRIME_2MSB_ON)|LTM_PRIME_SAFE, myrng, NULL); + if (err != MP_OKAY) { + printf("failed with err code %d\n", err); + return EXIT_FAILURE; + } + if (mp_count_bits(&a) != ix) { + printf("Prime is %d not %d bits!!!\n", mp_count_bits(&a), ix); + return EXIT_FAILURE; + } + /* let's see if it's really a safe prime */ + mp_sub_d(&a, 1, &a); + mp_div_2(&a, &a); + mp_prime_is_prime(&a, 8, &cnt); + if (cnt != MP_YES) { + printf("sub is not prime!\n"); + return EXIT_FAILURE; + } + } + + printf("\n\n"); +#endif + +#ifdef TESTING + mp_read_radix(&a, "123456", 10); + mp_toradix_n(&a, buf, 10, 3); + printf("a == %s\n", buf); + mp_toradix_n(&a, buf, 10, 4); + printf("a == %s\n", buf); + mp_toradix_n(&a, buf, 10, 30); + printf("a == %s\n", buf); +#endif + + #if 0 for (;;) { fgets(buf, sizeof(buf), stdin); @@ -97,12 +263,13 @@ int main(void) } #endif -#if 0 +#ifdef TESTING /* test mp_cnt_lsb */ + printf("testing mp_cnt_lsb...\n"); mp_set(&a, 1); - for (ix = 0; ix < 128; ix++) { + for (ix = 0; ix < 1024; ix++) { if (mp_cnt_lsb(&a) != ix) { - printf("Failed at %d\n", ix); + printf("Failed at %d, %d\n", ix, mp_cnt_lsb(&a)); return 0; } mp_mul_2(&a, &a); @@ -110,7 +277,8 @@ int main(void) #endif /* test mp_reduce_2k */ -#if 0 +#ifdef TESTING + printf("Testing mp_reduce_2k...\n"); for (cnt = 3; cnt <= 384; ++cnt) { mp_digit tmp; mp_2expt(&a, cnt); @@ -137,7 +305,8 @@ int main(void) /* test mp_div_3 */ -#if 0 +#ifdef TESTING + printf("Testing mp_div_3...\n"); mp_set(&d, 3); for (cnt = 0; cnt < 1000000; ) { mp_digit r1, r2; @@ -155,7 +324,8 @@ int main(void) #endif /* test the DR reduction */ -#if 0 +#ifdef TESTING + printf("testing mp_dr_reduce...\n"); for (cnt = 2; cnt < 128; cnt++) { printf("%d digit modulus\n", cnt); mp_grow(&a, cnt); @@ -190,7 +360,13 @@ int main(void) #ifdef TIMER /* temp. turn off TOOM */ TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000; - printf("CLOCKS_PER_SEC == %lu\n", CLOCKS_PER_SEC); + + reset(); + sleep(1); + CLK_PER_SEC = rdtsc(); + + printf("CLK_PER_SEC == %lu\n", CLK_PER_SEC); + log = fopen("logs/add.log", "w"); for (cnt = 8; cnt <= 128; cnt += 8) { @@ -202,10 +378,10 @@ int main(void) do { DO(mp_add(&a,&b,&c)); rr += 16; - } while (rdtsc() < (CLOCKS_PER_SEC * 2)); + } while (rdtsc() < (CLK_PER_SEC * 2)); tt = rdtsc(); - printf("Adding\t\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLOCKS_PER_SEC)/tt, tt); - fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLOCKS_PER_SEC)/tt); + printf("Adding\t\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt); + fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log); } fclose(log); @@ -219,10 +395,10 @@ int main(void) do { DO(mp_sub(&a,&b,&c)); rr += 16; - } while (rdtsc() < (CLOCKS_PER_SEC * 2)); + } while (rdtsc() < (CLK_PER_SEC * 2)); tt = rdtsc(); - printf("Subtracting\t\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLOCKS_PER_SEC)/tt, tt); - fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLOCKS_PER_SEC)/tt); + printf("Subtracting\t\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt); + fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log); } fclose(log); @@ -237,7 +413,7 @@ int main(void) KARATSUBA_SQR_CUTOFF = (ix==0)?9999:old_kara_s; log = fopen((ix==0)?"logs/mult.log":"logs/mult_kara.log", "w"); - for (cnt = 32; cnt <= 288; cnt += 16) { + for (cnt = 32; cnt <= 288; cnt += 8) { SLEEP; mp_rand(&a, cnt); mp_rand(&b, cnt); @@ -246,15 +422,15 @@ int main(void) do { DO(mp_mul(&a, &b, &c)); rr += 16; - } while (rdtsc() < (CLOCKS_PER_SEC * 2)); + } while (rdtsc() < (CLK_PER_SEC * 2)); tt = rdtsc(); - printf("Multiplying\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLOCKS_PER_SEC)/tt, tt); - fprintf(log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLOCKS_PER_SEC)/tt); + printf("Multiplying\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt); + fprintf(log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log); } fclose(log); log = fopen((ix==0)?"logs/sqr.log":"logs/sqr_kara.log", "w"); - for (cnt = 32; cnt <= 288; cnt += 16) { + for (cnt = 32; cnt <= 288; cnt += 8) { SLEEP; mp_rand(&a, cnt); reset(); @@ -262,14 +438,15 @@ int main(void) do { DO(mp_sqr(&a, &b)); rr += 16; - } while (rdtsc() < (CLOCKS_PER_SEC * 2)); + } while (rdtsc() < (CLK_PER_SEC * 2)); tt = rdtsc(); - printf("Squaring\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLOCKS_PER_SEC)/tt, tt); - fprintf(log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLOCKS_PER_SEC)/tt); + printf("Squaring\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt); + fprintf(log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log); } fclose(log); } +expt_test: { char *primes[] = { /* 2K moduli mersenne primes */ @@ -299,14 +476,12 @@ int main(void) "1214855636816562637502584060163403830270705000634713483015101384881871978446801224798536155406895823305035467591632531067547890948695117172076954220727075688048751022421198712032848890056357845974246560748347918630050853933697792254955890439720297560693579400297062396904306270145886830719309296352765295712183040773146419022875165382778007040109957609739589875590885701126197906063620133954893216612678838507540777138437797705602453719559017633986486649523611975865005712371194067612263330335590526176087004421363598470302731349138773205901447704682181517904064735636518462452242791676541725292378925568296858010151852326316777511935037531017413910506921922450666933202278489024521263798482237150056835746454842662048692127173834433089016107854491097456725016327709663199738238442164843147132789153725513257167915555162094970853584447993125488607696008169807374736711297007473812256272245489405898470297178738029484459690836250560495461579533254473316340608217876781986188705928270735695752830825527963838355419762516246028680280988020401914551825487349990306976304093109384451438813251211051597392127491464898797406789175453067960072008590614886532333015881171367104445044718144312416815712216611576221546455968770801413440778423979", NULL }; -expt_test: log = fopen("logs/expt.log", "w"); logb = fopen("logs/expt_dr.log", "w"); logc = fopen("logs/expt_2k.log", "w"); for (n = 0; primes[n]; n++) { SLEEP; mp_read_radix(&a, primes[n], 10); - printf("Different (%d)!!!\n", mp_count_bits(&a)); mp_zero(&b); for (rr = 0; rr < mp_count_bits(&a); rr++) { mp_mul_2(&b, &b); @@ -321,7 +496,7 @@ int main(void) do { DO(mp_exptmod(&c, &b, &a, &d)); rr += 16; - } while (rdtsc() < (CLOCKS_PER_SEC * 2)); + } while (rdtsc() < (CLK_PER_SEC * 2)); tt = rdtsc(); mp_sub_d(&a, 1, &e); mp_sub(&e, &b, &b); @@ -332,8 +507,8 @@ int main(void) draw(&d); exit(0); } - printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLOCKS_PER_SEC)/tt, tt); - fprintf((n < 6) ? logc : (n < 13) ? logb : log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLOCKS_PER_SEC)/tt); + printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt); + fprintf((n < 6) ? logc : (n < 13) ? logb : log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt); } } fclose(log); @@ -356,15 +531,15 @@ int main(void) do { DO(mp_invmod(&b, &a, &c)); rr += 16; - } while (rdtsc() < (CLOCKS_PER_SEC * 2)); + } while (rdtsc() < (CLK_PER_SEC * 2)); tt = rdtsc(); mp_mulmod(&b, &c, &a, &d); if (mp_cmp_d(&d, 1) != MP_EQ) { printf("Failed to invert\n"); return 0; } - printf("Inverting mod\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLOCKS_PER_SEC)/tt, tt); - fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLOCKS_PER_SEC)/tt); + printf("Inverting mod\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt); + fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLK_PER_SEC)/tt); } fclose(log); diff --git a/etc/mont.c b/etc/mont.c index 0de2084d0..dbf17350a 100644 --- a/etc/mont.c +++ b/etc/mont.c @@ -1,46 +1,46 @@ -/* tests the montgomery routines */ -#include - -int main(void) -{ - mp_int modulus, R, p, pp; - mp_digit mp; - long x, y; - - srand(time(NULL)); - mp_init_multi(&modulus, &R, &p, &pp, NULL); - - /* loop through various sizes */ - for (x = 4; x < 256; x++) { - printf("DIGITS == %3ld...", x); fflush(stdout); - - /* make up the odd modulus */ - mp_rand(&modulus, x); - modulus.dp[0] |= 1; - - /* now find the R value */ - mp_montgomery_calc_normalization(&R, &modulus); - mp_montgomery_setup(&modulus, &mp); - - /* now run through a bunch tests */ - for (y = 0; y < 1000; y++) { - mp_rand(&p, x/2); /* p = random */ - mp_mul(&p, &R, &pp); /* pp = R * p */ - mp_montgomery_reduce(&pp, &modulus, mp); - - /* should be equal to p */ - if (mp_cmp(&pp, &p) != MP_EQ) { - printf("FAILURE!\n"); - exit(-1); - } - } - printf("PASSED\n"); - } - - return 0; -} - - - - - +/* tests the montgomery routines */ +#include + +int main(void) +{ + mp_int modulus, R, p, pp; + mp_digit mp; + long x, y; + + srand(time(NULL)); + mp_init_multi(&modulus, &R, &p, &pp, NULL); + + /* loop through various sizes */ + for (x = 4; x < 256; x++) { + printf("DIGITS == %3ld...", x); fflush(stdout); + + /* make up the odd modulus */ + mp_rand(&modulus, x); + modulus.dp[0] |= 1; + + /* now find the R value */ + mp_montgomery_calc_normalization(&R, &modulus); + mp_montgomery_setup(&modulus, &mp); + + /* now run through a bunch tests */ + for (y = 0; y < 1000; y++) { + mp_rand(&p, x/2); /* p = random */ + mp_mul(&p, &R, &pp); /* pp = R * p */ + mp_montgomery_reduce(&pp, &modulus, mp); + + /* should be equal to p */ + if (mp_cmp(&pp, &p) != MP_EQ) { + printf("FAILURE!\n"); + exit(-1); + } + } + printf("PASSED\n"); + } + + return 0; +} + + + + + diff --git a/etclib/poly.c b/etclib/poly.c deleted file mode 100644 index 0c858977b..000000000 --- a/etclib/poly.c +++ /dev/null @@ -1,302 +0,0 @@ -/* LibTomMath, multiple-precision integer library -- Tom St Denis - * - * LibTomMath is library that provides for multiple-precision - * integer arithmetic as well as number theoretic functionality. - * - * This file "poly.c" provides GF(p^k) functionality on top of the - * libtommath library. - * - * The library is designed directly after the MPI library by - * Michael Fromberger but has been written from scratch with - * additional optimizations in place. - * - * The library is free for all purposes without any express - * guarantee it works. - * - * Tom St Denis, tomstdenis@iahu.ca, http://libtommath.iahu.ca - */ -#include "poly.h" - -#undef MIN -#define MIN(x,y) ((x)<(y)?(x):(y)) -#undef MAX -#define MAX(x,y) ((x)>(y)?(x):(y)) - -static void s_free(mp_poly *a) -{ - int k; - for (k = 0; k < a->alloc; k++) { - mp_clear(&(a->co[k])); - } -} - -static int s_setup(mp_poly *a, int low, int high) -{ - int res, k, j; - for (k = low; k < high; k++) { - if ((res = mp_init(&(a->co[k]))) != MP_OKAY) { - for (j = low; j < k; j++) { - mp_clear(&(a->co[j])); - } - return MP_MEM; - } - } - return MP_OKAY; -} - -int mp_poly_init(mp_poly *a, mp_int *cha) -{ - return mp_poly_init_size(a, cha, MP_POLY_PREC); -} - -/* init a poly of a given (size) degree */ -int mp_poly_init_size(mp_poly *a, mp_int *cha, int size) -{ - int res; - - /* allocate array of mp_ints for coefficients */ - a->co = malloc(size * sizeof(mp_int)); - if (a->co == NULL) { - return MP_MEM; - } - a->used = 0; - a->alloc = size; - - /* now init the range */ - if ((res = s_setup(a, 0, size)) != MP_OKAY) { - free(a->co); - return res; - } - - /* copy characteristic */ - if ((res = mp_init_copy(&(a->cha), cha)) != MP_OKAY) { - s_free(a); - free(a->co); - return res; - } - - /* return ok at this point */ - return MP_OKAY; -} - -/* grow the size of a poly */ -static int mp_poly_grow(mp_poly *a, int size) -{ - int res; - - if (size > a->alloc) { - /* resize the array of coefficients */ - a->co = realloc(a->co, sizeof(mp_int) * size); - if (a->co == NULL) { - return MP_MEM; - } - - /* now setup the coefficients */ - if ((res = s_setup(a, a->alloc, a->alloc + size)) != MP_OKAY) { - return res; - } - - a->alloc += size; - } - return MP_OKAY; -} - -/* copy, b = a */ -int mp_poly_copy(mp_poly *a, mp_poly *b) -{ - int res, k; - - /* resize b */ - if ((res = mp_poly_grow(b, a->used)) != MP_OKAY) { - return res; - } - - /* now copy the used part */ - b->used = a->used; - - /* now the cha */ - if ((res = mp_copy(&(a->cha), &(b->cha))) != MP_OKAY) { - return res; - } - - /* now all the coefficients */ - for (k = 0; k < b->used; k++) { - if ((res = mp_copy(&(a->co[k]), &(b->co[k]))) != MP_OKAY) { - return res; - } - } - - /* now zero the top */ - for (k = b->used; k < b->alloc; k++) { - mp_zero(&(b->co[k])); - } - - return MP_OKAY; -} - -/* init from a copy, a = b */ -int mp_poly_init_copy(mp_poly *a, mp_poly *b) -{ - int res; - - if ((res = mp_poly_init(a, &(b->cha))) != MP_OKAY) { - return res; - } - return mp_poly_copy(b, a); -} - -/* free a poly from ram */ -void mp_poly_clear(mp_poly *a) -{ - s_free(a); - mp_clear(&(a->cha)); - free(a->co); - - a->co = NULL; - a->used = a->alloc = 0; -} - -/* exchange two polys */ -void mp_poly_exch(mp_poly *a, mp_poly *b) -{ - mp_poly t; - t = *a; *a = *b; *b = t; -} - -/* clamp the # of used digits */ -static void mp_poly_clamp(mp_poly *a) -{ - while (a->used > 0 && mp_cmp_d(&(a->co[a->used]), 0) == MP_EQ) --(a->used); -} - -/* add two polynomials, c(x) = a(x) + b(x) */ -int mp_poly_add(mp_poly *a, mp_poly *b, mp_poly *c) -{ - mp_poly t, *x, *y; - int res, k; - - /* ensure char's are the same */ - if (mp_cmp(&(a->cha), &(b->cha)) != MP_EQ) { - return MP_VAL; - } - - /* now figure out the sizes such that x is the - largest degree poly and y is less or equal in degree - */ - if (a->used > b->used) { - x = a; - y = b; - } else { - x = b; - y = a; - } - - /* now init the result to be a copy of the largest */ - if ((res = mp_poly_init_copy(&t, x)) != MP_OKAY) { - return res; - } - - /* now add the coeffcients of the smaller one */ - for (k = 0; k < y->used; k++) { - if ((res = mp_addmod(&(a->co[k]), &(b->co[k]), &(a->cha), &(t.co[k]))) != MP_OKAY) { - goto __T; - } - } - - mp_poly_clamp(&t); - mp_poly_exch(&t, c); - res = MP_OKAY; - -__T: mp_poly_clear(&t); - return res; -} - -/* subtracts two polynomials, c(x) = a(x) - b(x) */ -int mp_poly_sub(mp_poly *a, mp_poly *b, mp_poly *c) -{ - mp_poly t, *x, *y; - int res, k; - - /* ensure char's are the same */ - if (mp_cmp(&(a->cha), &(b->cha)) != MP_EQ) { - return MP_VAL; - } - - /* now figure out the sizes such that x is the - largest degree poly and y is less or equal in degree - */ - if (a->used > b->used) { - x = a; - y = b; - } else { - x = b; - y = a; - } - - /* now init the result to be a copy of the largest */ - if ((res = mp_poly_init_copy(&t, x)) != MP_OKAY) { - return res; - } - - /* now add the coeffcients of the smaller one */ - for (k = 0; k < y->used; k++) { - if ((res = mp_submod(&(a->co[k]), &(b->co[k]), &(a->cha), &(t.co[k]))) != MP_OKAY) { - goto __T; - } - } - - mp_poly_clamp(&t); - mp_poly_exch(&t, c); - res = MP_OKAY; - -__T: mp_poly_clear(&t); - return res; -} - -/* multiplies two polynomials, c(x) = a(x) * b(x) */ -int mp_poly_mul(mp_poly *a, mp_poly *b, mp_poly *c) -{ - mp_poly t; - mp_int tt; - int res, pa, pb, ix, iy; - - /* ensure char's are the same */ - if (mp_cmp(&(a->cha), &(b->cha)) != MP_EQ) { - return MP_VAL; - } - - /* degrees of a and b */ - pa = a->used; - pb = b->used; - - /* now init the temp polynomial to be of degree pa+pb */ - if ((res = mp_poly_init_size(&t, &(a->cha), pa+pb)) != MP_OKAY) { - return res; - } - - /* now init our temp int */ - if ((res = mp_init(&tt)) != MP_OKAY) { - goto __T; - } - - /* now loop through all the digits */ - for (ix = 0; ix < pa; ix++) { - for (iy = 0; iy < pb; iy++) { - if ((res = mp_mul(&(a->co[ix]), &(b->co[iy]), &tt)) != MP_OKAY) { - goto __TT; - } - if ((res = mp_addmod(&tt, &(t.co[ix+iy]), &(a->cha), &(t.co[ix+iy]))) != MP_OKAY) { - goto __TT; - } - } - } - - mp_poly_clamp(&t); - mp_poly_exch(&t, c); - res = MP_OKAY; - -__TT: mp_clear(&tt); -__T: mp_poly_clear(&t); - return res; -} - diff --git a/etclib/poly.h b/etclib/poly.h deleted file mode 100644 index f2e321271..000000000 --- a/etclib/poly.h +++ /dev/null @@ -1,73 +0,0 @@ -/* LibTomMath, multiple-precision integer library -- Tom St Denis - * - * LibTomMath is library that provides for multiple-precision - * integer arithmetic as well as number theoretic functionality. - * - * This file "poly.h" provides GF(p^k) functionality on top of the - * libtommath library. - * - * The library is designed directly after the MPI library by - * Michael Fromberger but has been written from scratch with - * additional optimizations in place. - * - * The library is free for all purposes without any express - * guarantee it works. - * - * Tom St Denis, tomstdenis@iahu.ca, http://libtommath.iahu.ca - */ - -#ifndef POLY_H_ -#define POLY_H_ - -#include "bn.h" - -/* a mp_poly is basically a derived "class" of a mp_int - * it uses the same technique of growing arrays via - * used/alloc parameters except the base unit or "digit" - * is in fact a mp_int. These hold the coefficients - * of the polynomial - */ -typedef struct { - int used, /* coefficients used */ - alloc; /* coefficients allocated (and initialized) */ - mp_int *co, /* coefficients */ - cha; /* characteristic */ - -} mp_poly; - - -#define MP_POLY_PREC 16 /* default coefficients allocated */ - -/* init a poly */ -int mp_poly_init(mp_poly *a, mp_int *cha); - -/* init a poly of a given (size) degree */ -int mp_poly_init_size(mp_poly *a, mp_int *cha, int size); - -/* copy, b = a */ -int mp_poly_copy(mp_poly *a, mp_poly *b); - -/* init from a copy, a = b */ -int mp_poly_init_copy(mp_poly *a, mp_poly *b); - -/* free a poly from ram */ -void mp_poly_clear(mp_poly *a); - -/* exchange two polys */ -void mp_poly_exch(mp_poly *a, mp_poly *b); - -/* ---> Basic Arithmetic <--- */ - -/* add two polynomials, c(x) = a(x) + b(x) */ -int mp_poly_add(mp_poly *a, mp_poly *b, mp_poly *c); - -/* subtracts two polynomials, c(x) = a(x) - b(x) */ -int mp_poly_sub(mp_poly *a, mp_poly *b, mp_poly *c); - -/* multiplies two polynomials, c(x) = a(x) * b(x) */ -int mp_poly_mul(mp_poly *a, mp_poly *b, mp_poly *c); - - - -#endif - diff --git a/logs/add.log b/logs/add.log index 492158567..e53b415d2 100644 --- a/logs/add.log +++ b/logs/add.log @@ -1,16 +1,16 @@ -224 8622881 -448 7241417 -672 5844712 -896 4938016 -1120 4256543 -1344 3728000 -1568 3328263 -1792 3012161 -2016 2743688 -2240 2512095 -2464 2234464 -2688 1960139 -2912 2013395 -3136 1879636 -3360 1756301 -3584 1680982 +224 20297071 +448 15151383 +672 13088682 +896 11111587 +1120 9240621 +1344 8221878 +1568 7227434 +1792 6718051 +2016 6042524 +2240 5685200 +2464 5240465 +2688 4818032 +2912 4412794 +3136 4155883 +3360 3927078 +3584 3722138 diff --git a/logs/addsub.png b/logs/addsub.png index 3315272a4..e733f8d76 100644 Binary files a/logs/addsub.png and b/logs/addsub.png differ diff --git a/logs/expt.log b/logs/expt.log index e69de29bb..2597b4864 100644 --- a/logs/expt.log +++ b/logs/expt.log @@ -0,0 +1,7 @@ +513 745 +769 282 +1025 130 +2049 20 +2561 11 +3073 6 +4097 2 diff --git a/logs/expt.png b/logs/expt.png index 36cab73e8..59bafa2a8 100644 Binary files a/logs/expt.png and b/logs/expt.png differ diff --git a/logs/expt_2k.log b/logs/expt_2k.log index e69de29bb..f4c282c23 100644 --- a/logs/expt_2k.log +++ b/logs/expt_2k.log @@ -0,0 +1,6 @@ +521 783 +607 585 +1279 138 +2203 39 +3217 15 +4253 6 diff --git a/logs/expt_dr.log b/logs/expt_dr.log index e69de29bb..c552e127d 100644 --- a/logs/expt_dr.log +++ b/logs/expt_dr.log @@ -0,0 +1,7 @@ +532 1296 +784 551 +1036 283 +1540 109 +2072 52 +3080 18 +4116 7 diff --git a/logs/graphs.dem b/logs/graphs.dem index 0553b79a0..d5c9b8adf 100644 --- a/logs/graphs.dem +++ b/logs/graphs.dem @@ -1,4 +1,4 @@ -set terminal png color +set terminal png set size 1.75 set ylabel "Operations per Second" set xlabel "Operand size (bits)" diff --git a/logs/index.html b/logs/index.html index f3a5562cd..19fe403e9 100644 --- a/logs/index.html +++ b/logs/index.html @@ -1,24 +1,24 @@ - - -LibTomMath Log Plots - - - -

Addition and Subtraction

-
-
- -

Multipliers

-
-
- -

Exptmod

-
-
- -

Modular Inverse

-
-
- - + + +LibTomMath Log Plots + + + +

Addition and Subtraction

+
+
+ +

Multipliers

+
+
+ +

Exptmod

+
+
+ +

Modular Inverse

+
+
+ + \ No newline at end of file diff --git a/logs/invmod.log b/logs/invmod.log index 1dec5cd5b..c9294ef66 100644 --- a/logs/invmod.log +++ b/logs/invmod.log @@ -1,32 +1,32 @@ -112 14936 -224 7208 -336 6864 -448 5000 -560 3648 -672 1832 -784 1480 -896 1232 -1008 1010 -1120 1360 -1232 728 -1344 632 -1456 544 -1568 800 -1680 704 -1792 396 -1904 584 -2016 528 -2128 483 -2240 448 -2352 250 -2464 378 -2576 350 -2688 198 -2800 300 -2912 170 -3024 265 -3136 150 -3248 142 -3360 134 -3472 126 -3584 118 +112 17364 +224 8643 +336 8867 +448 6228 +560 4737 +672 2259 +784 2899 +896 1497 +1008 1238 +1120 1010 +1232 870 +1344 1265 +1456 1102 +1568 981 +1680 539 +1792 484 +1904 722 +2016 392 +2128 604 +2240 551 +2352 511 +2464 469 +2576 263 +2688 247 +2800 227 +2912 354 +3024 336 +3136 312 +3248 296 +3360 166 +3472 155 +3584 248 diff --git a/logs/invmod.png b/logs/invmod.png index 24866d5b6..baa287fe2 100644 Binary files a/logs/invmod.png and b/logs/invmod.png differ diff --git a/logs/k7/index.html b/logs/k7/index.html index f3a5562cd..19fe403e9 100644 --- a/logs/k7/index.html +++ b/logs/k7/index.html @@ -1,24 +1,24 @@ - - -LibTomMath Log Plots - - - -

Addition and Subtraction

-
-
- -

Multipliers

-
-
- -

Exptmod

-
-
- -

Modular Inverse

-
-
- - + + +LibTomMath Log Plots + + + +

Addition and Subtraction

+
+
+ +

Multipliers

+
+
+ +

Exptmod

+
+
+ +

Modular Inverse

+
+
+ + \ No newline at end of file diff --git a/logs/mult.log b/logs/mult.log index 050174783..d4f589981 100644 --- a/logs/mult.log +++ b/logs/mult.log @@ -1,17 +1,33 @@ -896 348504 -1344 165040 -1792 98696 -2240 65400 -2688 46672 -3136 34968 -3584 27144 -4032 21648 -4480 17672 -4928 14768 -5376 12416 -5824 10696 -6272 9184 -6720 8064 -7168 1896 -7616 1680 -8064 1504 +920 374785 +1142 242737 +1371 176704 +1596 134341 +1816 105537 +2044 85089 +2268 70051 +2490 58671 +2716 49851 +2937 42881 +3162 37288 +3387 32697 +3608 28915 +3836 25759 +4057 23088 +4284 20800 +4508 18827 +4730 17164 +4956 15689 +5180 14397 +5398 13260 +5628 12249 +5852 11346 +6071 10537 +6298 9812 +6522 9161 +6742 8572 +6971 8038 +7195 2915 +7419 2744 +7644 2587 +7866 2444 +8090 2311 diff --git a/logs/mult.png b/logs/mult.png index ddc768041..d304db2ca 100644 Binary files a/logs/mult.png and b/logs/mult.png differ diff --git a/logs/mult_kara.log b/logs/mult_kara.log index 6fd8ada63..6edc43945 100644 --- a/logs/mult_kara.log +++ b/logs/mult_kara.log @@ -1,17 +1,33 @@ -896 301784 -1344 141568 -1792 84592 -2240 55864 -2688 39576 -3136 30088 -3584 24032 -4032 19760 -4480 16536 -4928 13376 -5376 11880 -5824 10256 -6272 9160 -6720 8208 -7168 7384 -7616 6664 -8064 6112 +924 374171 +1147 243163 +1371 177111 +1596 134465 +1819 105619 +2044 85145 +2266 70086 +2488 58717 +2715 49869 +2939 42894 +3164 37389 +3387 33510 +3610 29993 +3836 27205 +4060 24751 +4281 22576 +4508 20670 +4732 19019 +4954 17527 +5180 16217 +5404 15044 +5624 14003 +5849 13051 +6076 12067 +6300 11438 +6524 10772 +6748 10298 +6972 9715 +7195 9330 +7416 8836 +7644 8465 +7864 8042 +8091 7735 diff --git a/logs/p4/index.html b/logs/p4/index.html index f3a5562cd..19fe403e9 100644 --- a/logs/p4/index.html +++ b/logs/p4/index.html @@ -1,24 +1,24 @@ - - -LibTomMath Log Plots - - - -

Addition and Subtraction

-
-
- -

Multipliers

-
-
- -

Exptmod

-
-
- -

Modular Inverse

-
-
- - + + +LibTomMath Log Plots + + + +

Addition and Subtraction

+
+
+ +

Multipliers

+
+
+ +

Exptmod

+
+
+ +

Modular Inverse

+
+
+ + \ No newline at end of file diff --git a/logs/sqr.log b/logs/sqr.log index ae1a929a4..81fa612fd 100644 --- a/logs/sqr.log +++ b/logs/sqr.log @@ -1,17 +1,33 @@ -911 167013 -1359 83796 -1807 50308 -2254 33637 -2703 24067 -3151 17997 -3599 5751 -4047 4561 -4490 3714 -4943 3067 -5391 2597 -5839 2204 -6286 1909 -6735 1637 -7183 1461 -7631 1302 -8078 1158 +922 471095 +1147 337137 +1366 254327 +1596 199732 +1819 161225 +2044 132852 +2268 111493 +2490 94864 +2715 81745 +2940 71187 +3162 62575 +3387 55418 +3612 14540 +3836 12944 +4060 11627 +4281 10546 +4508 9502 +4730 8688 +4954 7937 +5180 7273 +5402 6701 +5627 6189 +5850 5733 +6076 5310 +6300 4933 +6522 4631 +6748 4313 +6971 4064 +7196 3801 +7420 3576 +7642 3388 +7868 3191 +8092 3020 diff --git a/logs/sqr_kara.log b/logs/sqr_kara.log index 3c85942c3..3b547cf34 100644 --- a/logs/sqr_kara.log +++ b/logs/sqr_kara.log @@ -1,17 +1,33 @@ -910 165312 -1358 84355 -1806 50316 -2255 33661 -2702 24027 -3151 18068 -3599 14721 -4046 12101 -4493 10112 -4942 8591 -5390 7364 -5839 6398 -6285 5607 -6735 4952 -7182 4625 -7631 4193 -8079 3810 +922 470930 +1148 337217 +1372 254433 +1596 199827 +1820 161204 +2043 132871 +2267 111522 +2488 94932 +2714 81814 +2939 71231 +3164 62616 +3385 55467 +3611 44426 +3836 40695 +4060 37391 +4283 34371 +4508 31779 +4732 29499 +4956 27426 +5177 25598 +5403 23944 +5628 22416 +5851 21052 +6076 19781 +6299 18588 +6523 17539 +6746 16618 +6972 15705 +7196 13582 +7420 13004 +7643 12496 +7868 11963 +8092 11497 diff --git a/logs/sub.log b/logs/sub.log index bf1d36f3c..f1ade94b6 100644 --- a/logs/sub.log +++ b/logs/sub.log @@ -1,16 +1,16 @@ -224 10295756 -448 7577910 -672 6279588 -896 5345182 -1120 4646989 -1344 4101759 -1568 3685447 -1792 3337497 -2016 3051095 -2240 2811900 -2464 2605371 -2688 2420561 -2912 2273174 -3136 2134662 -3360 2014354 -3584 1901723 +224 16370431 +448 13327848 +672 11009401 +896 9125342 +1120 7930419 +1344 7114040 +1568 6506998 +1792 5899346 +2016 5435327 +2240 5038931 +2464 4696364 +2688 4425678 +2912 4134476 +3136 3913280 +3360 3692536 +3584 3505219 diff --git a/makefile b/makefile index c7e2bc7e9..07b7842b3 100644 --- a/makefile +++ b/makefile @@ -1,7 +1,7 @@ #Makefile for GCC # #Tom St Denis -CFLAGS += -I./ -Wall -W -Wshadow +CFLAGS += -I./ -Wall -W -Wshadow -Wsign-compare #for speed CFLAGS += -O3 -funroll-loops @@ -12,7 +12,7 @@ CFLAGS += -O3 -funroll-loops #x86 optimizations [should be valid for any GCC install though] CFLAGS += -fomit-frame-pointer -VERSION=0.29 +VERSION=0.30 default: libtommath.a @@ -50,7 +50,9 @@ bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \ bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \ bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \ bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \ -bn_mp_init_multi.o bn_mp_clear_multi.o bn_mp_prime_random.o bn_prime_sizes_tab.o bn_mp_exteuclid.o +bn_mp_init_multi.o bn_mp_clear_multi.o bn_prime_sizes_tab.o bn_mp_exteuclid.o bn_mp_toradix_n.o \ +bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \ +bn_mp_init_set_int.o libtommath.a: $(OBJECTS) $(AR) $(ARFLAGS) libtommath.a $(OBJECTS) @@ -64,6 +66,8 @@ install: libtommath.a test: libtommath.a demo/demo.o $(CC) demo/demo.o libtommath.a -o test + +mtest: test cd mtest ; $(CC) $(CFLAGS) mtest.c -o mtest -s timing: libtommath.a @@ -75,23 +79,18 @@ docdvi: tommath.src echo "hello" > tommath.ind perl booker.pl latex tommath > /dev/null + latex tommath > /dev/null makeindex tommath latex tommath > /dev/null - + # poster, makes the single page PDF poster poster: poster.tex pdflatex poster rm -f poster.aux poster.log -# makes the LTM book PS/PDF file, requires tetex, cleans up the LaTeX temp files -docs: - cd pics ; make pdfes - echo "hello" > tommath.ind - perl booker.pl PDF - latex tommath > /dev/null - makeindex tommath - latex tommath > /dev/null - pdflatex tommath +# makes the LTM book PDF file, requires tetex, cleans up the LaTeX temp files +docs: docdvi + dvipdf tommath rm -f tommath.log tommath.aux tommath.dvi tommath.idx tommath.toc tommath.lof tommath.ind tommath.ilg cd pics ; make clean @@ -99,6 +98,7 @@ docs: mandvi: bn.tex echo "hello" > bn.ind latex bn > /dev/null + latex bn > /dev/null makeindex bn latex bn > /dev/null diff --git a/makefile.bcc b/makefile.bcc index 7aba87c9a..6874d2f3f 100644 --- a/makefile.bcc +++ b/makefile.bcc @@ -29,12 +29,13 @@ bn_mp_toom_mul.obj bn_mp_toom_sqr.obj bn_mp_div_3.obj bn_s_mp_exptmod.obj \ bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj \ bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \ bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \ -bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_mp_prime_random.obj bn_prime_sizes_tab.obj bn_mp_exteuclid.obj +bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_prime_sizes_tab.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \ +bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj TARGET = libtommath.lib $(TARGET): $(OBJECTS) -.c.objbj: +.c.objbjbjbj: $(CC) $(CFLAGS) $< $(LIB) $(TARGET) -+$@ diff --git a/makefile.cygwin_dll b/makefile.cygwin_dll index c7cfc13f1..e5ab814c4 100644 --- a/makefile.cygwin_dll +++ b/makefile.cygwin_dll @@ -34,7 +34,8 @@ bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \ bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \ bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \ bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \ -bn_mp_init_multi.o bn_mp_clear_multi.o bn_mp_prime_random.o bn_prime_sizes_tab.o bn_mp_exteuclid.o +bn_mp_init_multi.o bn_mp_clear_multi.o bn_prime_sizes_tab.o bn_mp_exteuclid.o bn_mp_toradix_n.o \ +bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o # make a Windows DLL via Cygwin windll: $(OBJECTS) diff --git a/makefile.msvc b/makefile.msvc index 178987136..beeb77e49 100644 --- a/makefile.msvc +++ b/makefile.msvc @@ -28,7 +28,8 @@ bn_mp_toom_mul.obj bn_mp_toom_sqr.obj bn_mp_div_3.obj bn_s_mp_exptmod.obj \ bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj \ bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \ bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \ -bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_mp_prime_random.obj bn_prime_sizes_tab.obj bn_mp_exteuclid.obj +bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_prime_sizes_tab.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \ +bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj library: $(OBJECTS) lib /out:tommath.lib $(OBJECTS) diff --git a/mtest/logtab.h b/mtest/logtab.h index 6ed1baef9..68462bdb8 100644 --- a/mtest/logtab.h +++ b/mtest/logtab.h @@ -1,20 +1,20 @@ -const float s_logv_2[] = { - 0.000000000, 0.000000000, 1.000000000, 0.630929754, /* 0 1 2 3 */ - 0.500000000, 0.430676558, 0.386852807, 0.356207187, /* 4 5 6 7 */ - 0.333333333, 0.315464877, 0.301029996, 0.289064826, /* 8 9 10 11 */ - 0.278942946, 0.270238154, 0.262649535, 0.255958025, /* 12 13 14 15 */ - 0.250000000, 0.244650542, 0.239812467, 0.235408913, /* 16 17 18 19 */ - 0.231378213, 0.227670249, 0.224243824, 0.221064729, /* 20 21 22 23 */ - 0.218104292, 0.215338279, 0.212746054, 0.210309918, /* 24 25 26 27 */ - 0.208014598, 0.205846832, 0.203795047, 0.201849087, /* 28 29 30 31 */ - 0.200000000, 0.198239863, 0.196561632, 0.194959022, /* 32 33 34 35 */ - 0.193426404, 0.191958720, 0.190551412, 0.189200360, /* 36 37 38 39 */ - 0.187901825, 0.186652411, 0.185449023, 0.184288833, /* 40 41 42 43 */ - 0.183169251, 0.182087900, 0.181042597, 0.180031327, /* 44 45 46 47 */ - 0.179052232, 0.178103594, 0.177183820, 0.176291434, /* 48 49 50 51 */ - 0.175425064, 0.174583430, 0.173765343, 0.172969690, /* 52 53 54 55 */ - 0.172195434, 0.171441601, 0.170707280, 0.169991616, /* 56 57 58 59 */ - 0.169293808, 0.168613099, 0.167948779, 0.167300179, /* 60 61 62 63 */ - 0.166666667 -}; - +const float s_logv_2[] = { + 0.000000000, 0.000000000, 1.000000000, 0.630929754, /* 0 1 2 3 */ + 0.500000000, 0.430676558, 0.386852807, 0.356207187, /* 4 5 6 7 */ + 0.333333333, 0.315464877, 0.301029996, 0.289064826, /* 8 9 10 11 */ + 0.278942946, 0.270238154, 0.262649535, 0.255958025, /* 12 13 14 15 */ + 0.250000000, 0.244650542, 0.239812467, 0.235408913, /* 16 17 18 19 */ + 0.231378213, 0.227670249, 0.224243824, 0.221064729, /* 20 21 22 23 */ + 0.218104292, 0.215338279, 0.212746054, 0.210309918, /* 24 25 26 27 */ + 0.208014598, 0.205846832, 0.203795047, 0.201849087, /* 28 29 30 31 */ + 0.200000000, 0.198239863, 0.196561632, 0.194959022, /* 32 33 34 35 */ + 0.193426404, 0.191958720, 0.190551412, 0.189200360, /* 36 37 38 39 */ + 0.187901825, 0.186652411, 0.185449023, 0.184288833, /* 40 41 42 43 */ + 0.183169251, 0.182087900, 0.181042597, 0.180031327, /* 44 45 46 47 */ + 0.179052232, 0.178103594, 0.177183820, 0.176291434, /* 48 49 50 51 */ + 0.175425064, 0.174583430, 0.173765343, 0.172969690, /* 52 53 54 55 */ + 0.172195434, 0.171441601, 0.170707280, 0.169991616, /* 56 57 58 59 */ + 0.169293808, 0.168613099, 0.167948779, 0.167300179, /* 60 61 62 63 */ + 0.166666667 +}; + diff --git a/mtest/mpi-config.h b/mtest/mpi-config.h index 6efe574b2..9277dfb1e 100644 --- a/mtest/mpi-config.h +++ b/mtest/mpi-config.h @@ -1,86 +1,86 @@ -/* Default configuration for MPI library */ -/* $ID$ */ - -#ifndef MPI_CONFIG_H_ -#define MPI_CONFIG_H_ - -/* - For boolean options, - 0 = no - 1 = yes - - Other options are documented individually. - - */ - -#ifndef MP_IOFUNC -#define MP_IOFUNC 0 /* include mp_print() ? */ -#endif - -#ifndef MP_MODARITH -#define MP_MODARITH 1 /* include modular arithmetic ? */ -#endif - -#ifndef MP_NUMTH -#define MP_NUMTH 1 /* include number theoretic functions? */ -#endif - -#ifndef MP_LOGTAB -#define MP_LOGTAB 1 /* use table of logs instead of log()? */ -#endif - -#ifndef MP_MEMSET -#define MP_MEMSET 1 /* use memset() to zero buffers? */ -#endif - -#ifndef MP_MEMCPY -#define MP_MEMCPY 1 /* use memcpy() to copy buffers? */ -#endif - -#ifndef MP_CRYPTO -#define MP_CRYPTO 1 /* erase memory on free? */ -#endif - -#ifndef MP_ARGCHK -/* - 0 = no parameter checks - 1 = runtime checks, continue execution and return an error to caller - 2 = assertions; dump core on parameter errors - */ -#define MP_ARGCHK 2 /* how to check input arguments */ -#endif - -#ifndef MP_DEBUG -#define MP_DEBUG 0 /* print diagnostic output? */ -#endif - -#ifndef MP_DEFPREC -#define MP_DEFPREC 64 /* default precision, in digits */ -#endif - -#ifndef MP_MACRO -#define MP_MACRO 1 /* use macros for frequent calls? */ -#endif - -#ifndef MP_SQUARE -#define MP_SQUARE 1 /* use separate squaring code? */ -#endif - -#ifndef MP_PTAB_SIZE -/* - When building mpprime.c, we build in a table of small prime - values to use for primality testing. The more you include, - the more space they take up. See primes.c for the possible - values (currently 16, 32, 64, 128, 256, and 6542) - */ -#define MP_PTAB_SIZE 128 /* how many built-in primes? */ -#endif - -#ifndef MP_COMPAT_MACROS -#define MP_COMPAT_MACROS 1 /* define compatibility macros? */ -#endif - -#endif /* ifndef MPI_CONFIG_H_ */ - - -/* crc==3287762869, version==2, Sat Feb 02 06:43:53 2002 */ +/* Default configuration for MPI library */ +/* $ID$ */ + +#ifndef MPI_CONFIG_H_ +#define MPI_CONFIG_H_ + +/* + For boolean options, + 0 = no + 1 = yes + + Other options are documented individually. + + */ + +#ifndef MP_IOFUNC +#define MP_IOFUNC 0 /* include mp_print() ? */ +#endif + +#ifndef MP_MODARITH +#define MP_MODARITH 1 /* include modular arithmetic ? */ +#endif + +#ifndef MP_NUMTH +#define MP_NUMTH 1 /* include number theoretic functions? */ +#endif + +#ifndef MP_LOGTAB +#define MP_LOGTAB 1 /* use table of logs instead of log()? */ +#endif + +#ifndef MP_MEMSET +#define MP_MEMSET 1 /* use memset() to zero buffers? */ +#endif + +#ifndef MP_MEMCPY +#define MP_MEMCPY 1 /* use memcpy() to copy buffers? */ +#endif + +#ifndef MP_CRYPTO +#define MP_CRYPTO 1 /* erase memory on free? */ +#endif + +#ifndef MP_ARGCHK +/* + 0 = no parameter checks + 1 = runtime checks, continue execution and return an error to caller + 2 = assertions; dump core on parameter errors + */ +#define MP_ARGCHK 2 /* how to check input arguments */ +#endif + +#ifndef MP_DEBUG +#define MP_DEBUG 0 /* print diagnostic output? */ +#endif + +#ifndef MP_DEFPREC +#define MP_DEFPREC 64 /* default precision, in digits */ +#endif + +#ifndef MP_MACRO +#define MP_MACRO 1 /* use macros for frequent calls? */ +#endif + +#ifndef MP_SQUARE +#define MP_SQUARE 1 /* use separate squaring code? */ +#endif + +#ifndef MP_PTAB_SIZE +/* + When building mpprime.c, we build in a table of small prime + values to use for primality testing. The more you include, + the more space they take up. See primes.c for the possible + values (currently 16, 32, 64, 128, 256, and 6542) + */ +#define MP_PTAB_SIZE 128 /* how many built-in primes? */ +#endif + +#ifndef MP_COMPAT_MACROS +#define MP_COMPAT_MACROS 1 /* define compatibility macros? */ +#endif + +#endif /* ifndef MPI_CONFIG_H_ */ + + +/* crc==3287762869, version==2, Sat Feb 02 06:43:53 2002 */ diff --git a/mtest/mpi.c b/mtest/mpi.c index 96066c33e..94019ef67 100644 --- a/mtest/mpi.c +++ b/mtest/mpi.c @@ -1,3981 +1,3981 @@ -/* - mpi.c - - by Michael J. Fromberger - Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved - - Arbitrary precision integer arithmetic library - - $ID$ - */ - -#include "mpi.h" -#include -#include -#include - -#if MP_DEBUG -#include - -#define DIAG(T,V) {fprintf(stderr,T);mp_print(V,stderr);fputc('\n',stderr);} -#else -#define DIAG(T,V) -#endif - -/* - If MP_LOGTAB is not defined, use the math library to compute the - logarithms on the fly. Otherwise, use the static table below. - Pick which works best for your system. - */ -#if MP_LOGTAB - -/* {{{ s_logv_2[] - log table for 2 in various bases */ - -/* - A table of the logs of 2 for various bases (the 0 and 1 entries of - this table are meaningless and should not be referenced). - - This table is used to compute output lengths for the mp_toradix() - function. Since a number n in radix r takes up about log_r(n) - digits, we estimate the output size by taking the least integer - greater than log_r(n), where: - - log_r(n) = log_2(n) * log_r(2) - - This table, therefore, is a table of log_r(2) for 2 <= r <= 36, - which are the output bases supported. - */ - -#include "logtab.h" - -/* }}} */ -#define LOG_V_2(R) s_logv_2[(R)] - -#else - -#include -#define LOG_V_2(R) (log(2.0)/log(R)) - -#endif - -/* Default precision for newly created mp_int's */ -static unsigned int s_mp_defprec = MP_DEFPREC; - -/* {{{ Digit arithmetic macros */ - -/* - When adding and multiplying digits, the results can be larger than - can be contained in an mp_digit. Thus, an mp_word is used. These - macros mask off the upper and lower digits of the mp_word (the - mp_word may be more than 2 mp_digits wide, but we only concern - ourselves with the low-order 2 mp_digits) - - If your mp_word DOES have more than 2 mp_digits, you need to - uncomment the first line, and comment out the second. - */ - -/* #define CARRYOUT(W) (((W)>>DIGIT_BIT)&MP_DIGIT_MAX) */ -#define CARRYOUT(W) ((W)>>DIGIT_BIT) -#define ACCUM(W) ((W)&MP_DIGIT_MAX) - -/* }}} */ - -/* {{{ Comparison constants */ - -#define MP_LT -1 -#define MP_EQ 0 -#define MP_GT 1 - -/* }}} */ - -/* {{{ Constant strings */ - -/* Constant strings returned by mp_strerror() */ -static const char *mp_err_string[] = { - "unknown result code", /* say what? */ - "boolean true", /* MP_OKAY, MP_YES */ - "boolean false", /* MP_NO */ - "out of memory", /* MP_MEM */ - "argument out of range", /* MP_RANGE */ - "invalid input parameter", /* MP_BADARG */ - "result is undefined" /* MP_UNDEF */ -}; - -/* Value to digit maps for radix conversion */ - -/* s_dmap_1 - standard digits and letters */ -static const char *s_dmap_1 = - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; - -#if 0 -/* s_dmap_2 - base64 ordering for digits */ -static const char *s_dmap_2 = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; -#endif - -/* }}} */ - -/* {{{ Static function declarations */ - -/* - If MP_MACRO is false, these will be defined as actual functions; - otherwise, suitable macro definitions will be used. This works - around the fact that ANSI C89 doesn't support an 'inline' keyword - (although I hear C9x will ... about bloody time). At present, the - macro definitions are identical to the function bodies, but they'll - expand in place, instead of generating a function call. - - I chose these particular functions to be made into macros because - some profiling showed they are called a lot on a typical workload, - and yet they are primarily housekeeping. - */ -#if MP_MACRO == 0 - void s_mp_setz(mp_digit *dp, mp_size count); /* zero digits */ - void s_mp_copy(mp_digit *sp, mp_digit *dp, mp_size count); /* copy */ - void *s_mp_alloc(size_t nb, size_t ni); /* general allocator */ - void s_mp_free(void *ptr); /* general free function */ -#else - - /* Even if these are defined as macros, we need to respect the settings - of the MP_MEMSET and MP_MEMCPY configuration options... - */ - #if MP_MEMSET == 0 - #define s_mp_setz(dp, count) \ - {int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=0;} - #else - #define s_mp_setz(dp, count) memset(dp, 0, (count) * sizeof(mp_digit)) - #endif /* MP_MEMSET */ - - #if MP_MEMCPY == 0 - #define s_mp_copy(sp, dp, count) \ - {int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=(sp)[ix];} - #else - #define s_mp_copy(sp, dp, count) memcpy(dp, sp, (count) * sizeof(mp_digit)) - #endif /* MP_MEMCPY */ - - #define s_mp_alloc(nb, ni) calloc(nb, ni) - #define s_mp_free(ptr) {if(ptr) free(ptr);} -#endif /* MP_MACRO */ - -mp_err s_mp_grow(mp_int *mp, mp_size min); /* increase allocated size */ -mp_err s_mp_pad(mp_int *mp, mp_size min); /* left pad with zeroes */ - -void s_mp_clamp(mp_int *mp); /* clip leading zeroes */ - -void s_mp_exch(mp_int *a, mp_int *b); /* swap a and b in place */ - -mp_err s_mp_lshd(mp_int *mp, mp_size p); /* left-shift by p digits */ -void s_mp_rshd(mp_int *mp, mp_size p); /* right-shift by p digits */ -void s_mp_div_2d(mp_int *mp, mp_digit d); /* divide by 2^d in place */ -void s_mp_mod_2d(mp_int *mp, mp_digit d); /* modulo 2^d in place */ -mp_err s_mp_mul_2d(mp_int *mp, mp_digit d); /* multiply by 2^d in place*/ -void s_mp_div_2(mp_int *mp); /* divide by 2 in place */ -mp_err s_mp_mul_2(mp_int *mp); /* multiply by 2 in place */ -mp_digit s_mp_norm(mp_int *a, mp_int *b); /* normalize for division */ -mp_err s_mp_add_d(mp_int *mp, mp_digit d); /* unsigned digit addition */ -mp_err s_mp_sub_d(mp_int *mp, mp_digit d); /* unsigned digit subtract */ -mp_err s_mp_mul_d(mp_int *mp, mp_digit d); /* unsigned digit multiply */ -mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r); - /* unsigned digit divide */ -mp_err s_mp_reduce(mp_int *x, mp_int *m, mp_int *mu); - /* Barrett reduction */ -mp_err s_mp_add(mp_int *a, mp_int *b); /* magnitude addition */ -mp_err s_mp_sub(mp_int *a, mp_int *b); /* magnitude subtract */ -mp_err s_mp_mul(mp_int *a, mp_int *b); /* magnitude multiply */ -#if 0 -void s_mp_kmul(mp_digit *a, mp_digit *b, mp_digit *out, mp_size len); - /* multiply buffers in place */ -#endif -#if MP_SQUARE -mp_err s_mp_sqr(mp_int *a); /* magnitude square */ -#else -#define s_mp_sqr(a) s_mp_mul(a, a) -#endif -mp_err s_mp_div(mp_int *a, mp_int *b); /* magnitude divide */ -mp_err s_mp_2expt(mp_int *a, mp_digit k); /* a = 2^k */ -int s_mp_cmp(mp_int *a, mp_int *b); /* magnitude comparison */ -int s_mp_cmp_d(mp_int *a, mp_digit d); /* magnitude digit compare */ -int s_mp_ispow2(mp_int *v); /* is v a power of 2? */ -int s_mp_ispow2d(mp_digit d); /* is d a power of 2? */ - -int s_mp_tovalue(char ch, int r); /* convert ch to value */ -char s_mp_todigit(int val, int r, int low); /* convert val to digit */ -int s_mp_outlen(int bits, int r); /* output length in bytes */ - -/* }}} */ - -/* {{{ Default precision manipulation */ - -unsigned int mp_get_prec(void) -{ - return s_mp_defprec; - -} /* end mp_get_prec() */ - -void mp_set_prec(unsigned int prec) -{ - if(prec == 0) - s_mp_defprec = MP_DEFPREC; - else - s_mp_defprec = prec; - -} /* end mp_set_prec() */ - -/* }}} */ - -/*------------------------------------------------------------------------*/ -/* {{{ mp_init(mp) */ - -/* - mp_init(mp) - - Initialize a new zero-valued mp_int. Returns MP_OKAY if successful, - MP_MEM if memory could not be allocated for the structure. - */ - -mp_err mp_init(mp_int *mp) -{ - return mp_init_size(mp, s_mp_defprec); - -} /* end mp_init() */ - -/* }}} */ - -/* {{{ mp_init_array(mp[], count) */ - -mp_err mp_init_array(mp_int mp[], int count) -{ - mp_err res; - int pos; - - ARGCHK(mp !=NULL && count > 0, MP_BADARG); - - for(pos = 0; pos < count; ++pos) { - if((res = mp_init(&mp[pos])) != MP_OKAY) - goto CLEANUP; - } - - return MP_OKAY; - - CLEANUP: - while(--pos >= 0) - mp_clear(&mp[pos]); - - return res; - -} /* end mp_init_array() */ - -/* }}} */ - -/* {{{ mp_init_size(mp, prec) */ - -/* - mp_init_size(mp, prec) - - Initialize a new zero-valued mp_int with at least the given - precision; returns MP_OKAY if successful, or MP_MEM if memory could - not be allocated for the structure. - */ - -mp_err mp_init_size(mp_int *mp, mp_size prec) -{ - ARGCHK(mp != NULL && prec > 0, MP_BADARG); - - if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL) - return MP_MEM; - - SIGN(mp) = MP_ZPOS; - USED(mp) = 1; - ALLOC(mp) = prec; - - return MP_OKAY; - -} /* end mp_init_size() */ - -/* }}} */ - -/* {{{ mp_init_copy(mp, from) */ - -/* - mp_init_copy(mp, from) - - Initialize mp as an exact copy of from. Returns MP_OKAY if - successful, MP_MEM if memory could not be allocated for the new - structure. - */ - -mp_err mp_init_copy(mp_int *mp, mp_int *from) -{ - ARGCHK(mp != NULL && from != NULL, MP_BADARG); - - if(mp == from) - return MP_OKAY; - - if((DIGITS(mp) = s_mp_alloc(USED(from), sizeof(mp_digit))) == NULL) - return MP_MEM; - - s_mp_copy(DIGITS(from), DIGITS(mp), USED(from)); - USED(mp) = USED(from); - ALLOC(mp) = USED(from); - SIGN(mp) = SIGN(from); - - return MP_OKAY; - -} /* end mp_init_copy() */ - -/* }}} */ - -/* {{{ mp_copy(from, to) */ - -/* - mp_copy(from, to) - - Copies the mp_int 'from' to the mp_int 'to'. It is presumed that - 'to' has already been initialized (if not, use mp_init_copy() - instead). If 'from' and 'to' are identical, nothing happens. - */ - -mp_err mp_copy(mp_int *from, mp_int *to) -{ - ARGCHK(from != NULL && to != NULL, MP_BADARG); - - if(from == to) - return MP_OKAY; - - { /* copy */ - mp_digit *tmp; - - /* - If the allocated buffer in 'to' already has enough space to hold - all the used digits of 'from', we'll re-use it to avoid hitting - the memory allocater more than necessary; otherwise, we'd have - to grow anyway, so we just allocate a hunk and make the copy as - usual - */ - if(ALLOC(to) >= USED(from)) { - s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from)); - s_mp_copy(DIGITS(from), DIGITS(to), USED(from)); - - } else { - if((tmp = s_mp_alloc(USED(from), sizeof(mp_digit))) == NULL) - return MP_MEM; - - s_mp_copy(DIGITS(from), tmp, USED(from)); - - if(DIGITS(to) != NULL) { -#if MP_CRYPTO - s_mp_setz(DIGITS(to), ALLOC(to)); -#endif - s_mp_free(DIGITS(to)); - } - - DIGITS(to) = tmp; - ALLOC(to) = USED(from); - } - - /* Copy the precision and sign from the original */ - USED(to) = USED(from); - SIGN(to) = SIGN(from); - } /* end copy */ - - return MP_OKAY; - -} /* end mp_copy() */ - -/* }}} */ - -/* {{{ mp_exch(mp1, mp2) */ - -/* - mp_exch(mp1, mp2) - - Exchange mp1 and mp2 without allocating any intermediate memory - (well, unless you count the stack space needed for this call and the - locals it creates...). This cannot fail. - */ - -void mp_exch(mp_int *mp1, mp_int *mp2) -{ -#if MP_ARGCHK == 2 - assert(mp1 != NULL && mp2 != NULL); -#else - if(mp1 == NULL || mp2 == NULL) - return; -#endif - - s_mp_exch(mp1, mp2); - -} /* end mp_exch() */ - -/* }}} */ - -/* {{{ mp_clear(mp) */ - -/* - mp_clear(mp) - - Release the storage used by an mp_int, and void its fields so that - if someone calls mp_clear() again for the same int later, we won't - get tollchocked. - */ - -void mp_clear(mp_int *mp) -{ - if(mp == NULL) - return; - - if(DIGITS(mp) != NULL) { -#if MP_CRYPTO - s_mp_setz(DIGITS(mp), ALLOC(mp)); -#endif - s_mp_free(DIGITS(mp)); - DIGITS(mp) = NULL; - } - - USED(mp) = 0; - ALLOC(mp) = 0; - -} /* end mp_clear() */ - -/* }}} */ - -/* {{{ mp_clear_array(mp[], count) */ - -void mp_clear_array(mp_int mp[], int count) -{ - ARGCHK(mp != NULL && count > 0, MP_BADARG); - - while(--count >= 0) - mp_clear(&mp[count]); - -} /* end mp_clear_array() */ - -/* }}} */ - -/* {{{ mp_zero(mp) */ - -/* - mp_zero(mp) - - Set mp to zero. Does not change the allocated size of the structure, - and therefore cannot fail (except on a bad argument, which we ignore) - */ -void mp_zero(mp_int *mp) -{ - if(mp == NULL) - return; - - s_mp_setz(DIGITS(mp), ALLOC(mp)); - USED(mp) = 1; - SIGN(mp) = MP_ZPOS; - -} /* end mp_zero() */ - -/* }}} */ - -/* {{{ mp_set(mp, d) */ - -void mp_set(mp_int *mp, mp_digit d) -{ - if(mp == NULL) - return; - - mp_zero(mp); - DIGIT(mp, 0) = d; - -} /* end mp_set() */ - -/* }}} */ - -/* {{{ mp_set_int(mp, z) */ - -mp_err mp_set_int(mp_int *mp, long z) -{ - int ix; - unsigned long v = abs(z); - mp_err res; - - ARGCHK(mp != NULL, MP_BADARG); - - mp_zero(mp); - if(z == 0) - return MP_OKAY; /* shortcut for zero */ - - for(ix = sizeof(long) - 1; ix >= 0; ix--) { - - if((res = s_mp_mul_2d(mp, CHAR_BIT)) != MP_OKAY) - return res; - - res = s_mp_add_d(mp, - (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX)); - if(res != MP_OKAY) - return res; - - } - - if(z < 0) - SIGN(mp) = MP_NEG; - - return MP_OKAY; - -} /* end mp_set_int() */ - -/* }}} */ - -/*------------------------------------------------------------------------*/ -/* {{{ Digit arithmetic */ - -/* {{{ mp_add_d(a, d, b) */ - -/* - mp_add_d(a, d, b) - - Compute the sum b = a + d, for a single digit d. Respects the sign of - its primary addend (single digits are unsigned anyway). - */ - -mp_err mp_add_d(mp_int *a, mp_digit d, mp_int *b) -{ - mp_err res = MP_OKAY; - - ARGCHK(a != NULL && b != NULL, MP_BADARG); - - if((res = mp_copy(a, b)) != MP_OKAY) - return res; - - if(SIGN(b) == MP_ZPOS) { - res = s_mp_add_d(b, d); - } else if(s_mp_cmp_d(b, d) >= 0) { - res = s_mp_sub_d(b, d); - } else { - SIGN(b) = MP_ZPOS; - - DIGIT(b, 0) = d - DIGIT(b, 0); - } - - return res; - -} /* end mp_add_d() */ - -/* }}} */ - -/* {{{ mp_sub_d(a, d, b) */ - -/* - mp_sub_d(a, d, b) - - Compute the difference b = a - d, for a single digit d. Respects the - sign of its subtrahend (single digits are unsigned anyway). - */ - -mp_err mp_sub_d(mp_int *a, mp_digit d, mp_int *b) -{ - mp_err res; - - ARGCHK(a != NULL && b != NULL, MP_BADARG); - - if((res = mp_copy(a, b)) != MP_OKAY) - return res; - - if(SIGN(b) == MP_NEG) { - if((res = s_mp_add_d(b, d)) != MP_OKAY) - return res; - - } else if(s_mp_cmp_d(b, d) >= 0) { - if((res = s_mp_sub_d(b, d)) != MP_OKAY) - return res; - - } else { - mp_neg(b, b); - - DIGIT(b, 0) = d - DIGIT(b, 0); - SIGN(b) = MP_NEG; - } - - if(s_mp_cmp_d(b, 0) == 0) - SIGN(b) = MP_ZPOS; - - return MP_OKAY; - -} /* end mp_sub_d() */ - -/* }}} */ - -/* {{{ mp_mul_d(a, d, b) */ - -/* - mp_mul_d(a, d, b) - - Compute the product b = a * d, for a single digit d. Respects the sign - of its multiplicand (single digits are unsigned anyway) - */ - -mp_err mp_mul_d(mp_int *a, mp_digit d, mp_int *b) -{ - mp_err res; - - ARGCHK(a != NULL && b != NULL, MP_BADARG); - - if(d == 0) { - mp_zero(b); - return MP_OKAY; - } - - if((res = mp_copy(a, b)) != MP_OKAY) - return res; - - res = s_mp_mul_d(b, d); - - return res; - -} /* end mp_mul_d() */ - -/* }}} */ - -/* {{{ mp_mul_2(a, c) */ - -mp_err mp_mul_2(mp_int *a, mp_int *c) -{ - mp_err res; - - ARGCHK(a != NULL && c != NULL, MP_BADARG); - - if((res = mp_copy(a, c)) != MP_OKAY) - return res; - - return s_mp_mul_2(c); - -} /* end mp_mul_2() */ - -/* }}} */ - -/* {{{ mp_div_d(a, d, q, r) */ - -/* - mp_div_d(a, d, q, r) - - Compute the quotient q = a / d and remainder r = a mod d, for a - single digit d. Respects the sign of its divisor (single digits are - unsigned anyway). - */ - -mp_err mp_div_d(mp_int *a, mp_digit d, mp_int *q, mp_digit *r) -{ - mp_err res; - mp_digit rem; - int pow; - - ARGCHK(a != NULL, MP_BADARG); - - if(d == 0) - return MP_RANGE; - - /* Shortcut for powers of two ... */ - if((pow = s_mp_ispow2d(d)) >= 0) { - mp_digit mask; - - mask = (1 << pow) - 1; - rem = DIGIT(a, 0) & mask; - - if(q) { - mp_copy(a, q); - s_mp_div_2d(q, pow); - } - - if(r) - *r = rem; - - return MP_OKAY; - } - - /* - If the quotient is actually going to be returned, we'll try to - avoid hitting the memory allocator by copying the dividend into it - and doing the division there. This can't be any _worse_ than - always copying, and will sometimes be better (since it won't make - another copy) - - If it's not going to be returned, we need to allocate a temporary - to hold the quotient, which will just be discarded. - */ - if(q) { - if((res = mp_copy(a, q)) != MP_OKAY) - return res; - - res = s_mp_div_d(q, d, &rem); - if(s_mp_cmp_d(q, 0) == MP_EQ) - SIGN(q) = MP_ZPOS; - - } else { - mp_int qp; - - if((res = mp_init_copy(&qp, a)) != MP_OKAY) - return res; - - res = s_mp_div_d(&qp, d, &rem); - if(s_mp_cmp_d(&qp, 0) == 0) - SIGN(&qp) = MP_ZPOS; - - mp_clear(&qp); - } - - if(r) - *r = rem; - - return res; - -} /* end mp_div_d() */ - -/* }}} */ - -/* {{{ mp_div_2(a, c) */ - -/* - mp_div_2(a, c) - - Compute c = a / 2, disregarding the remainder. - */ - -mp_err mp_div_2(mp_int *a, mp_int *c) -{ - mp_err res; - - ARGCHK(a != NULL && c != NULL, MP_BADARG); - - if((res = mp_copy(a, c)) != MP_OKAY) - return res; - - s_mp_div_2(c); - - return MP_OKAY; - -} /* end mp_div_2() */ - -/* }}} */ - -/* {{{ mp_expt_d(a, d, b) */ - -mp_err mp_expt_d(mp_int *a, mp_digit d, mp_int *c) -{ - mp_int s, x; - mp_err res; - - ARGCHK(a != NULL && c != NULL, MP_BADARG); - - if((res = mp_init(&s)) != MP_OKAY) - return res; - if((res = mp_init_copy(&x, a)) != MP_OKAY) - goto X; - - DIGIT(&s, 0) = 1; - - while(d != 0) { - if(d & 1) { - if((res = s_mp_mul(&s, &x)) != MP_OKAY) - goto CLEANUP; - } - - d >>= 1; - - if((res = s_mp_sqr(&x)) != MP_OKAY) - goto CLEANUP; - } - - s_mp_exch(&s, c); - -CLEANUP: - mp_clear(&x); -X: - mp_clear(&s); - - return res; - -} /* end mp_expt_d() */ - -/* }}} */ - -/* }}} */ - -/*------------------------------------------------------------------------*/ -/* {{{ Full arithmetic */ - -/* {{{ mp_abs(a, b) */ - -/* - mp_abs(a, b) - - Compute b = |a|. 'a' and 'b' may be identical. - */ - -mp_err mp_abs(mp_int *a, mp_int *b) -{ - mp_err res; - - ARGCHK(a != NULL && b != NULL, MP_BADARG); - - if((res = mp_copy(a, b)) != MP_OKAY) - return res; - - SIGN(b) = MP_ZPOS; - - return MP_OKAY; - -} /* end mp_abs() */ - -/* }}} */ - -/* {{{ mp_neg(a, b) */ - -/* - mp_neg(a, b) - - Compute b = -a. 'a' and 'b' may be identical. - */ - -mp_err mp_neg(mp_int *a, mp_int *b) -{ - mp_err res; - - ARGCHK(a != NULL && b != NULL, MP_BADARG); - - if((res = mp_copy(a, b)) != MP_OKAY) - return res; - - if(s_mp_cmp_d(b, 0) == MP_EQ) - SIGN(b) = MP_ZPOS; - else - SIGN(b) = (SIGN(b) == MP_NEG) ? MP_ZPOS : MP_NEG; - - return MP_OKAY; - -} /* end mp_neg() */ - -/* }}} */ - -/* {{{ mp_add(a, b, c) */ - -/* - mp_add(a, b, c) - - Compute c = a + b. All parameters may be identical. - */ - -mp_err mp_add(mp_int *a, mp_int *b, mp_int *c) -{ - mp_err res; - int cmp; - - ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); - - if(SIGN(a) == SIGN(b)) { /* same sign: add values, keep sign */ - - /* Commutativity of addition lets us do this in either order, - so we avoid having to use a temporary even if the result - is supposed to replace the output - */ - if(c == b) { - if((res = s_mp_add(c, a)) != MP_OKAY) - return res; - } else { - if(c != a && (res = mp_copy(a, c)) != MP_OKAY) - return res; - - if((res = s_mp_add(c, b)) != MP_OKAY) - return res; - } - - } else if((cmp = s_mp_cmp(a, b)) > 0) { /* different sign: a > b */ - - /* If the output is going to be clobbered, we will use a temporary - variable; otherwise, we'll do it without touching the memory - allocator at all, if possible - */ - if(c == b) { - mp_int tmp; - - if((res = mp_init_copy(&tmp, a)) != MP_OKAY) - return res; - if((res = s_mp_sub(&tmp, b)) != MP_OKAY) { - mp_clear(&tmp); - return res; - } - - s_mp_exch(&tmp, c); - mp_clear(&tmp); - - } else { - - if(c != a && (res = mp_copy(a, c)) != MP_OKAY) - return res; - if((res = s_mp_sub(c, b)) != MP_OKAY) - return res; - - } - - } else if(cmp == 0) { /* different sign, a == b */ - - mp_zero(c); - return MP_OKAY; - - } else { /* different sign: a < b */ - - /* See above... */ - if(c == a) { - mp_int tmp; - - if((res = mp_init_copy(&tmp, b)) != MP_OKAY) - return res; - if((res = s_mp_sub(&tmp, a)) != MP_OKAY) { - mp_clear(&tmp); - return res; - } - - s_mp_exch(&tmp, c); - mp_clear(&tmp); - - } else { - - if(c != b && (res = mp_copy(b, c)) != MP_OKAY) - return res; - if((res = s_mp_sub(c, a)) != MP_OKAY) - return res; - - } - } - - if(USED(c) == 1 && DIGIT(c, 0) == 0) - SIGN(c) = MP_ZPOS; - - return MP_OKAY; - -} /* end mp_add() */ - -/* }}} */ - -/* {{{ mp_sub(a, b, c) */ - -/* - mp_sub(a, b, c) - - Compute c = a - b. All parameters may be identical. - */ - -mp_err mp_sub(mp_int *a, mp_int *b, mp_int *c) -{ - mp_err res; - int cmp; - - ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); - - if(SIGN(a) != SIGN(b)) { - if(c == a) { - if((res = s_mp_add(c, b)) != MP_OKAY) - return res; - } else { - if(c != b && ((res = mp_copy(b, c)) != MP_OKAY)) - return res; - if((res = s_mp_add(c, a)) != MP_OKAY) - return res; - SIGN(c) = SIGN(a); - } - - } else if((cmp = s_mp_cmp(a, b)) > 0) { /* Same sign, a > b */ - if(c == b) { - mp_int tmp; - - if((res = mp_init_copy(&tmp, a)) != MP_OKAY) - return res; - if((res = s_mp_sub(&tmp, b)) != MP_OKAY) { - mp_clear(&tmp); - return res; - } - s_mp_exch(&tmp, c); - mp_clear(&tmp); - - } else { - if(c != a && ((res = mp_copy(a, c)) != MP_OKAY)) - return res; - - if((res = s_mp_sub(c, b)) != MP_OKAY) - return res; - } - - } else if(cmp == 0) { /* Same sign, equal magnitude */ - mp_zero(c); - return MP_OKAY; - - } else { /* Same sign, b > a */ - if(c == a) { - mp_int tmp; - - if((res = mp_init_copy(&tmp, b)) != MP_OKAY) - return res; - - if((res = s_mp_sub(&tmp, a)) != MP_OKAY) { - mp_clear(&tmp); - return res; - } - s_mp_exch(&tmp, c); - mp_clear(&tmp); - - } else { - if(c != b && ((res = mp_copy(b, c)) != MP_OKAY)) - return res; - - if((res = s_mp_sub(c, a)) != MP_OKAY) - return res; - } - - SIGN(c) = !SIGN(b); - } - - if(USED(c) == 1 && DIGIT(c, 0) == 0) - SIGN(c) = MP_ZPOS; - - return MP_OKAY; - -} /* end mp_sub() */ - -/* }}} */ - -/* {{{ mp_mul(a, b, c) */ - -/* - mp_mul(a, b, c) - - Compute c = a * b. All parameters may be identical. - */ - -mp_err mp_mul(mp_int *a, mp_int *b, mp_int *c) -{ - mp_err res; - mp_sign sgn; - - ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); - - sgn = (SIGN(a) == SIGN(b)) ? MP_ZPOS : MP_NEG; - - if(c == b) { - if((res = s_mp_mul(c, a)) != MP_OKAY) - return res; - - } else { - if((res = mp_copy(a, c)) != MP_OKAY) - return res; - - if((res = s_mp_mul(c, b)) != MP_OKAY) - return res; - } - - if(sgn == MP_ZPOS || s_mp_cmp_d(c, 0) == MP_EQ) - SIGN(c) = MP_ZPOS; - else - SIGN(c) = sgn; - - return MP_OKAY; - -} /* end mp_mul() */ - -/* }}} */ - -/* {{{ mp_mul_2d(a, d, c) */ - -/* - mp_mul_2d(a, d, c) - - Compute c = a * 2^d. a may be the same as c. - */ - -mp_err mp_mul_2d(mp_int *a, mp_digit d, mp_int *c) -{ - mp_err res; - - ARGCHK(a != NULL && c != NULL, MP_BADARG); - - if((res = mp_copy(a, c)) != MP_OKAY) - return res; - - if(d == 0) - return MP_OKAY; - - return s_mp_mul_2d(c, d); - -} /* end mp_mul() */ - -/* }}} */ - -/* {{{ mp_sqr(a, b) */ - -#if MP_SQUARE -mp_err mp_sqr(mp_int *a, mp_int *b) -{ - mp_err res; - - ARGCHK(a != NULL && b != NULL, MP_BADARG); - - if((res = mp_copy(a, b)) != MP_OKAY) - return res; - - if((res = s_mp_sqr(b)) != MP_OKAY) - return res; - - SIGN(b) = MP_ZPOS; - - return MP_OKAY; - -} /* end mp_sqr() */ -#endif - -/* }}} */ - -/* {{{ mp_div(a, b, q, r) */ - -/* - mp_div(a, b, q, r) - - Compute q = a / b and r = a mod b. Input parameters may be re-used - as output parameters. If q or r is NULL, that portion of the - computation will be discarded (although it will still be computed) - - Pay no attention to the hacker behind the curtain. - */ - -mp_err mp_div(mp_int *a, mp_int *b, mp_int *q, mp_int *r) -{ - mp_err res; - mp_int qtmp, rtmp; - int cmp; - - ARGCHK(a != NULL && b != NULL, MP_BADARG); - - if(mp_cmp_z(b) == MP_EQ) - return MP_RANGE; - - /* If a <= b, we can compute the solution without division, and - avoid any memory allocation - */ - if((cmp = s_mp_cmp(a, b)) < 0) { - if(r) { - if((res = mp_copy(a, r)) != MP_OKAY) - return res; - } - - if(q) - mp_zero(q); - - return MP_OKAY; - - } else if(cmp == 0) { - - /* Set quotient to 1, with appropriate sign */ - if(q) { - int qneg = (SIGN(a) != SIGN(b)); - - mp_set(q, 1); - if(qneg) - SIGN(q) = MP_NEG; - } - - if(r) - mp_zero(r); - - return MP_OKAY; - } - - /* If we get here, it means we actually have to do some division */ - - /* Set up some temporaries... */ - if((res = mp_init_copy(&qtmp, a)) != MP_OKAY) - return res; - if((res = mp_init_copy(&rtmp, b)) != MP_OKAY) - goto CLEANUP; - - if((res = s_mp_div(&qtmp, &rtmp)) != MP_OKAY) - goto CLEANUP; - - /* Compute the signs for the output */ - SIGN(&rtmp) = SIGN(a); /* Sr = Sa */ - if(SIGN(a) == SIGN(b)) - SIGN(&qtmp) = MP_ZPOS; /* Sq = MP_ZPOS if Sa = Sb */ - else - SIGN(&qtmp) = MP_NEG; /* Sq = MP_NEG if Sa != Sb */ - - if(s_mp_cmp_d(&qtmp, 0) == MP_EQ) - SIGN(&qtmp) = MP_ZPOS; - if(s_mp_cmp_d(&rtmp, 0) == MP_EQ) - SIGN(&rtmp) = MP_ZPOS; - - /* Copy output, if it is needed */ - if(q) - s_mp_exch(&qtmp, q); - - if(r) - s_mp_exch(&rtmp, r); - -CLEANUP: - mp_clear(&rtmp); - mp_clear(&qtmp); - - return res; - -} /* end mp_div() */ - -/* }}} */ - -/* {{{ mp_div_2d(a, d, q, r) */ - -mp_err mp_div_2d(mp_int *a, mp_digit d, mp_int *q, mp_int *r) -{ - mp_err res; - - ARGCHK(a != NULL, MP_BADARG); - - if(q) { - if((res = mp_copy(a, q)) != MP_OKAY) - return res; - - s_mp_div_2d(q, d); - } - - if(r) { - if((res = mp_copy(a, r)) != MP_OKAY) - return res; - - s_mp_mod_2d(r, d); - } - - return MP_OKAY; - -} /* end mp_div_2d() */ - -/* }}} */ - -/* {{{ mp_expt(a, b, c) */ - -/* - mp_expt(a, b, c) - - Compute c = a ** b, that is, raise a to the b power. Uses a - standard iterative square-and-multiply technique. - */ - -mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c) -{ - mp_int s, x; - mp_err res; - mp_digit d; - int dig, bit; - - ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); - - if(mp_cmp_z(b) < 0) - return MP_RANGE; - - if((res = mp_init(&s)) != MP_OKAY) - return res; - - mp_set(&s, 1); - - if((res = mp_init_copy(&x, a)) != MP_OKAY) - goto X; - - /* Loop over low-order digits in ascending order */ - for(dig = 0; dig < (USED(b) - 1); dig++) { - d = DIGIT(b, dig); - - /* Loop over bits of each non-maximal digit */ - for(bit = 0; bit < DIGIT_BIT; bit++) { - if(d & 1) { - if((res = s_mp_mul(&s, &x)) != MP_OKAY) - goto CLEANUP; - } - - d >>= 1; - - if((res = s_mp_sqr(&x)) != MP_OKAY) - goto CLEANUP; - } - } - - /* Consider now the last digit... */ - d = DIGIT(b, dig); - - while(d) { - if(d & 1) { - if((res = s_mp_mul(&s, &x)) != MP_OKAY) - goto CLEANUP; - } - - d >>= 1; - - if((res = s_mp_sqr(&x)) != MP_OKAY) - goto CLEANUP; - } - - if(mp_iseven(b)) - SIGN(&s) = SIGN(a); - - res = mp_copy(&s, c); - -CLEANUP: - mp_clear(&x); -X: - mp_clear(&s); - - return res; - -} /* end mp_expt() */ - -/* }}} */ - -/* {{{ mp_2expt(a, k) */ - -/* Compute a = 2^k */ - -mp_err mp_2expt(mp_int *a, mp_digit k) -{ - ARGCHK(a != NULL, MP_BADARG); - - return s_mp_2expt(a, k); - -} /* end mp_2expt() */ - -/* }}} */ - -/* {{{ mp_mod(a, m, c) */ - -/* - mp_mod(a, m, c) - - Compute c = a (mod m). Result will always be 0 <= c < m. - */ - -mp_err mp_mod(mp_int *a, mp_int *m, mp_int *c) -{ - mp_err res; - int mag; - - ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG); - - if(SIGN(m) == MP_NEG) - return MP_RANGE; - - /* - If |a| > m, we need to divide to get the remainder and take the - absolute value. - - If |a| < m, we don't need to do any division, just copy and adjust - the sign (if a is negative). - - If |a| == m, we can simply set the result to zero. - - This order is intended to minimize the average path length of the - comparison chain on common workloads -- the most frequent cases are - that |a| != m, so we do those first. - */ - if((mag = s_mp_cmp(a, m)) > 0) { - if((res = mp_div(a, m, NULL, c)) != MP_OKAY) - return res; - - if(SIGN(c) == MP_NEG) { - if((res = mp_add(c, m, c)) != MP_OKAY) - return res; - } - - } else if(mag < 0) { - if((res = mp_copy(a, c)) != MP_OKAY) - return res; - - if(mp_cmp_z(a) < 0) { - if((res = mp_add(c, m, c)) != MP_OKAY) - return res; - - } - - } else { - mp_zero(c); - - } - - return MP_OKAY; - -} /* end mp_mod() */ - -/* }}} */ - -/* {{{ mp_mod_d(a, d, c) */ - -/* - mp_mod_d(a, d, c) - - Compute c = a (mod d). Result will always be 0 <= c < d - */ -mp_err mp_mod_d(mp_int *a, mp_digit d, mp_digit *c) -{ - mp_err res; - mp_digit rem; - - ARGCHK(a != NULL && c != NULL, MP_BADARG); - - if(s_mp_cmp_d(a, d) > 0) { - if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY) - return res; - - } else { - if(SIGN(a) == MP_NEG) - rem = d - DIGIT(a, 0); - else - rem = DIGIT(a, 0); - } - - if(c) - *c = rem; - - return MP_OKAY; - -} /* end mp_mod_d() */ - -/* }}} */ - -/* {{{ mp_sqrt(a, b) */ - -/* - mp_sqrt(a, b) - - Compute the integer square root of a, and store the result in b. - Uses an integer-arithmetic version of Newton's iterative linear - approximation technique to determine this value; the result has the - following two properties: - - b^2 <= a - (b+1)^2 >= a - - It is a range error to pass a negative value. - */ -mp_err mp_sqrt(mp_int *a, mp_int *b) -{ - mp_int x, t; - mp_err res; - - ARGCHK(a != NULL && b != NULL, MP_BADARG); - - /* Cannot take square root of a negative value */ - if(SIGN(a) == MP_NEG) - return MP_RANGE; - - /* Special cases for zero and one, trivial */ - if(mp_cmp_d(a, 0) == MP_EQ || mp_cmp_d(a, 1) == MP_EQ) - return mp_copy(a, b); - - /* Initialize the temporaries we'll use below */ - if((res = mp_init_size(&t, USED(a))) != MP_OKAY) - return res; - - /* Compute an initial guess for the iteration as a itself */ - if((res = mp_init_copy(&x, a)) != MP_OKAY) - goto X; - -s_mp_rshd(&x, (USED(&x)/2)+1); -mp_add_d(&x, 1, &x); - - for(;;) { - /* t = (x * x) - a */ - mp_copy(&x, &t); /* can't fail, t is big enough for original x */ - if((res = mp_sqr(&t, &t)) != MP_OKAY || - (res = mp_sub(&t, a, &t)) != MP_OKAY) - goto CLEANUP; - - /* t = t / 2x */ - s_mp_mul_2(&x); - if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY) - goto CLEANUP; - s_mp_div_2(&x); - - /* Terminate the loop, if the quotient is zero */ - if(mp_cmp_z(&t) == MP_EQ) - break; - - /* x = x - t */ - if((res = mp_sub(&x, &t, &x)) != MP_OKAY) - goto CLEANUP; - - } - - /* Copy result to output parameter */ - mp_sub_d(&x, 1, &x); - s_mp_exch(&x, b); - - CLEANUP: - mp_clear(&x); - X: - mp_clear(&t); - - return res; - -} /* end mp_sqrt() */ - -/* }}} */ - -/* }}} */ - -/*------------------------------------------------------------------------*/ -/* {{{ Modular arithmetic */ - -#if MP_MODARITH -/* {{{ mp_addmod(a, b, m, c) */ - -/* - mp_addmod(a, b, m, c) - - Compute c = (a + b) mod m - */ - -mp_err mp_addmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c) -{ - mp_err res; - - ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); - - if((res = mp_add(a, b, c)) != MP_OKAY) - return res; - if((res = mp_mod(c, m, c)) != MP_OKAY) - return res; - - return MP_OKAY; - -} - -/* }}} */ - -/* {{{ mp_submod(a, b, m, c) */ - -/* - mp_submod(a, b, m, c) - - Compute c = (a - b) mod m - */ - -mp_err mp_submod(mp_int *a, mp_int *b, mp_int *m, mp_int *c) -{ - mp_err res; - - ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); - - if((res = mp_sub(a, b, c)) != MP_OKAY) - return res; - if((res = mp_mod(c, m, c)) != MP_OKAY) - return res; - - return MP_OKAY; - -} - -/* }}} */ - -/* {{{ mp_mulmod(a, b, m, c) */ - -/* - mp_mulmod(a, b, m, c) - - Compute c = (a * b) mod m - */ - -mp_err mp_mulmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c) -{ - mp_err res; - - ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); - - if((res = mp_mul(a, b, c)) != MP_OKAY) - return res; - if((res = mp_mod(c, m, c)) != MP_OKAY) - return res; - - return MP_OKAY; - -} - -/* }}} */ - -/* {{{ mp_sqrmod(a, m, c) */ - -#if MP_SQUARE -mp_err mp_sqrmod(mp_int *a, mp_int *m, mp_int *c) -{ - mp_err res; - - ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG); - - if((res = mp_sqr(a, c)) != MP_OKAY) - return res; - if((res = mp_mod(c, m, c)) != MP_OKAY) - return res; - - return MP_OKAY; - -} /* end mp_sqrmod() */ -#endif - -/* }}} */ - -/* {{{ mp_exptmod(a, b, m, c) */ - -/* - mp_exptmod(a, b, m, c) - - Compute c = (a ** b) mod m. Uses a standard square-and-multiply - method with modular reductions at each step. (This is basically the - same code as mp_expt(), except for the addition of the reductions) - - The modular reductions are done using Barrett's algorithm (see - s_mp_reduce() below for details) - */ - -mp_err mp_exptmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c) -{ - mp_int s, x, mu; - mp_err res; - mp_digit d, *db = DIGITS(b); - mp_size ub = USED(b); - int dig, bit; - - ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); - - if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0) - return MP_RANGE; - - if((res = mp_init(&s)) != MP_OKAY) - return res; - if((res = mp_init_copy(&x, a)) != MP_OKAY) - goto X; - if((res = mp_mod(&x, m, &x)) != MP_OKAY || - (res = mp_init(&mu)) != MP_OKAY) - goto MU; - - mp_set(&s, 1); - - /* mu = b^2k / m */ - s_mp_add_d(&mu, 1); - s_mp_lshd(&mu, 2 * USED(m)); - if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY) - goto CLEANUP; - - /* Loop over digits of b in ascending order, except highest order */ - for(dig = 0; dig < (ub - 1); dig++) { - d = *db++; - - /* Loop over the bits of the lower-order digits */ - for(bit = 0; bit < DIGIT_BIT; bit++) { - if(d & 1) { - if((res = s_mp_mul(&s, &x)) != MP_OKAY) - goto CLEANUP; - if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY) - goto CLEANUP; - } - - d >>= 1; - - if((res = s_mp_sqr(&x)) != MP_OKAY) - goto CLEANUP; - if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY) - goto CLEANUP; - } - } - - /* Now do the last digit... */ - d = *db; - - while(d) { - if(d & 1) { - if((res = s_mp_mul(&s, &x)) != MP_OKAY) - goto CLEANUP; - if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY) - goto CLEANUP; - } - - d >>= 1; - - if((res = s_mp_sqr(&x)) != MP_OKAY) - goto CLEANUP; - if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY) - goto CLEANUP; - } - - s_mp_exch(&s, c); - - CLEANUP: - mp_clear(&mu); - MU: - mp_clear(&x); - X: - mp_clear(&s); - - return res; - -} /* end mp_exptmod() */ - -/* }}} */ - -/* {{{ mp_exptmod_d(a, d, m, c) */ - -mp_err mp_exptmod_d(mp_int *a, mp_digit d, mp_int *m, mp_int *c) -{ - mp_int s, x; - mp_err res; - - ARGCHK(a != NULL && c != NULL, MP_BADARG); - - if((res = mp_init(&s)) != MP_OKAY) - return res; - if((res = mp_init_copy(&x, a)) != MP_OKAY) - goto X; - - mp_set(&s, 1); - - while(d != 0) { - if(d & 1) { - if((res = s_mp_mul(&s, &x)) != MP_OKAY || - (res = mp_mod(&s, m, &s)) != MP_OKAY) - goto CLEANUP; - } - - d /= 2; - - if((res = s_mp_sqr(&x)) != MP_OKAY || - (res = mp_mod(&x, m, &x)) != MP_OKAY) - goto CLEANUP; - } - - s_mp_exch(&s, c); - -CLEANUP: - mp_clear(&x); -X: - mp_clear(&s); - - return res; - -} /* end mp_exptmod_d() */ - -/* }}} */ -#endif /* if MP_MODARITH */ - -/* }}} */ - -/*------------------------------------------------------------------------*/ -/* {{{ Comparison functions */ - -/* {{{ mp_cmp_z(a) */ - -/* - mp_cmp_z(a) - - Compare a <=> 0. Returns <0 if a<0, 0 if a=0, >0 if a>0. - */ - -int mp_cmp_z(mp_int *a) -{ - if(SIGN(a) == MP_NEG) - return MP_LT; - else if(USED(a) == 1 && DIGIT(a, 0) == 0) - return MP_EQ; - else - return MP_GT; - -} /* end mp_cmp_z() */ - -/* }}} */ - -/* {{{ mp_cmp_d(a, d) */ - -/* - mp_cmp_d(a, d) - - Compare a <=> d. Returns <0 if a0 if a>d - */ - -int mp_cmp_d(mp_int *a, mp_digit d) -{ - ARGCHK(a != NULL, MP_EQ); - - if(SIGN(a) == MP_NEG) - return MP_LT; - - return s_mp_cmp_d(a, d); - -} /* end mp_cmp_d() */ - -/* }}} */ - -/* {{{ mp_cmp(a, b) */ - -int mp_cmp(mp_int *a, mp_int *b) -{ - ARGCHK(a != NULL && b != NULL, MP_EQ); - - if(SIGN(a) == SIGN(b)) { - int mag; - - if((mag = s_mp_cmp(a, b)) == MP_EQ) - return MP_EQ; - - if(SIGN(a) == MP_ZPOS) - return mag; - else - return -mag; - - } else if(SIGN(a) == MP_ZPOS) { - return MP_GT; - } else { - return MP_LT; - } - -} /* end mp_cmp() */ - -/* }}} */ - -/* {{{ mp_cmp_mag(a, b) */ - -/* - mp_cmp_mag(a, b) - - Compares |a| <=> |b|, and returns an appropriate comparison result - */ - -int mp_cmp_mag(mp_int *a, mp_int *b) -{ - ARGCHK(a != NULL && b != NULL, MP_EQ); - - return s_mp_cmp(a, b); - -} /* end mp_cmp_mag() */ - -/* }}} */ - -/* {{{ mp_cmp_int(a, z) */ - -/* - This just converts z to an mp_int, and uses the existing comparison - routines. This is sort of inefficient, but it's not clear to me how - frequently this wil get used anyway. For small positive constants, - you can always use mp_cmp_d(), and for zero, there is mp_cmp_z(). - */ -int mp_cmp_int(mp_int *a, long z) -{ - mp_int tmp; - int out; - - ARGCHK(a != NULL, MP_EQ); - - mp_init(&tmp); mp_set_int(&tmp, z); - out = mp_cmp(a, &tmp); - mp_clear(&tmp); - - return out; - -} /* end mp_cmp_int() */ - -/* }}} */ - -/* {{{ mp_isodd(a) */ - -/* - mp_isodd(a) - - Returns a true (non-zero) value if a is odd, false (zero) otherwise. - */ -int mp_isodd(mp_int *a) -{ - ARGCHK(a != NULL, 0); - - return (DIGIT(a, 0) & 1); - -} /* end mp_isodd() */ - -/* }}} */ - -/* {{{ mp_iseven(a) */ - -int mp_iseven(mp_int *a) -{ - return !mp_isodd(a); - -} /* end mp_iseven() */ - -/* }}} */ - -/* }}} */ - -/*------------------------------------------------------------------------*/ -/* {{{ Number theoretic functions */ - -#if MP_NUMTH -/* {{{ mp_gcd(a, b, c) */ - -/* - Like the old mp_gcd() function, except computes the GCD using the - binary algorithm due to Josef Stein in 1961 (via Knuth). - */ -mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c) -{ - mp_err res; - mp_int u, v, t; - mp_size k = 0; - - ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); - - if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ) - return MP_RANGE; - if(mp_cmp_z(a) == MP_EQ) { - return mp_copy(b, c); - } else if(mp_cmp_z(b) == MP_EQ) { - return mp_copy(a, c); - } - - if((res = mp_init(&t)) != MP_OKAY) - return res; - if((res = mp_init_copy(&u, a)) != MP_OKAY) - goto U; - if((res = mp_init_copy(&v, b)) != MP_OKAY) - goto V; - - SIGN(&u) = MP_ZPOS; - SIGN(&v) = MP_ZPOS; - - /* Divide out common factors of 2 until at least 1 of a, b is even */ - while(mp_iseven(&u) && mp_iseven(&v)) { - s_mp_div_2(&u); - s_mp_div_2(&v); - ++k; - } - - /* Initialize t */ - if(mp_isodd(&u)) { - if((res = mp_copy(&v, &t)) != MP_OKAY) - goto CLEANUP; - - /* t = -v */ - if(SIGN(&v) == MP_ZPOS) - SIGN(&t) = MP_NEG; - else - SIGN(&t) = MP_ZPOS; - - } else { - if((res = mp_copy(&u, &t)) != MP_OKAY) - goto CLEANUP; - - } - - for(;;) { - while(mp_iseven(&t)) { - s_mp_div_2(&t); - } - - if(mp_cmp_z(&t) == MP_GT) { - if((res = mp_copy(&t, &u)) != MP_OKAY) - goto CLEANUP; - - } else { - if((res = mp_copy(&t, &v)) != MP_OKAY) - goto CLEANUP; - - /* v = -t */ - if(SIGN(&t) == MP_ZPOS) - SIGN(&v) = MP_NEG; - else - SIGN(&v) = MP_ZPOS; - } - - if((res = mp_sub(&u, &v, &t)) != MP_OKAY) - goto CLEANUP; - - if(s_mp_cmp_d(&t, 0) == MP_EQ) - break; - } - - s_mp_2expt(&v, k); /* v = 2^k */ - res = mp_mul(&u, &v, c); /* c = u * v */ - - CLEANUP: - mp_clear(&v); - V: - mp_clear(&u); - U: - mp_clear(&t); - - return res; - -} /* end mp_bgcd() */ - -/* }}} */ - -/* {{{ mp_lcm(a, b, c) */ - -/* We compute the least common multiple using the rule: - - ab = [a, b](a, b) - - ... by computing the product, and dividing out the gcd. - */ - -mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c) -{ - mp_int gcd, prod; - mp_err res; - - ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); - - /* Set up temporaries */ - if((res = mp_init(&gcd)) != MP_OKAY) - return res; - if((res = mp_init(&prod)) != MP_OKAY) - goto GCD; - - if((res = mp_mul(a, b, &prod)) != MP_OKAY) - goto CLEANUP; - if((res = mp_gcd(a, b, &gcd)) != MP_OKAY) - goto CLEANUP; - - res = mp_div(&prod, &gcd, c, NULL); - - CLEANUP: - mp_clear(&prod); - GCD: - mp_clear(&gcd); - - return res; - -} /* end mp_lcm() */ - -/* }}} */ - -/* {{{ mp_xgcd(a, b, g, x, y) */ - -/* - mp_xgcd(a, b, g, x, y) - - Compute g = (a, b) and values x and y satisfying Bezout's identity - (that is, ax + by = g). This uses the extended binary GCD algorithm - based on the Stein algorithm used for mp_gcd() - */ - -mp_err mp_xgcd(mp_int *a, mp_int *b, mp_int *g, mp_int *x, mp_int *y) -{ - mp_int gx, xc, yc, u, v, A, B, C, D; - mp_int *clean[9]; - mp_err res; - int last = -1; - - if(mp_cmp_z(b) == 0) - return MP_RANGE; - - /* Initialize all these variables we need */ - if((res = mp_init(&u)) != MP_OKAY) goto CLEANUP; - clean[++last] = &u; - if((res = mp_init(&v)) != MP_OKAY) goto CLEANUP; - clean[++last] = &v; - if((res = mp_init(&gx)) != MP_OKAY) goto CLEANUP; - clean[++last] = &gx; - if((res = mp_init(&A)) != MP_OKAY) goto CLEANUP; - clean[++last] = &A; - if((res = mp_init(&B)) != MP_OKAY) goto CLEANUP; - clean[++last] = &B; - if((res = mp_init(&C)) != MP_OKAY) goto CLEANUP; - clean[++last] = &C; - if((res = mp_init(&D)) != MP_OKAY) goto CLEANUP; - clean[++last] = &D; - if((res = mp_init_copy(&xc, a)) != MP_OKAY) goto CLEANUP; - clean[++last] = &xc; - mp_abs(&xc, &xc); - if((res = mp_init_copy(&yc, b)) != MP_OKAY) goto CLEANUP; - clean[++last] = &yc; - mp_abs(&yc, &yc); - - mp_set(&gx, 1); - - /* Divide by two until at least one of them is even */ - while(mp_iseven(&xc) && mp_iseven(&yc)) { - s_mp_div_2(&xc); - s_mp_div_2(&yc); - if((res = s_mp_mul_2(&gx)) != MP_OKAY) - goto CLEANUP; - } - - mp_copy(&xc, &u); - mp_copy(&yc, &v); - mp_set(&A, 1); mp_set(&D, 1); - - /* Loop through binary GCD algorithm */ - for(;;) { - while(mp_iseven(&u)) { - s_mp_div_2(&u); - - if(mp_iseven(&A) && mp_iseven(&B)) { - s_mp_div_2(&A); s_mp_div_2(&B); - } else { - if((res = mp_add(&A, &yc, &A)) != MP_OKAY) goto CLEANUP; - s_mp_div_2(&A); - if((res = mp_sub(&B, &xc, &B)) != MP_OKAY) goto CLEANUP; - s_mp_div_2(&B); - } - } - - while(mp_iseven(&v)) { - s_mp_div_2(&v); - - if(mp_iseven(&C) && mp_iseven(&D)) { - s_mp_div_2(&C); s_mp_div_2(&D); - } else { - if((res = mp_add(&C, &yc, &C)) != MP_OKAY) goto CLEANUP; - s_mp_div_2(&C); - if((res = mp_sub(&D, &xc, &D)) != MP_OKAY) goto CLEANUP; - s_mp_div_2(&D); - } - } - - if(mp_cmp(&u, &v) >= 0) { - if((res = mp_sub(&u, &v, &u)) != MP_OKAY) goto CLEANUP; - if((res = mp_sub(&A, &C, &A)) != MP_OKAY) goto CLEANUP; - if((res = mp_sub(&B, &D, &B)) != MP_OKAY) goto CLEANUP; - - } else { - if((res = mp_sub(&v, &u, &v)) != MP_OKAY) goto CLEANUP; - if((res = mp_sub(&C, &A, &C)) != MP_OKAY) goto CLEANUP; - if((res = mp_sub(&D, &B, &D)) != MP_OKAY) goto CLEANUP; - - } - - /* If we're done, copy results to output */ - if(mp_cmp_z(&u) == 0) { - if(x) - if((res = mp_copy(&C, x)) != MP_OKAY) goto CLEANUP; - - if(y) - if((res = mp_copy(&D, y)) != MP_OKAY) goto CLEANUP; - - if(g) - if((res = mp_mul(&gx, &v, g)) != MP_OKAY) goto CLEANUP; - - break; - } - } - - CLEANUP: - while(last >= 0) - mp_clear(clean[last--]); - - return res; - -} /* end mp_xgcd() */ - -/* }}} */ - -/* {{{ mp_invmod(a, m, c) */ - -/* - mp_invmod(a, m, c) - - Compute c = a^-1 (mod m), if there is an inverse for a (mod m). - This is equivalent to the question of whether (a, m) = 1. If not, - MP_UNDEF is returned, and there is no inverse. - */ - -mp_err mp_invmod(mp_int *a, mp_int *m, mp_int *c) -{ - mp_int g, x; - mp_err res; - - ARGCHK(a && m && c, MP_BADARG); - - if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0) - return MP_RANGE; - - if((res = mp_init(&g)) != MP_OKAY) - return res; - if((res = mp_init(&x)) != MP_OKAY) - goto X; - - if((res = mp_xgcd(a, m, &g, &x, NULL)) != MP_OKAY) - goto CLEANUP; - - if(mp_cmp_d(&g, 1) != MP_EQ) { - res = MP_UNDEF; - goto CLEANUP; - } - - res = mp_mod(&x, m, c); - SIGN(c) = SIGN(a); - -CLEANUP: - mp_clear(&x); -X: - mp_clear(&g); - - return res; - -} /* end mp_invmod() */ - -/* }}} */ -#endif /* if MP_NUMTH */ - -/* }}} */ - -/*------------------------------------------------------------------------*/ -/* {{{ mp_print(mp, ofp) */ - -#if MP_IOFUNC -/* - mp_print(mp, ofp) - - Print a textual representation of the given mp_int on the output - stream 'ofp'. Output is generated using the internal radix. - */ - -void mp_print(mp_int *mp, FILE *ofp) -{ - int ix; - - if(mp == NULL || ofp == NULL) - return; - - fputc((SIGN(mp) == MP_NEG) ? '-' : '+', ofp); - - for(ix = USED(mp) - 1; ix >= 0; ix--) { - fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix)); - } - -} /* end mp_print() */ - -#endif /* if MP_IOFUNC */ - -/* }}} */ - -/*------------------------------------------------------------------------*/ -/* {{{ More I/O Functions */ - -/* {{{ mp_read_signed_bin(mp, str, len) */ - -/* - mp_read_signed_bin(mp, str, len) - - Read in a raw value (base 256) into the given mp_int - */ - -mp_err mp_read_signed_bin(mp_int *mp, unsigned char *str, int len) -{ - mp_err res; - - ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG); - - if((res = mp_read_unsigned_bin(mp, str + 1, len - 1)) == MP_OKAY) { - /* Get sign from first byte */ - if(str[0]) - SIGN(mp) = MP_NEG; - else - SIGN(mp) = MP_ZPOS; - } - - return res; - -} /* end mp_read_signed_bin() */ - -/* }}} */ - -/* {{{ mp_signed_bin_size(mp) */ - -int mp_signed_bin_size(mp_int *mp) -{ - ARGCHK(mp != NULL, 0); - - return mp_unsigned_bin_size(mp) + 1; - -} /* end mp_signed_bin_size() */ - -/* }}} */ - -/* {{{ mp_to_signed_bin(mp, str) */ - -mp_err mp_to_signed_bin(mp_int *mp, unsigned char *str) -{ - ARGCHK(mp != NULL && str != NULL, MP_BADARG); - - /* Caller responsible for allocating enough memory (use mp_raw_size(mp)) */ - str[0] = (char)SIGN(mp); - - return mp_to_unsigned_bin(mp, str + 1); - -} /* end mp_to_signed_bin() */ - -/* }}} */ - -/* {{{ mp_read_unsigned_bin(mp, str, len) */ - -/* - mp_read_unsigned_bin(mp, str, len) - - Read in an unsigned value (base 256) into the given mp_int - */ - -mp_err mp_read_unsigned_bin(mp_int *mp, unsigned char *str, int len) -{ - int ix; - mp_err res; - - ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG); - - mp_zero(mp); - - for(ix = 0; ix < len; ix++) { - if((res = s_mp_mul_2d(mp, CHAR_BIT)) != MP_OKAY) - return res; - - if((res = mp_add_d(mp, str[ix], mp)) != MP_OKAY) - return res; - } - - return MP_OKAY; - -} /* end mp_read_unsigned_bin() */ - -/* }}} */ - -/* {{{ mp_unsigned_bin_size(mp) */ - -int mp_unsigned_bin_size(mp_int *mp) -{ - mp_digit topdig; - int count; - - ARGCHK(mp != NULL, 0); - - /* Special case for the value zero */ - if(USED(mp) == 1 && DIGIT(mp, 0) == 0) - return 1; - - count = (USED(mp) - 1) * sizeof(mp_digit); - topdig = DIGIT(mp, USED(mp) - 1); - - while(topdig != 0) { - ++count; - topdig >>= CHAR_BIT; - } - - return count; - -} /* end mp_unsigned_bin_size() */ - -/* }}} */ - -/* {{{ mp_to_unsigned_bin(mp, str) */ - -mp_err mp_to_unsigned_bin(mp_int *mp, unsigned char *str) -{ - mp_digit *dp, *end, d; - unsigned char *spos; - - ARGCHK(mp != NULL && str != NULL, MP_BADARG); - - dp = DIGITS(mp); - end = dp + USED(mp) - 1; - spos = str; - - /* Special case for zero, quick test */ - if(dp == end && *dp == 0) { - *str = '\0'; - return MP_OKAY; - } - - /* Generate digits in reverse order */ - while(dp < end) { - int ix; - - d = *dp; - for(ix = 0; ix < sizeof(mp_digit); ++ix) { - *spos = d & UCHAR_MAX; - d >>= CHAR_BIT; - ++spos; - } - - ++dp; - } - - /* Now handle last digit specially, high order zeroes are not written */ - d = *end; - while(d != 0) { - *spos = d & UCHAR_MAX; - d >>= CHAR_BIT; - ++spos; - } - - /* Reverse everything to get digits in the correct order */ - while(--spos > str) { - unsigned char t = *str; - *str = *spos; - *spos = t; - - ++str; - } - - return MP_OKAY; - -} /* end mp_to_unsigned_bin() */ - -/* }}} */ - -/* {{{ mp_count_bits(mp) */ - -int mp_count_bits(mp_int *mp) -{ - int len; - mp_digit d; - - ARGCHK(mp != NULL, MP_BADARG); - - len = DIGIT_BIT * (USED(mp) - 1); - d = DIGIT(mp, USED(mp) - 1); - - while(d != 0) { - ++len; - d >>= 1; - } - - return len; - -} /* end mp_count_bits() */ - -/* }}} */ - -/* {{{ mp_read_radix(mp, str, radix) */ - -/* - mp_read_radix(mp, str, radix) - - Read an integer from the given string, and set mp to the resulting - value. The input is presumed to be in base 10. Leading non-digit - characters are ignored, and the function reads until a non-digit - character or the end of the string. - */ - -mp_err mp_read_radix(mp_int *mp, unsigned char *str, int radix) -{ - int ix = 0, val = 0; - mp_err res; - mp_sign sig = MP_ZPOS; - - ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX, - MP_BADARG); - - mp_zero(mp); - - /* Skip leading non-digit characters until a digit or '-' or '+' */ - while(str[ix] && - (s_mp_tovalue(str[ix], radix) < 0) && - str[ix] != '-' && - str[ix] != '+') { - ++ix; - } - - if(str[ix] == '-') { - sig = MP_NEG; - ++ix; - } else if(str[ix] == '+') { - sig = MP_ZPOS; /* this is the default anyway... */ - ++ix; - } - - while((val = s_mp_tovalue(str[ix], radix)) >= 0) { - if((res = s_mp_mul_d(mp, radix)) != MP_OKAY) - return res; - if((res = s_mp_add_d(mp, val)) != MP_OKAY) - return res; - ++ix; - } - - if(s_mp_cmp_d(mp, 0) == MP_EQ) - SIGN(mp) = MP_ZPOS; - else - SIGN(mp) = sig; - - return MP_OKAY; - -} /* end mp_read_radix() */ - -/* }}} */ - -/* {{{ mp_radix_size(mp, radix) */ - -int mp_radix_size(mp_int *mp, int radix) -{ - int len; - ARGCHK(mp != NULL, 0); - - len = s_mp_outlen(mp_count_bits(mp), radix) + 1; /* for NUL terminator */ - - if(mp_cmp_z(mp) < 0) - ++len; /* for sign */ - - return len; - -} /* end mp_radix_size() */ - -/* }}} */ - -/* {{{ mp_value_radix_size(num, qty, radix) */ - -/* num = number of digits - qty = number of bits per digit - radix = target base - - Return the number of digits in the specified radix that would be - needed to express 'num' digits of 'qty' bits each. - */ -int mp_value_radix_size(int num, int qty, int radix) -{ - ARGCHK(num >= 0 && qty > 0 && radix >= 2 && radix <= MAX_RADIX, 0); - - return s_mp_outlen(num * qty, radix); - -} /* end mp_value_radix_size() */ - -/* }}} */ - -/* {{{ mp_toradix(mp, str, radix) */ - -mp_err mp_toradix(mp_int *mp, unsigned char *str, int radix) -{ - int ix, pos = 0; - - ARGCHK(mp != NULL && str != NULL, MP_BADARG); - ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE); - - if(mp_cmp_z(mp) == MP_EQ) { - str[0] = '0'; - str[1] = '\0'; - } else { - mp_err res; - mp_int tmp; - mp_sign sgn; - mp_digit rem, rdx = (mp_digit)radix; - char ch; - - if((res = mp_init_copy(&tmp, mp)) != MP_OKAY) - return res; - - /* Save sign for later, and take absolute value */ - sgn = SIGN(&tmp); SIGN(&tmp) = MP_ZPOS; - - /* Generate output digits in reverse order */ - while(mp_cmp_z(&tmp) != 0) { - if((res = s_mp_div_d(&tmp, rdx, &rem)) != MP_OKAY) { - mp_clear(&tmp); - return res; - } - - /* Generate digits, use capital letters */ - ch = s_mp_todigit(rem, radix, 0); - - str[pos++] = ch; - } - - /* Add - sign if original value was negative */ - if(sgn == MP_NEG) - str[pos++] = '-'; - - /* Add trailing NUL to end the string */ - str[pos--] = '\0'; - - /* Reverse the digits and sign indicator */ - ix = 0; - while(ix < pos) { - char tmp = str[ix]; - - str[ix] = str[pos]; - str[pos] = tmp; - ++ix; - --pos; - } - - mp_clear(&tmp); - } - - return MP_OKAY; - -} /* end mp_toradix() */ - -/* }}} */ - -/* {{{ mp_char2value(ch, r) */ - -int mp_char2value(char ch, int r) -{ - return s_mp_tovalue(ch, r); - -} /* end mp_tovalue() */ - -/* }}} */ - -/* }}} */ - -/* {{{ mp_strerror(ec) */ - -/* - mp_strerror(ec) - - Return a string describing the meaning of error code 'ec'. The - string returned is allocated in static memory, so the caller should - not attempt to modify or free the memory associated with this - string. - */ -const char *mp_strerror(mp_err ec) -{ - int aec = (ec < 0) ? -ec : ec; - - /* Code values are negative, so the senses of these comparisons - are accurate */ - if(ec < MP_LAST_CODE || ec > MP_OKAY) { - return mp_err_string[0]; /* unknown error code */ - } else { - return mp_err_string[aec + 1]; - } - -} /* end mp_strerror() */ - -/* }}} */ - -/*========================================================================*/ -/*------------------------------------------------------------------------*/ -/* Static function definitions (internal use only) */ - -/* {{{ Memory management */ - -/* {{{ s_mp_grow(mp, min) */ - -/* Make sure there are at least 'min' digits allocated to mp */ -mp_err s_mp_grow(mp_int *mp, mp_size min) -{ - if(min > ALLOC(mp)) { - mp_digit *tmp; - - /* Set min to next nearest default precision block size */ - min = ((min + (s_mp_defprec - 1)) / s_mp_defprec) * s_mp_defprec; - - if((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL) - return MP_MEM; - - s_mp_copy(DIGITS(mp), tmp, USED(mp)); - -#if MP_CRYPTO - s_mp_setz(DIGITS(mp), ALLOC(mp)); -#endif - s_mp_free(DIGITS(mp)); - DIGITS(mp) = tmp; - ALLOC(mp) = min; - } - - return MP_OKAY; - -} /* end s_mp_grow() */ - -/* }}} */ - -/* {{{ s_mp_pad(mp, min) */ - -/* Make sure the used size of mp is at least 'min', growing if needed */ -mp_err s_mp_pad(mp_int *mp, mp_size min) -{ - if(min > USED(mp)) { - mp_err res; - - /* Make sure there is room to increase precision */ - if(min > ALLOC(mp) && (res = s_mp_grow(mp, min)) != MP_OKAY) - return res; - - /* Increase precision; should already be 0-filled */ - USED(mp) = min; - } - - return MP_OKAY; - -} /* end s_mp_pad() */ - -/* }}} */ - -/* {{{ s_mp_setz(dp, count) */ - -#if MP_MACRO == 0 -/* Set 'count' digits pointed to by dp to be zeroes */ -void s_mp_setz(mp_digit *dp, mp_size count) -{ -#if MP_MEMSET == 0 - int ix; - - for(ix = 0; ix < count; ix++) - dp[ix] = 0; -#else - memset(dp, 0, count * sizeof(mp_digit)); -#endif - -} /* end s_mp_setz() */ -#endif - -/* }}} */ - -/* {{{ s_mp_copy(sp, dp, count) */ - -#if MP_MACRO == 0 -/* Copy 'count' digits from sp to dp */ -void s_mp_copy(mp_digit *sp, mp_digit *dp, mp_size count) -{ -#if MP_MEMCPY == 0 - int ix; - - for(ix = 0; ix < count; ix++) - dp[ix] = sp[ix]; -#else - memcpy(dp, sp, count * sizeof(mp_digit)); -#endif - -} /* end s_mp_copy() */ -#endif - -/* }}} */ - -/* {{{ s_mp_alloc(nb, ni) */ - -#if MP_MACRO == 0 -/* Allocate ni records of nb bytes each, and return a pointer to that */ -void *s_mp_alloc(size_t nb, size_t ni) -{ - return calloc(nb, ni); - -} /* end s_mp_alloc() */ -#endif - -/* }}} */ - -/* {{{ s_mp_free(ptr) */ - -#if MP_MACRO == 0 -/* Free the memory pointed to by ptr */ -void s_mp_free(void *ptr) -{ - if(ptr) - free(ptr); - -} /* end s_mp_free() */ -#endif - -/* }}} */ - -/* {{{ s_mp_clamp(mp) */ - -/* Remove leading zeroes from the given value */ -void s_mp_clamp(mp_int *mp) -{ - mp_size du = USED(mp); - mp_digit *zp = DIGITS(mp) + du - 1; - - while(du > 1 && !*zp--) - --du; - - USED(mp) = du; - -} /* end s_mp_clamp() */ - - -/* }}} */ - -/* {{{ s_mp_exch(a, b) */ - -/* Exchange the data for a and b; (b, a) = (a, b) */ -void s_mp_exch(mp_int *a, mp_int *b) -{ - mp_int tmp; - - tmp = *a; - *a = *b; - *b = tmp; - -} /* end s_mp_exch() */ - -/* }}} */ - -/* }}} */ - -/* {{{ Arithmetic helpers */ - -/* {{{ s_mp_lshd(mp, p) */ - -/* - Shift mp leftward by p digits, growing if needed, and zero-filling - the in-shifted digits at the right end. This is a convenient - alternative to multiplication by powers of the radix - */ - -mp_err s_mp_lshd(mp_int *mp, mp_size p) -{ - mp_err res; - mp_size pos; - mp_digit *dp; - int ix; - - if(p == 0) - return MP_OKAY; - - if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY) - return res; - - pos = USED(mp) - 1; - dp = DIGITS(mp); - - /* Shift all the significant figures over as needed */ - for(ix = pos - p; ix >= 0; ix--) - dp[ix + p] = dp[ix]; - - /* Fill the bottom digits with zeroes */ - for(ix = 0; ix < p; ix++) - dp[ix] = 0; - - return MP_OKAY; - -} /* end s_mp_lshd() */ - -/* }}} */ - -/* {{{ s_mp_rshd(mp, p) */ - -/* - Shift mp rightward by p digits. Maintains the invariant that - digits above the precision are all zero. Digits shifted off the - end are lost. Cannot fail. - */ - -void s_mp_rshd(mp_int *mp, mp_size p) -{ - mp_size ix; - mp_digit *dp; - - if(p == 0) - return; - - /* Shortcut when all digits are to be shifted off */ - if(p >= USED(mp)) { - s_mp_setz(DIGITS(mp), ALLOC(mp)); - USED(mp) = 1; - SIGN(mp) = MP_ZPOS; - return; - } - - /* Shift all the significant figures over as needed */ - dp = DIGITS(mp); - for(ix = p; ix < USED(mp); ix++) - dp[ix - p] = dp[ix]; - - /* Fill the top digits with zeroes */ - ix -= p; - while(ix < USED(mp)) - dp[ix++] = 0; - - /* Strip off any leading zeroes */ - s_mp_clamp(mp); - -} /* end s_mp_rshd() */ - -/* }}} */ - -/* {{{ s_mp_div_2(mp) */ - -/* Divide by two -- take advantage of radix properties to do it fast */ -void s_mp_div_2(mp_int *mp) -{ - s_mp_div_2d(mp, 1); - -} /* end s_mp_div_2() */ - -/* }}} */ - -/* {{{ s_mp_mul_2(mp) */ - -mp_err s_mp_mul_2(mp_int *mp) -{ - int ix; - mp_digit kin = 0, kout, *dp = DIGITS(mp); - mp_err res; - - /* Shift digits leftward by 1 bit */ - for(ix = 0; ix < USED(mp); ix++) { - kout = (dp[ix] >> (DIGIT_BIT - 1)) & 1; - dp[ix] = (dp[ix] << 1) | kin; - - kin = kout; - } - - /* Deal with rollover from last digit */ - if(kin) { - if(ix >= ALLOC(mp)) { - if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY) - return res; - dp = DIGITS(mp); - } - - dp[ix] = kin; - USED(mp) += 1; - } - - return MP_OKAY; - -} /* end s_mp_mul_2() */ - -/* }}} */ - -/* {{{ s_mp_mod_2d(mp, d) */ - -/* - Remainder the integer by 2^d, where d is a number of bits. This - amounts to a bitwise AND of the value, and does not require the full - division code - */ -void s_mp_mod_2d(mp_int *mp, mp_digit d) -{ - unsigned int ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT); - unsigned int ix; - mp_digit dmask, *dp = DIGITS(mp); - - if(ndig >= USED(mp)) - return; - - /* Flush all the bits above 2^d in its digit */ - dmask = (1 << nbit) - 1; - dp[ndig] &= dmask; - - /* Flush all digits above the one with 2^d in it */ - for(ix = ndig + 1; ix < USED(mp); ix++) - dp[ix] = 0; - - s_mp_clamp(mp); - -} /* end s_mp_mod_2d() */ - -/* }}} */ - -/* {{{ s_mp_mul_2d(mp, d) */ - -/* - Multiply by the integer 2^d, where d is a number of bits. This - amounts to a bitwise shift of the value, and does not require the - full multiplication code. - */ -mp_err s_mp_mul_2d(mp_int *mp, mp_digit d) -{ - mp_err res; - mp_digit save, next, mask, *dp; - mp_size used; - int ix; - - if((res = s_mp_lshd(mp, d / DIGIT_BIT)) != MP_OKAY) - return res; - - dp = DIGITS(mp); used = USED(mp); - d %= DIGIT_BIT; - - mask = (1 << d) - 1; - - /* If the shift requires another digit, make sure we've got one to - work with */ - if((dp[used - 1] >> (DIGIT_BIT - d)) & mask) { - if((res = s_mp_grow(mp, used + 1)) != MP_OKAY) - return res; - dp = DIGITS(mp); - } - - /* Do the shifting... */ - save = 0; - for(ix = 0; ix < used; ix++) { - next = (dp[ix] >> (DIGIT_BIT - d)) & mask; - dp[ix] = (dp[ix] << d) | save; - save = next; - } - - /* If, at this point, we have a nonzero carryout into the next - digit, we'll increase the size by one digit, and store it... - */ - if(save) { - dp[used] = save; - USED(mp) += 1; - } - - s_mp_clamp(mp); - return MP_OKAY; - -} /* end s_mp_mul_2d() */ - -/* }}} */ - -/* {{{ s_mp_div_2d(mp, d) */ - -/* - Divide the integer by 2^d, where d is a number of bits. This - amounts to a bitwise shift of the value, and does not require the - full division code (used in Barrett reduction, see below) - */ -void s_mp_div_2d(mp_int *mp, mp_digit d) -{ - int ix; - mp_digit save, next, mask, *dp = DIGITS(mp); - - s_mp_rshd(mp, d / DIGIT_BIT); - d %= DIGIT_BIT; - - mask = (1 << d) - 1; - - save = 0; - for(ix = USED(mp) - 1; ix >= 0; ix--) { - next = dp[ix] & mask; - dp[ix] = (dp[ix] >> d) | (save << (DIGIT_BIT - d)); - save = next; - } - - s_mp_clamp(mp); - -} /* end s_mp_div_2d() */ - -/* }}} */ - -/* {{{ s_mp_norm(a, b) */ - -/* - s_mp_norm(a, b) - - Normalize a and b for division, where b is the divisor. In order - that we might make good guesses for quotient digits, we want the - leading digit of b to be at least half the radix, which we - accomplish by multiplying a and b by a constant. This constant is - returned (so that it can be divided back out of the remainder at the - end of the division process). - - We multiply by the smallest power of 2 that gives us a leading digit - at least half the radix. By choosing a power of 2, we simplify the - multiplication and division steps to simple shifts. - */ -mp_digit s_mp_norm(mp_int *a, mp_int *b) -{ - mp_digit t, d = 0; - - t = DIGIT(b, USED(b) - 1); - while(t < (RADIX / 2)) { - t <<= 1; - ++d; - } - - if(d != 0) { - s_mp_mul_2d(a, d); - s_mp_mul_2d(b, d); - } - - return d; - -} /* end s_mp_norm() */ - -/* }}} */ - -/* }}} */ - -/* {{{ Primitive digit arithmetic */ - -/* {{{ s_mp_add_d(mp, d) */ - -/* Add d to |mp| in place */ -mp_err s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */ -{ - mp_word w, k = 0; - mp_size ix = 1, used = USED(mp); - mp_digit *dp = DIGITS(mp); - - w = dp[0] + d; - dp[0] = ACCUM(w); - k = CARRYOUT(w); - - while(ix < used && k) { - w = dp[ix] + k; - dp[ix] = ACCUM(w); - k = CARRYOUT(w); - ++ix; - } - - if(k != 0) { - mp_err res; - - if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY) - return res; - - DIGIT(mp, ix) = k; - } - - return MP_OKAY; - -} /* end s_mp_add_d() */ - -/* }}} */ - -/* {{{ s_mp_sub_d(mp, d) */ - -/* Subtract d from |mp| in place, assumes |mp| > d */ -mp_err s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */ -{ - mp_word w, b = 0; - mp_size ix = 1, used = USED(mp); - mp_digit *dp = DIGITS(mp); - - /* Compute initial subtraction */ - w = (RADIX + dp[0]) - d; - b = CARRYOUT(w) ? 0 : 1; - dp[0] = ACCUM(w); - - /* Propagate borrows leftward */ - while(b && ix < used) { - w = (RADIX + dp[ix]) - b; - b = CARRYOUT(w) ? 0 : 1; - dp[ix] = ACCUM(w); - ++ix; - } - - /* Remove leading zeroes */ - s_mp_clamp(mp); - - /* If we have a borrow out, it's a violation of the input invariant */ - if(b) - return MP_RANGE; - else - return MP_OKAY; - -} /* end s_mp_sub_d() */ - -/* }}} */ - -/* {{{ s_mp_mul_d(a, d) */ - -/* Compute a = a * d, single digit multiplication */ -mp_err s_mp_mul_d(mp_int *a, mp_digit d) -{ - mp_word w, k = 0; - mp_size ix, max; - mp_err res; - mp_digit *dp = DIGITS(a); - - /* - Single-digit multiplication will increase the precision of the - output by at most one digit. However, we can detect when this - will happen -- if the high-order digit of a, times d, gives a - two-digit result, then the precision of the result will increase; - otherwise it won't. We use this fact to avoid calling s_mp_pad() - unless absolutely necessary. - */ - max = USED(a); - w = dp[max - 1] * d; - if(CARRYOUT(w) != 0) { - if((res = s_mp_pad(a, max + 1)) != MP_OKAY) - return res; - dp = DIGITS(a); - } - - for(ix = 0; ix < max; ix++) { - w = (dp[ix] * d) + k; - dp[ix] = ACCUM(w); - k = CARRYOUT(w); - } - - /* If there is a precision increase, take care of it here; the above - test guarantees we have enough storage to do this safely. - */ - if(k) { - dp[max] = k; - USED(a) = max + 1; - } - - s_mp_clamp(a); - - return MP_OKAY; - -} /* end s_mp_mul_d() */ - -/* }}} */ - -/* {{{ s_mp_div_d(mp, d, r) */ - -/* - s_mp_div_d(mp, d, r) - - Compute the quotient mp = mp / d and remainder r = mp mod d, for a - single digit d. If r is null, the remainder will be discarded. - */ - -mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r) -{ - mp_word w = 0, t; - mp_int quot; - mp_err res; - mp_digit *dp = DIGITS(mp), *qp; - int ix; - - if(d == 0) - return MP_RANGE; - - /* Make room for the quotient */ - if((res = mp_init_size(", USED(mp))) != MP_OKAY) - return res; - - USED(") = USED(mp); /* so clamping will work below */ - qp = DIGITS("); - - /* Divide without subtraction */ - for(ix = USED(mp) - 1; ix >= 0; ix--) { - w = (w << DIGIT_BIT) | dp[ix]; - - if(w >= d) { - t = w / d; - w = w % d; - } else { - t = 0; - } - - qp[ix] = t; - } - - /* Deliver the remainder, if desired */ - if(r) - *r = w; - - s_mp_clamp("); - mp_exch(", mp); - mp_clear("); - - return MP_OKAY; - -} /* end s_mp_div_d() */ - -/* }}} */ - -/* }}} */ - -/* {{{ Primitive full arithmetic */ - -/* {{{ s_mp_add(a, b) */ - -/* Compute a = |a| + |b| */ -mp_err s_mp_add(mp_int *a, mp_int *b) /* magnitude addition */ -{ - mp_word w = 0; - mp_digit *pa, *pb; - mp_size ix, used = USED(b); - mp_err res; - - /* Make sure a has enough precision for the output value */ - if((used > USED(a)) && (res = s_mp_pad(a, used)) != MP_OKAY) - return res; - - /* - Add up all digits up to the precision of b. If b had initially - the same precision as a, or greater, we took care of it by the - padding step above, so there is no problem. If b had initially - less precision, we'll have to make sure the carry out is duly - propagated upward among the higher-order digits of the sum. - */ - pa = DIGITS(a); - pb = DIGITS(b); - for(ix = 0; ix < used; ++ix) { - w += *pa + *pb++; - *pa++ = ACCUM(w); - w = CARRYOUT(w); - } - - /* If we run out of 'b' digits before we're actually done, make - sure the carries get propagated upward... - */ - used = USED(a); - while(w && ix < used) { - w += *pa; - *pa++ = ACCUM(w); - w = CARRYOUT(w); - ++ix; - } - - /* If there's an overall carry out, increase precision and include - it. We could have done this initially, but why touch the memory - allocator unless we're sure we have to? - */ - if(w) { - if((res = s_mp_pad(a, used + 1)) != MP_OKAY) - return res; - - DIGIT(a, ix) = w; /* pa may not be valid after s_mp_pad() call */ - } - - return MP_OKAY; - -} /* end s_mp_add() */ - -/* }}} */ - -/* {{{ s_mp_sub(a, b) */ - -/* Compute a = |a| - |b|, assumes |a| >= |b| */ -mp_err s_mp_sub(mp_int *a, mp_int *b) /* magnitude subtract */ -{ - mp_word w = 0; - mp_digit *pa, *pb; - mp_size ix, used = USED(b); - - /* - Subtract and propagate borrow. Up to the precision of b, this - accounts for the digits of b; after that, we just make sure the - carries get to the right place. This saves having to pad b out to - the precision of a just to make the loops work right... - */ - pa = DIGITS(a); - pb = DIGITS(b); - - for(ix = 0; ix < used; ++ix) { - w = (RADIX + *pa) - w - *pb++; - *pa++ = ACCUM(w); - w = CARRYOUT(w) ? 0 : 1; - } - - used = USED(a); - while(ix < used) { - w = RADIX + *pa - w; - *pa++ = ACCUM(w); - w = CARRYOUT(w) ? 0 : 1; - ++ix; - } - - /* Clobber any leading zeroes we created */ - s_mp_clamp(a); - - /* - If there was a borrow out, then |b| > |a| in violation - of our input invariant. We've already done the work, - but we'll at least complain about it... - */ - if(w) - return MP_RANGE; - else - return MP_OKAY; - -} /* end s_mp_sub() */ - -/* }}} */ - -mp_err s_mp_reduce(mp_int *x, mp_int *m, mp_int *mu) -{ - mp_int q; - mp_err res; - mp_size um = USED(m); - - if((res = mp_init_copy(&q, x)) != MP_OKAY) - return res; - - s_mp_rshd(&q, um - 1); /* q1 = x / b^(k-1) */ - s_mp_mul(&q, mu); /* q2 = q1 * mu */ - s_mp_rshd(&q, um + 1); /* q3 = q2 / b^(k+1) */ - - /* x = x mod b^(k+1), quick (no division) */ - s_mp_mod_2d(x, (mp_digit)(DIGIT_BIT * (um + 1))); - - /* q = q * m mod b^(k+1), quick (no division), uses the short multiplier */ -#ifndef SHRT_MUL - s_mp_mul(&q, m); - s_mp_mod_2d(&q, (mp_digit)(DIGIT_BIT * (um + 1))); -#else - s_mp_mul_dig(&q, m, um + 1); -#endif - - /* x = x - q */ - if((res = mp_sub(x, &q, x)) != MP_OKAY) - goto CLEANUP; - - /* If x < 0, add b^(k+1) to it */ - if(mp_cmp_z(x) < 0) { - mp_set(&q, 1); - if((res = s_mp_lshd(&q, um + 1)) != MP_OKAY) - goto CLEANUP; - if((res = mp_add(x, &q, x)) != MP_OKAY) - goto CLEANUP; - } - - /* Back off if it's too big */ - while(mp_cmp(x, m) >= 0) { - if((res = s_mp_sub(x, m)) != MP_OKAY) - break; - } - - CLEANUP: - mp_clear(&q); - - return res; - -} /* end s_mp_reduce() */ - - - -/* {{{ s_mp_mul(a, b) */ - -/* Compute a = |a| * |b| */ -mp_err s_mp_mul(mp_int *a, mp_int *b) -{ - mp_word w, k = 0; - mp_int tmp; - mp_err res; - mp_size ix, jx, ua = USED(a), ub = USED(b); - mp_digit *pa, *pb, *pt, *pbt; - - if((res = mp_init_size(&tmp, ua + ub)) != MP_OKAY) - return res; - - /* This has the effect of left-padding with zeroes... */ - USED(&tmp) = ua + ub; - - /* We're going to need the base value each iteration */ - pbt = DIGITS(&tmp); - - /* Outer loop: Digits of b */ - - pb = DIGITS(b); - for(ix = 0; ix < ub; ++ix, ++pb) { - if(*pb == 0) - continue; - - /* Inner product: Digits of a */ - pa = DIGITS(a); - for(jx = 0; jx < ua; ++jx, ++pa) { - pt = pbt + ix + jx; - w = *pb * *pa + k + *pt; - *pt = ACCUM(w); - k = CARRYOUT(w); - } - - pbt[ix + jx] = k; - k = 0; - } - - s_mp_clamp(&tmp); - s_mp_exch(&tmp, a); - - mp_clear(&tmp); - - return MP_OKAY; - -} /* end s_mp_mul() */ - -/* }}} */ - -/* {{{ s_mp_kmul(a, b, out, len) */ - -#if 0 -void s_mp_kmul(mp_digit *a, mp_digit *b, mp_digit *out, mp_size len) -{ - mp_word w, k = 0; - mp_size ix, jx; - mp_digit *pa, *pt; - - for(ix = 0; ix < len; ++ix, ++b) { - if(*b == 0) - continue; - - pa = a; - for(jx = 0; jx < len; ++jx, ++pa) { - pt = out + ix + jx; - w = *b * *pa + k + *pt; - *pt = ACCUM(w); - k = CARRYOUT(w); - } - - out[ix + jx] = k; - k = 0; - } - -} /* end s_mp_kmul() */ -#endif - -/* }}} */ - -/* {{{ s_mp_sqr(a) */ - -/* - Computes the square of a, in place. This can be done more - efficiently than a general multiplication, because many of the - computation steps are redundant when squaring. The inner product - step is a bit more complicated, but we save a fair number of - iterations of the multiplication loop. - */ -#if MP_SQUARE -mp_err s_mp_sqr(mp_int *a) -{ - mp_word w, k = 0; - mp_int tmp; - mp_err res; - mp_size ix, jx, kx, used = USED(a); - mp_digit *pa1, *pa2, *pt, *pbt; - - if((res = mp_init_size(&tmp, 2 * used)) != MP_OKAY) - return res; - - /* Left-pad with zeroes */ - USED(&tmp) = 2 * used; - - /* We need the base value each time through the loop */ - pbt = DIGITS(&tmp); - - pa1 = DIGITS(a); - for(ix = 0; ix < used; ++ix, ++pa1) { - if(*pa1 == 0) - continue; - - w = DIGIT(&tmp, ix + ix) + (*pa1 * *pa1); - - pbt[ix + ix] = ACCUM(w); - k = CARRYOUT(w); - - /* - The inner product is computed as: - - (C, S) = t[i,j] + 2 a[i] a[j] + C - - This can overflow what can be represented in an mp_word, and - since C arithmetic does not provide any way to check for - overflow, we have to check explicitly for overflow conditions - before they happen. - */ - for(jx = ix + 1, pa2 = DIGITS(a) + jx; jx < used; ++jx, ++pa2) { - mp_word u = 0, v; - - /* Store this in a temporary to avoid indirections later */ - pt = pbt + ix + jx; - - /* Compute the multiplicative step */ - w = *pa1 * *pa2; - - /* If w is more than half MP_WORD_MAX, the doubling will - overflow, and we need to record a carry out into the next - word */ - u = (w >> (MP_WORD_BIT - 1)) & 1; - - /* Double what we've got, overflow will be ignored as defined - for C arithmetic (we've already noted if it is to occur) - */ - w *= 2; - - /* Compute the additive step */ - v = *pt + k; - - /* If we do not already have an overflow carry, check to see - if the addition will cause one, and set the carry out if so - */ - u |= ((MP_WORD_MAX - v) < w); - - /* Add in the rest, again ignoring overflow */ - w += v; - - /* Set the i,j digit of the output */ - *pt = ACCUM(w); - - /* Save carry information for the next iteration of the loop. - This is why k must be an mp_word, instead of an mp_digit */ - k = CARRYOUT(w) | (u << DIGIT_BIT); - - } /* for(jx ...) */ - - /* Set the last digit in the cycle and reset the carry */ - k = DIGIT(&tmp, ix + jx) + k; - pbt[ix + jx] = ACCUM(k); - k = CARRYOUT(k); - - /* If we are carrying out, propagate the carry to the next digit - in the output. This may cascade, so we have to be somewhat - circumspect -- but we will have enough precision in the output - that we won't overflow - */ - kx = 1; - while(k) { - k = pbt[ix + jx + kx] + 1; - pbt[ix + jx + kx] = ACCUM(k); - k = CARRYOUT(k); - ++kx; - } - } /* for(ix ...) */ - - s_mp_clamp(&tmp); - s_mp_exch(&tmp, a); - - mp_clear(&tmp); - - return MP_OKAY; - -} /* end s_mp_sqr() */ -#endif - -/* }}} */ - -/* {{{ s_mp_div(a, b) */ - -/* - s_mp_div(a, b) - - Compute a = a / b and b = a mod b. Assumes b > a. - */ - -mp_err s_mp_div(mp_int *a, mp_int *b) -{ - mp_int quot, rem, t; - mp_word q; - mp_err res; - mp_digit d; - int ix; - - if(mp_cmp_z(b) == 0) - return MP_RANGE; - - /* Shortcut if b is power of two */ - if((ix = s_mp_ispow2(b)) >= 0) { - mp_copy(a, b); /* need this for remainder */ - s_mp_div_2d(a, (mp_digit)ix); - s_mp_mod_2d(b, (mp_digit)ix); - - return MP_OKAY; - } - - /* Allocate space to store the quotient */ - if((res = mp_init_size(", USED(a))) != MP_OKAY) - return res; - - /* A working temporary for division */ - if((res = mp_init_size(&t, USED(a))) != MP_OKAY) - goto T; - - /* Allocate space for the remainder */ - if((res = mp_init_size(&rem, USED(a))) != MP_OKAY) - goto REM; - - /* Normalize to optimize guessing */ - d = s_mp_norm(a, b); - - /* Perform the division itself...woo! */ - ix = USED(a) - 1; - - while(ix >= 0) { - /* Find a partial substring of a which is at least b */ - while(s_mp_cmp(&rem, b) < 0 && ix >= 0) { - if((res = s_mp_lshd(&rem, 1)) != MP_OKAY) - goto CLEANUP; - - if((res = s_mp_lshd(", 1)) != MP_OKAY) - goto CLEANUP; - - DIGIT(&rem, 0) = DIGIT(a, ix); - s_mp_clamp(&rem); - --ix; - } - - /* If we didn't find one, we're finished dividing */ - if(s_mp_cmp(&rem, b) < 0) - break; - - /* Compute a guess for the next quotient digit */ - q = DIGIT(&rem, USED(&rem) - 1); - if(q <= DIGIT(b, USED(b) - 1) && USED(&rem) > 1) - q = (q << DIGIT_BIT) | DIGIT(&rem, USED(&rem) - 2); - - q /= DIGIT(b, USED(b) - 1); - - /* The guess can be as much as RADIX + 1 */ - if(q >= RADIX) - q = RADIX - 1; - - /* See what that multiplies out to */ - mp_copy(b, &t); - if((res = s_mp_mul_d(&t, q)) != MP_OKAY) - goto CLEANUP; - - /* - If it's too big, back it off. We should not have to do this - more than once, or, in rare cases, twice. Knuth describes a - method by which this could be reduced to a maximum of once, but - I didn't implement that here. - */ - while(s_mp_cmp(&t, &rem) > 0) { - --q; - s_mp_sub(&t, b); - } - - /* At this point, q should be the right next digit */ - if((res = s_mp_sub(&rem, &t)) != MP_OKAY) - goto CLEANUP; - - /* - Include the digit in the quotient. We allocated enough memory - for any quotient we could ever possibly get, so we should not - have to check for failures here - */ - DIGIT(", 0) = q; - } - - /* Denormalize remainder */ - if(d != 0) - s_mp_div_2d(&rem, d); - - s_mp_clamp("); - s_mp_clamp(&rem); - - /* Copy quotient back to output */ - s_mp_exch(", a); - - /* Copy remainder back to output */ - s_mp_exch(&rem, b); - -CLEANUP: - mp_clear(&rem); -REM: - mp_clear(&t); -T: - mp_clear("); - - return res; - -} /* end s_mp_div() */ - -/* }}} */ - -/* {{{ s_mp_2expt(a, k) */ - -mp_err s_mp_2expt(mp_int *a, mp_digit k) -{ - mp_err res; - mp_size dig, bit; - - dig = k / DIGIT_BIT; - bit = k % DIGIT_BIT; - - mp_zero(a); - if((res = s_mp_pad(a, dig + 1)) != MP_OKAY) - return res; - - DIGIT(a, dig) |= (1 << bit); - - return MP_OKAY; - -} /* end s_mp_2expt() */ - -/* }}} */ - - -/* }}} */ - -/* }}} */ - -/* {{{ Primitive comparisons */ - -/* {{{ s_mp_cmp(a, b) */ - -/* Compare |a| <=> |b|, return 0 if equal, <0 if a0 if a>b */ -int s_mp_cmp(mp_int *a, mp_int *b) -{ - mp_size ua = USED(a), ub = USED(b); - - if(ua > ub) - return MP_GT; - else if(ua < ub) - return MP_LT; - else { - int ix = ua - 1; - mp_digit *ap = DIGITS(a) + ix, *bp = DIGITS(b) + ix; - - while(ix >= 0) { - if(*ap > *bp) - return MP_GT; - else if(*ap < *bp) - return MP_LT; - - --ap; --bp; --ix; - } - - return MP_EQ; - } - -} /* end s_mp_cmp() */ - -/* }}} */ - -/* {{{ s_mp_cmp_d(a, d) */ - -/* Compare |a| <=> d, return 0 if equal, <0 if a0 if a>d */ -int s_mp_cmp_d(mp_int *a, mp_digit d) -{ - mp_size ua = USED(a); - mp_digit *ap = DIGITS(a); - - if(ua > 1) - return MP_GT; - - if(*ap < d) - return MP_LT; - else if(*ap > d) - return MP_GT; - else - return MP_EQ; - -} /* end s_mp_cmp_d() */ - -/* }}} */ - -/* {{{ s_mp_ispow2(v) */ - -/* - Returns -1 if the value is not a power of two; otherwise, it returns - k such that v = 2^k, i.e. lg(v). - */ -int s_mp_ispow2(mp_int *v) -{ - mp_digit d, *dp; - mp_size uv = USED(v); - int extra = 0, ix; - - d = DIGIT(v, uv - 1); /* most significant digit of v */ - - while(d && ((d & 1) == 0)) { - d >>= 1; - ++extra; - } - - if(d == 1) { - ix = uv - 2; - dp = DIGITS(v) + ix; - - while(ix >= 0) { - if(*dp) - return -1; /* not a power of two */ - - --dp; --ix; - } - - return ((uv - 1) * DIGIT_BIT) + extra; - } - - return -1; - -} /* end s_mp_ispow2() */ - -/* }}} */ - -/* {{{ s_mp_ispow2d(d) */ - -int s_mp_ispow2d(mp_digit d) -{ - int pow = 0; - - while((d & 1) == 0) { - ++pow; d >>= 1; - } - - if(d == 1) - return pow; - - return -1; - -} /* end s_mp_ispow2d() */ - -/* }}} */ - -/* }}} */ - -/* {{{ Primitive I/O helpers */ - -/* {{{ s_mp_tovalue(ch, r) */ - -/* - Convert the given character to its digit value, in the given radix. - If the given character is not understood in the given radix, -1 is - returned. Otherwise the digit's numeric value is returned. - - The results will be odd if you use a radix < 2 or > 62, you are - expected to know what you're up to. - */ -int s_mp_tovalue(char ch, int r) -{ - int val, xch; - - if(r > 36) - xch = ch; - else - xch = toupper(ch); - - if(isdigit(xch)) - val = xch - '0'; - else if(isupper(xch)) - val = xch - 'A' + 10; - else if(islower(xch)) - val = xch - 'a' + 36; - else if(xch == '+') - val = 62; - else if(xch == '/') - val = 63; - else - return -1; - - if(val < 0 || val >= r) - return -1; - - return val; - -} /* end s_mp_tovalue() */ - -/* }}} */ - -/* {{{ s_mp_todigit(val, r, low) */ - -/* - Convert val to a radix-r digit, if possible. If val is out of range - for r, returns zero. Otherwise, returns an ASCII character denoting - the value in the given radix. - - The results may be odd if you use a radix < 2 or > 64, you are - expected to know what you're doing. - */ - -char s_mp_todigit(int val, int r, int low) -{ - char ch; - - if(val < 0 || val >= r) - return 0; - - ch = s_dmap_1[val]; - - if(r <= 36 && low) - ch = tolower(ch); - - return ch; - -} /* end s_mp_todigit() */ - -/* }}} */ - -/* {{{ s_mp_outlen(bits, radix) */ - -/* - Return an estimate for how long a string is needed to hold a radix - r representation of a number with 'bits' significant bits. - - Does not include space for a sign or a NUL terminator. - */ -int s_mp_outlen(int bits, int r) -{ - return (int)((double)bits * LOG_V_2(r)); - -} /* end s_mp_outlen() */ - -/* }}} */ - -/* }}} */ - -/*------------------------------------------------------------------------*/ -/* HERE THERE BE DRAGONS */ -/* crc==4242132123, version==2, Sat Feb 02 06:43:52 2002 */ +/* + mpi.c + + by Michael J. Fromberger + Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved + + Arbitrary precision integer arithmetic library + + $ID$ + */ + +#include "mpi.h" +#include +#include +#include + +#if MP_DEBUG +#include + +#define DIAG(T,V) {fprintf(stderr,T);mp_print(V,stderr);fputc('\n',stderr);} +#else +#define DIAG(T,V) +#endif + +/* + If MP_LOGTAB is not defined, use the math library to compute the + logarithms on the fly. Otherwise, use the static table below. + Pick which works best for your system. + */ +#if MP_LOGTAB + +/* {{{ s_logv_2[] - log table for 2 in various bases */ + +/* + A table of the logs of 2 for various bases (the 0 and 1 entries of + this table are meaningless and should not be referenced). + + This table is used to compute output lengths for the mp_toradix() + function. Since a number n in radix r takes up about log_r(n) + digits, we estimate the output size by taking the least integer + greater than log_r(n), where: + + log_r(n) = log_2(n) * log_r(2) + + This table, therefore, is a table of log_r(2) for 2 <= r <= 36, + which are the output bases supported. + */ + +#include "logtab.h" + +/* }}} */ +#define LOG_V_2(R) s_logv_2[(R)] + +#else + +#include +#define LOG_V_2(R) (log(2.0)/log(R)) + +#endif + +/* Default precision for newly created mp_int's */ +static unsigned int s_mp_defprec = MP_DEFPREC; + +/* {{{ Digit arithmetic macros */ + +/* + When adding and multiplying digits, the results can be larger than + can be contained in an mp_digit. Thus, an mp_word is used. These + macros mask off the upper and lower digits of the mp_word (the + mp_word may be more than 2 mp_digits wide, but we only concern + ourselves with the low-order 2 mp_digits) + + If your mp_word DOES have more than 2 mp_digits, you need to + uncomment the first line, and comment out the second. + */ + +/* #define CARRYOUT(W) (((W)>>DIGIT_BIT)&MP_DIGIT_MAX) */ +#define CARRYOUT(W) ((W)>>DIGIT_BIT) +#define ACCUM(W) ((W)&MP_DIGIT_MAX) + +/* }}} */ + +/* {{{ Comparison constants */ + +#define MP_LT -1 +#define MP_EQ 0 +#define MP_GT 1 + +/* }}} */ + +/* {{{ Constant strings */ + +/* Constant strings returned by mp_strerror() */ +static const char *mp_err_string[] = { + "unknown result code", /* say what? */ + "boolean true", /* MP_OKAY, MP_YES */ + "boolean false", /* MP_NO */ + "out of memory", /* MP_MEM */ + "argument out of range", /* MP_RANGE */ + "invalid input parameter", /* MP_BADARG */ + "result is undefined" /* MP_UNDEF */ +}; + +/* Value to digit maps for radix conversion */ + +/* s_dmap_1 - standard digits and letters */ +static const char *s_dmap_1 = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; + +#if 0 +/* s_dmap_2 - base64 ordering for digits */ +static const char *s_dmap_2 = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +#endif + +/* }}} */ + +/* {{{ Static function declarations */ + +/* + If MP_MACRO is false, these will be defined as actual functions; + otherwise, suitable macro definitions will be used. This works + around the fact that ANSI C89 doesn't support an 'inline' keyword + (although I hear C9x will ... about bloody time). At present, the + macro definitions are identical to the function bodies, but they'll + expand in place, instead of generating a function call. + + I chose these particular functions to be made into macros because + some profiling showed they are called a lot on a typical workload, + and yet they are primarily housekeeping. + */ +#if MP_MACRO == 0 + void s_mp_setz(mp_digit *dp, mp_size count); /* zero digits */ + void s_mp_copy(mp_digit *sp, mp_digit *dp, mp_size count); /* copy */ + void *s_mp_alloc(size_t nb, size_t ni); /* general allocator */ + void s_mp_free(void *ptr); /* general free function */ +#else + + /* Even if these are defined as macros, we need to respect the settings + of the MP_MEMSET and MP_MEMCPY configuration options... + */ + #if MP_MEMSET == 0 + #define s_mp_setz(dp, count) \ + {int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=0;} + #else + #define s_mp_setz(dp, count) memset(dp, 0, (count) * sizeof(mp_digit)) + #endif /* MP_MEMSET */ + + #if MP_MEMCPY == 0 + #define s_mp_copy(sp, dp, count) \ + {int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=(sp)[ix];} + #else + #define s_mp_copy(sp, dp, count) memcpy(dp, sp, (count) * sizeof(mp_digit)) + #endif /* MP_MEMCPY */ + + #define s_mp_alloc(nb, ni) calloc(nb, ni) + #define s_mp_free(ptr) {if(ptr) free(ptr);} +#endif /* MP_MACRO */ + +mp_err s_mp_grow(mp_int *mp, mp_size min); /* increase allocated size */ +mp_err s_mp_pad(mp_int *mp, mp_size min); /* left pad with zeroes */ + +void s_mp_clamp(mp_int *mp); /* clip leading zeroes */ + +void s_mp_exch(mp_int *a, mp_int *b); /* swap a and b in place */ + +mp_err s_mp_lshd(mp_int *mp, mp_size p); /* left-shift by p digits */ +void s_mp_rshd(mp_int *mp, mp_size p); /* right-shift by p digits */ +void s_mp_div_2d(mp_int *mp, mp_digit d); /* divide by 2^d in place */ +void s_mp_mod_2d(mp_int *mp, mp_digit d); /* modulo 2^d in place */ +mp_err s_mp_mul_2d(mp_int *mp, mp_digit d); /* multiply by 2^d in place*/ +void s_mp_div_2(mp_int *mp); /* divide by 2 in place */ +mp_err s_mp_mul_2(mp_int *mp); /* multiply by 2 in place */ +mp_digit s_mp_norm(mp_int *a, mp_int *b); /* normalize for division */ +mp_err s_mp_add_d(mp_int *mp, mp_digit d); /* unsigned digit addition */ +mp_err s_mp_sub_d(mp_int *mp, mp_digit d); /* unsigned digit subtract */ +mp_err s_mp_mul_d(mp_int *mp, mp_digit d); /* unsigned digit multiply */ +mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r); + /* unsigned digit divide */ +mp_err s_mp_reduce(mp_int *x, mp_int *m, mp_int *mu); + /* Barrett reduction */ +mp_err s_mp_add(mp_int *a, mp_int *b); /* magnitude addition */ +mp_err s_mp_sub(mp_int *a, mp_int *b); /* magnitude subtract */ +mp_err s_mp_mul(mp_int *a, mp_int *b); /* magnitude multiply */ +#if 0 +void s_mp_kmul(mp_digit *a, mp_digit *b, mp_digit *out, mp_size len); + /* multiply buffers in place */ +#endif +#if MP_SQUARE +mp_err s_mp_sqr(mp_int *a); /* magnitude square */ +#else +#define s_mp_sqr(a) s_mp_mul(a, a) +#endif +mp_err s_mp_div(mp_int *a, mp_int *b); /* magnitude divide */ +mp_err s_mp_2expt(mp_int *a, mp_digit k); /* a = 2^k */ +int s_mp_cmp(mp_int *a, mp_int *b); /* magnitude comparison */ +int s_mp_cmp_d(mp_int *a, mp_digit d); /* magnitude digit compare */ +int s_mp_ispow2(mp_int *v); /* is v a power of 2? */ +int s_mp_ispow2d(mp_digit d); /* is d a power of 2? */ + +int s_mp_tovalue(char ch, int r); /* convert ch to value */ +char s_mp_todigit(int val, int r, int low); /* convert val to digit */ +int s_mp_outlen(int bits, int r); /* output length in bytes */ + +/* }}} */ + +/* {{{ Default precision manipulation */ + +unsigned int mp_get_prec(void) +{ + return s_mp_defprec; + +} /* end mp_get_prec() */ + +void mp_set_prec(unsigned int prec) +{ + if(prec == 0) + s_mp_defprec = MP_DEFPREC; + else + s_mp_defprec = prec; + +} /* end mp_set_prec() */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ mp_init(mp) */ + +/* + mp_init(mp) + + Initialize a new zero-valued mp_int. Returns MP_OKAY if successful, + MP_MEM if memory could not be allocated for the structure. + */ + +mp_err mp_init(mp_int *mp) +{ + return mp_init_size(mp, s_mp_defprec); + +} /* end mp_init() */ + +/* }}} */ + +/* {{{ mp_init_array(mp[], count) */ + +mp_err mp_init_array(mp_int mp[], int count) +{ + mp_err res; + int pos; + + ARGCHK(mp !=NULL && count > 0, MP_BADARG); + + for(pos = 0; pos < count; ++pos) { + if((res = mp_init(&mp[pos])) != MP_OKAY) + goto CLEANUP; + } + + return MP_OKAY; + + CLEANUP: + while(--pos >= 0) + mp_clear(&mp[pos]); + + return res; + +} /* end mp_init_array() */ + +/* }}} */ + +/* {{{ mp_init_size(mp, prec) */ + +/* + mp_init_size(mp, prec) + + Initialize a new zero-valued mp_int with at least the given + precision; returns MP_OKAY if successful, or MP_MEM if memory could + not be allocated for the structure. + */ + +mp_err mp_init_size(mp_int *mp, mp_size prec) +{ + ARGCHK(mp != NULL && prec > 0, MP_BADARG); + + if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL) + return MP_MEM; + + SIGN(mp) = MP_ZPOS; + USED(mp) = 1; + ALLOC(mp) = prec; + + return MP_OKAY; + +} /* end mp_init_size() */ + +/* }}} */ + +/* {{{ mp_init_copy(mp, from) */ + +/* + mp_init_copy(mp, from) + + Initialize mp as an exact copy of from. Returns MP_OKAY if + successful, MP_MEM if memory could not be allocated for the new + structure. + */ + +mp_err mp_init_copy(mp_int *mp, mp_int *from) +{ + ARGCHK(mp != NULL && from != NULL, MP_BADARG); + + if(mp == from) + return MP_OKAY; + + if((DIGITS(mp) = s_mp_alloc(USED(from), sizeof(mp_digit))) == NULL) + return MP_MEM; + + s_mp_copy(DIGITS(from), DIGITS(mp), USED(from)); + USED(mp) = USED(from); + ALLOC(mp) = USED(from); + SIGN(mp) = SIGN(from); + + return MP_OKAY; + +} /* end mp_init_copy() */ + +/* }}} */ + +/* {{{ mp_copy(from, to) */ + +/* + mp_copy(from, to) + + Copies the mp_int 'from' to the mp_int 'to'. It is presumed that + 'to' has already been initialized (if not, use mp_init_copy() + instead). If 'from' and 'to' are identical, nothing happens. + */ + +mp_err mp_copy(mp_int *from, mp_int *to) +{ + ARGCHK(from != NULL && to != NULL, MP_BADARG); + + if(from == to) + return MP_OKAY; + + { /* copy */ + mp_digit *tmp; + + /* + If the allocated buffer in 'to' already has enough space to hold + all the used digits of 'from', we'll re-use it to avoid hitting + the memory allocater more than necessary; otherwise, we'd have + to grow anyway, so we just allocate a hunk and make the copy as + usual + */ + if(ALLOC(to) >= USED(from)) { + s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from)); + s_mp_copy(DIGITS(from), DIGITS(to), USED(from)); + + } else { + if((tmp = s_mp_alloc(USED(from), sizeof(mp_digit))) == NULL) + return MP_MEM; + + s_mp_copy(DIGITS(from), tmp, USED(from)); + + if(DIGITS(to) != NULL) { +#if MP_CRYPTO + s_mp_setz(DIGITS(to), ALLOC(to)); +#endif + s_mp_free(DIGITS(to)); + } + + DIGITS(to) = tmp; + ALLOC(to) = USED(from); + } + + /* Copy the precision and sign from the original */ + USED(to) = USED(from); + SIGN(to) = SIGN(from); + } /* end copy */ + + return MP_OKAY; + +} /* end mp_copy() */ + +/* }}} */ + +/* {{{ mp_exch(mp1, mp2) */ + +/* + mp_exch(mp1, mp2) + + Exchange mp1 and mp2 without allocating any intermediate memory + (well, unless you count the stack space needed for this call and the + locals it creates...). This cannot fail. + */ + +void mp_exch(mp_int *mp1, mp_int *mp2) +{ +#if MP_ARGCHK == 2 + assert(mp1 != NULL && mp2 != NULL); +#else + if(mp1 == NULL || mp2 == NULL) + return; +#endif + + s_mp_exch(mp1, mp2); + +} /* end mp_exch() */ + +/* }}} */ + +/* {{{ mp_clear(mp) */ + +/* + mp_clear(mp) + + Release the storage used by an mp_int, and void its fields so that + if someone calls mp_clear() again for the same int later, we won't + get tollchocked. + */ + +void mp_clear(mp_int *mp) +{ + if(mp == NULL) + return; + + if(DIGITS(mp) != NULL) { +#if MP_CRYPTO + s_mp_setz(DIGITS(mp), ALLOC(mp)); +#endif + s_mp_free(DIGITS(mp)); + DIGITS(mp) = NULL; + } + + USED(mp) = 0; + ALLOC(mp) = 0; + +} /* end mp_clear() */ + +/* }}} */ + +/* {{{ mp_clear_array(mp[], count) */ + +void mp_clear_array(mp_int mp[], int count) +{ + ARGCHK(mp != NULL && count > 0, MP_BADARG); + + while(--count >= 0) + mp_clear(&mp[count]); + +} /* end mp_clear_array() */ + +/* }}} */ + +/* {{{ mp_zero(mp) */ + +/* + mp_zero(mp) + + Set mp to zero. Does not change the allocated size of the structure, + and therefore cannot fail (except on a bad argument, which we ignore) + */ +void mp_zero(mp_int *mp) +{ + if(mp == NULL) + return; + + s_mp_setz(DIGITS(mp), ALLOC(mp)); + USED(mp) = 1; + SIGN(mp) = MP_ZPOS; + +} /* end mp_zero() */ + +/* }}} */ + +/* {{{ mp_set(mp, d) */ + +void mp_set(mp_int *mp, mp_digit d) +{ + if(mp == NULL) + return; + + mp_zero(mp); + DIGIT(mp, 0) = d; + +} /* end mp_set() */ + +/* }}} */ + +/* {{{ mp_set_int(mp, z) */ + +mp_err mp_set_int(mp_int *mp, long z) +{ + int ix; + unsigned long v = abs(z); + mp_err res; + + ARGCHK(mp != NULL, MP_BADARG); + + mp_zero(mp); + if(z == 0) + return MP_OKAY; /* shortcut for zero */ + + for(ix = sizeof(long) - 1; ix >= 0; ix--) { + + if((res = s_mp_mul_2d(mp, CHAR_BIT)) != MP_OKAY) + return res; + + res = s_mp_add_d(mp, + (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX)); + if(res != MP_OKAY) + return res; + + } + + if(z < 0) + SIGN(mp) = MP_NEG; + + return MP_OKAY; + +} /* end mp_set_int() */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Digit arithmetic */ + +/* {{{ mp_add_d(a, d, b) */ + +/* + mp_add_d(a, d, b) + + Compute the sum b = a + d, for a single digit d. Respects the sign of + its primary addend (single digits are unsigned anyway). + */ + +mp_err mp_add_d(mp_int *a, mp_digit d, mp_int *b) +{ + mp_err res = MP_OKAY; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if((res = mp_copy(a, b)) != MP_OKAY) + return res; + + if(SIGN(b) == MP_ZPOS) { + res = s_mp_add_d(b, d); + } else if(s_mp_cmp_d(b, d) >= 0) { + res = s_mp_sub_d(b, d); + } else { + SIGN(b) = MP_ZPOS; + + DIGIT(b, 0) = d - DIGIT(b, 0); + } + + return res; + +} /* end mp_add_d() */ + +/* }}} */ + +/* {{{ mp_sub_d(a, d, b) */ + +/* + mp_sub_d(a, d, b) + + Compute the difference b = a - d, for a single digit d. Respects the + sign of its subtrahend (single digits are unsigned anyway). + */ + +mp_err mp_sub_d(mp_int *a, mp_digit d, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if((res = mp_copy(a, b)) != MP_OKAY) + return res; + + if(SIGN(b) == MP_NEG) { + if((res = s_mp_add_d(b, d)) != MP_OKAY) + return res; + + } else if(s_mp_cmp_d(b, d) >= 0) { + if((res = s_mp_sub_d(b, d)) != MP_OKAY) + return res; + + } else { + mp_neg(b, b); + + DIGIT(b, 0) = d - DIGIT(b, 0); + SIGN(b) = MP_NEG; + } + + if(s_mp_cmp_d(b, 0) == 0) + SIGN(b) = MP_ZPOS; + + return MP_OKAY; + +} /* end mp_sub_d() */ + +/* }}} */ + +/* {{{ mp_mul_d(a, d, b) */ + +/* + mp_mul_d(a, d, b) + + Compute the product b = a * d, for a single digit d. Respects the sign + of its multiplicand (single digits are unsigned anyway) + */ + +mp_err mp_mul_d(mp_int *a, mp_digit d, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if(d == 0) { + mp_zero(b); + return MP_OKAY; + } + + if((res = mp_copy(a, b)) != MP_OKAY) + return res; + + res = s_mp_mul_d(b, d); + + return res; + +} /* end mp_mul_d() */ + +/* }}} */ + +/* {{{ mp_mul_2(a, c) */ + +mp_err mp_mul_2(mp_int *a, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if((res = mp_copy(a, c)) != MP_OKAY) + return res; + + return s_mp_mul_2(c); + +} /* end mp_mul_2() */ + +/* }}} */ + +/* {{{ mp_div_d(a, d, q, r) */ + +/* + mp_div_d(a, d, q, r) + + Compute the quotient q = a / d and remainder r = a mod d, for a + single digit d. Respects the sign of its divisor (single digits are + unsigned anyway). + */ + +mp_err mp_div_d(mp_int *a, mp_digit d, mp_int *q, mp_digit *r) +{ + mp_err res; + mp_digit rem; + int pow; + + ARGCHK(a != NULL, MP_BADARG); + + if(d == 0) + return MP_RANGE; + + /* Shortcut for powers of two ... */ + if((pow = s_mp_ispow2d(d)) >= 0) { + mp_digit mask; + + mask = (1 << pow) - 1; + rem = DIGIT(a, 0) & mask; + + if(q) { + mp_copy(a, q); + s_mp_div_2d(q, pow); + } + + if(r) + *r = rem; + + return MP_OKAY; + } + + /* + If the quotient is actually going to be returned, we'll try to + avoid hitting the memory allocator by copying the dividend into it + and doing the division there. This can't be any _worse_ than + always copying, and will sometimes be better (since it won't make + another copy) + + If it's not going to be returned, we need to allocate a temporary + to hold the quotient, which will just be discarded. + */ + if(q) { + if((res = mp_copy(a, q)) != MP_OKAY) + return res; + + res = s_mp_div_d(q, d, &rem); + if(s_mp_cmp_d(q, 0) == MP_EQ) + SIGN(q) = MP_ZPOS; + + } else { + mp_int qp; + + if((res = mp_init_copy(&qp, a)) != MP_OKAY) + return res; + + res = s_mp_div_d(&qp, d, &rem); + if(s_mp_cmp_d(&qp, 0) == 0) + SIGN(&qp) = MP_ZPOS; + + mp_clear(&qp); + } + + if(r) + *r = rem; + + return res; + +} /* end mp_div_d() */ + +/* }}} */ + +/* {{{ mp_div_2(a, c) */ + +/* + mp_div_2(a, c) + + Compute c = a / 2, disregarding the remainder. + */ + +mp_err mp_div_2(mp_int *a, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if((res = mp_copy(a, c)) != MP_OKAY) + return res; + + s_mp_div_2(c); + + return MP_OKAY; + +} /* end mp_div_2() */ + +/* }}} */ + +/* {{{ mp_expt_d(a, d, b) */ + +mp_err mp_expt_d(mp_int *a, mp_digit d, mp_int *c) +{ + mp_int s, x; + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if((res = mp_init(&s)) != MP_OKAY) + return res; + if((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + + DIGIT(&s, 0) = 1; + + while(d != 0) { + if(d & 1) { + if((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + } + + s_mp_exch(&s, c); + +CLEANUP: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end mp_expt_d() */ + +/* }}} */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Full arithmetic */ + +/* {{{ mp_abs(a, b) */ + +/* + mp_abs(a, b) + + Compute b = |a|. 'a' and 'b' may be identical. + */ + +mp_err mp_abs(mp_int *a, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if((res = mp_copy(a, b)) != MP_OKAY) + return res; + + SIGN(b) = MP_ZPOS; + + return MP_OKAY; + +} /* end mp_abs() */ + +/* }}} */ + +/* {{{ mp_neg(a, b) */ + +/* + mp_neg(a, b) + + Compute b = -a. 'a' and 'b' may be identical. + */ + +mp_err mp_neg(mp_int *a, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if((res = mp_copy(a, b)) != MP_OKAY) + return res; + + if(s_mp_cmp_d(b, 0) == MP_EQ) + SIGN(b) = MP_ZPOS; + else + SIGN(b) = (SIGN(b) == MP_NEG) ? MP_ZPOS : MP_NEG; + + return MP_OKAY; + +} /* end mp_neg() */ + +/* }}} */ + +/* {{{ mp_add(a, b, c) */ + +/* + mp_add(a, b, c) + + Compute c = a + b. All parameters may be identical. + */ + +mp_err mp_add(mp_int *a, mp_int *b, mp_int *c) +{ + mp_err res; + int cmp; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if(SIGN(a) == SIGN(b)) { /* same sign: add values, keep sign */ + + /* Commutativity of addition lets us do this in either order, + so we avoid having to use a temporary even if the result + is supposed to replace the output + */ + if(c == b) { + if((res = s_mp_add(c, a)) != MP_OKAY) + return res; + } else { + if(c != a && (res = mp_copy(a, c)) != MP_OKAY) + return res; + + if((res = s_mp_add(c, b)) != MP_OKAY) + return res; + } + + } else if((cmp = s_mp_cmp(a, b)) > 0) { /* different sign: a > b */ + + /* If the output is going to be clobbered, we will use a temporary + variable; otherwise, we'll do it without touching the memory + allocator at all, if possible + */ + if(c == b) { + mp_int tmp; + + if((res = mp_init_copy(&tmp, a)) != MP_OKAY) + return res; + if((res = s_mp_sub(&tmp, b)) != MP_OKAY) { + mp_clear(&tmp); + return res; + } + + s_mp_exch(&tmp, c); + mp_clear(&tmp); + + } else { + + if(c != a && (res = mp_copy(a, c)) != MP_OKAY) + return res; + if((res = s_mp_sub(c, b)) != MP_OKAY) + return res; + + } + + } else if(cmp == 0) { /* different sign, a == b */ + + mp_zero(c); + return MP_OKAY; + + } else { /* different sign: a < b */ + + /* See above... */ + if(c == a) { + mp_int tmp; + + if((res = mp_init_copy(&tmp, b)) != MP_OKAY) + return res; + if((res = s_mp_sub(&tmp, a)) != MP_OKAY) { + mp_clear(&tmp); + return res; + } + + s_mp_exch(&tmp, c); + mp_clear(&tmp); + + } else { + + if(c != b && (res = mp_copy(b, c)) != MP_OKAY) + return res; + if((res = s_mp_sub(c, a)) != MP_OKAY) + return res; + + } + } + + if(USED(c) == 1 && DIGIT(c, 0) == 0) + SIGN(c) = MP_ZPOS; + + return MP_OKAY; + +} /* end mp_add() */ + +/* }}} */ + +/* {{{ mp_sub(a, b, c) */ + +/* + mp_sub(a, b, c) + + Compute c = a - b. All parameters may be identical. + */ + +mp_err mp_sub(mp_int *a, mp_int *b, mp_int *c) +{ + mp_err res; + int cmp; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if(SIGN(a) != SIGN(b)) { + if(c == a) { + if((res = s_mp_add(c, b)) != MP_OKAY) + return res; + } else { + if(c != b && ((res = mp_copy(b, c)) != MP_OKAY)) + return res; + if((res = s_mp_add(c, a)) != MP_OKAY) + return res; + SIGN(c) = SIGN(a); + } + + } else if((cmp = s_mp_cmp(a, b)) > 0) { /* Same sign, a > b */ + if(c == b) { + mp_int tmp; + + if((res = mp_init_copy(&tmp, a)) != MP_OKAY) + return res; + if((res = s_mp_sub(&tmp, b)) != MP_OKAY) { + mp_clear(&tmp); + return res; + } + s_mp_exch(&tmp, c); + mp_clear(&tmp); + + } else { + if(c != a && ((res = mp_copy(a, c)) != MP_OKAY)) + return res; + + if((res = s_mp_sub(c, b)) != MP_OKAY) + return res; + } + + } else if(cmp == 0) { /* Same sign, equal magnitude */ + mp_zero(c); + return MP_OKAY; + + } else { /* Same sign, b > a */ + if(c == a) { + mp_int tmp; + + if((res = mp_init_copy(&tmp, b)) != MP_OKAY) + return res; + + if((res = s_mp_sub(&tmp, a)) != MP_OKAY) { + mp_clear(&tmp); + return res; + } + s_mp_exch(&tmp, c); + mp_clear(&tmp); + + } else { + if(c != b && ((res = mp_copy(b, c)) != MP_OKAY)) + return res; + + if((res = s_mp_sub(c, a)) != MP_OKAY) + return res; + } + + SIGN(c) = !SIGN(b); + } + + if(USED(c) == 1 && DIGIT(c, 0) == 0) + SIGN(c) = MP_ZPOS; + + return MP_OKAY; + +} /* end mp_sub() */ + +/* }}} */ + +/* {{{ mp_mul(a, b, c) */ + +/* + mp_mul(a, b, c) + + Compute c = a * b. All parameters may be identical. + */ + +mp_err mp_mul(mp_int *a, mp_int *b, mp_int *c) +{ + mp_err res; + mp_sign sgn; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + sgn = (SIGN(a) == SIGN(b)) ? MP_ZPOS : MP_NEG; + + if(c == b) { + if((res = s_mp_mul(c, a)) != MP_OKAY) + return res; + + } else { + if((res = mp_copy(a, c)) != MP_OKAY) + return res; + + if((res = s_mp_mul(c, b)) != MP_OKAY) + return res; + } + + if(sgn == MP_ZPOS || s_mp_cmp_d(c, 0) == MP_EQ) + SIGN(c) = MP_ZPOS; + else + SIGN(c) = sgn; + + return MP_OKAY; + +} /* end mp_mul() */ + +/* }}} */ + +/* {{{ mp_mul_2d(a, d, c) */ + +/* + mp_mul_2d(a, d, c) + + Compute c = a * 2^d. a may be the same as c. + */ + +mp_err mp_mul_2d(mp_int *a, mp_digit d, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if((res = mp_copy(a, c)) != MP_OKAY) + return res; + + if(d == 0) + return MP_OKAY; + + return s_mp_mul_2d(c, d); + +} /* end mp_mul() */ + +/* }}} */ + +/* {{{ mp_sqr(a, b) */ + +#if MP_SQUARE +mp_err mp_sqr(mp_int *a, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if((res = mp_copy(a, b)) != MP_OKAY) + return res; + + if((res = s_mp_sqr(b)) != MP_OKAY) + return res; + + SIGN(b) = MP_ZPOS; + + return MP_OKAY; + +} /* end mp_sqr() */ +#endif + +/* }}} */ + +/* {{{ mp_div(a, b, q, r) */ + +/* + mp_div(a, b, q, r) + + Compute q = a / b and r = a mod b. Input parameters may be re-used + as output parameters. If q or r is NULL, that portion of the + computation will be discarded (although it will still be computed) + + Pay no attention to the hacker behind the curtain. + */ + +mp_err mp_div(mp_int *a, mp_int *b, mp_int *q, mp_int *r) +{ + mp_err res; + mp_int qtmp, rtmp; + int cmp; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if(mp_cmp_z(b) == MP_EQ) + return MP_RANGE; + + /* If a <= b, we can compute the solution without division, and + avoid any memory allocation + */ + if((cmp = s_mp_cmp(a, b)) < 0) { + if(r) { + if((res = mp_copy(a, r)) != MP_OKAY) + return res; + } + + if(q) + mp_zero(q); + + return MP_OKAY; + + } else if(cmp == 0) { + + /* Set quotient to 1, with appropriate sign */ + if(q) { + int qneg = (SIGN(a) != SIGN(b)); + + mp_set(q, 1); + if(qneg) + SIGN(q) = MP_NEG; + } + + if(r) + mp_zero(r); + + return MP_OKAY; + } + + /* If we get here, it means we actually have to do some division */ + + /* Set up some temporaries... */ + if((res = mp_init_copy(&qtmp, a)) != MP_OKAY) + return res; + if((res = mp_init_copy(&rtmp, b)) != MP_OKAY) + goto CLEANUP; + + if((res = s_mp_div(&qtmp, &rtmp)) != MP_OKAY) + goto CLEANUP; + + /* Compute the signs for the output */ + SIGN(&rtmp) = SIGN(a); /* Sr = Sa */ + if(SIGN(a) == SIGN(b)) + SIGN(&qtmp) = MP_ZPOS; /* Sq = MP_ZPOS if Sa = Sb */ + else + SIGN(&qtmp) = MP_NEG; /* Sq = MP_NEG if Sa != Sb */ + + if(s_mp_cmp_d(&qtmp, 0) == MP_EQ) + SIGN(&qtmp) = MP_ZPOS; + if(s_mp_cmp_d(&rtmp, 0) == MP_EQ) + SIGN(&rtmp) = MP_ZPOS; + + /* Copy output, if it is needed */ + if(q) + s_mp_exch(&qtmp, q); + + if(r) + s_mp_exch(&rtmp, r); + +CLEANUP: + mp_clear(&rtmp); + mp_clear(&qtmp); + + return res; + +} /* end mp_div() */ + +/* }}} */ + +/* {{{ mp_div_2d(a, d, q, r) */ + +mp_err mp_div_2d(mp_int *a, mp_digit d, mp_int *q, mp_int *r) +{ + mp_err res; + + ARGCHK(a != NULL, MP_BADARG); + + if(q) { + if((res = mp_copy(a, q)) != MP_OKAY) + return res; + + s_mp_div_2d(q, d); + } + + if(r) { + if((res = mp_copy(a, r)) != MP_OKAY) + return res; + + s_mp_mod_2d(r, d); + } + + return MP_OKAY; + +} /* end mp_div_2d() */ + +/* }}} */ + +/* {{{ mp_expt(a, b, c) */ + +/* + mp_expt(a, b, c) + + Compute c = a ** b, that is, raise a to the b power. Uses a + standard iterative square-and-multiply technique. + */ + +mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int s, x; + mp_err res; + mp_digit d; + int dig, bit; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if(mp_cmp_z(b) < 0) + return MP_RANGE; + + if((res = mp_init(&s)) != MP_OKAY) + return res; + + mp_set(&s, 1); + + if((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + + /* Loop over low-order digits in ascending order */ + for(dig = 0; dig < (USED(b) - 1); dig++) { + d = DIGIT(b, dig); + + /* Loop over bits of each non-maximal digit */ + for(bit = 0; bit < DIGIT_BIT; bit++) { + if(d & 1) { + if((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + } + } + + /* Consider now the last digit... */ + d = DIGIT(b, dig); + + while(d) { + if(d & 1) { + if((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + } + + if(mp_iseven(b)) + SIGN(&s) = SIGN(a); + + res = mp_copy(&s, c); + +CLEANUP: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end mp_expt() */ + +/* }}} */ + +/* {{{ mp_2expt(a, k) */ + +/* Compute a = 2^k */ + +mp_err mp_2expt(mp_int *a, mp_digit k) +{ + ARGCHK(a != NULL, MP_BADARG); + + return s_mp_2expt(a, k); + +} /* end mp_2expt() */ + +/* }}} */ + +/* {{{ mp_mod(a, m, c) */ + +/* + mp_mod(a, m, c) + + Compute c = a (mod m). Result will always be 0 <= c < m. + */ + +mp_err mp_mod(mp_int *a, mp_int *m, mp_int *c) +{ + mp_err res; + int mag; + + ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG); + + if(SIGN(m) == MP_NEG) + return MP_RANGE; + + /* + If |a| > m, we need to divide to get the remainder and take the + absolute value. + + If |a| < m, we don't need to do any division, just copy and adjust + the sign (if a is negative). + + If |a| == m, we can simply set the result to zero. + + This order is intended to minimize the average path length of the + comparison chain on common workloads -- the most frequent cases are + that |a| != m, so we do those first. + */ + if((mag = s_mp_cmp(a, m)) > 0) { + if((res = mp_div(a, m, NULL, c)) != MP_OKAY) + return res; + + if(SIGN(c) == MP_NEG) { + if((res = mp_add(c, m, c)) != MP_OKAY) + return res; + } + + } else if(mag < 0) { + if((res = mp_copy(a, c)) != MP_OKAY) + return res; + + if(mp_cmp_z(a) < 0) { + if((res = mp_add(c, m, c)) != MP_OKAY) + return res; + + } + + } else { + mp_zero(c); + + } + + return MP_OKAY; + +} /* end mp_mod() */ + +/* }}} */ + +/* {{{ mp_mod_d(a, d, c) */ + +/* + mp_mod_d(a, d, c) + + Compute c = a (mod d). Result will always be 0 <= c < d + */ +mp_err mp_mod_d(mp_int *a, mp_digit d, mp_digit *c) +{ + mp_err res; + mp_digit rem; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if(s_mp_cmp_d(a, d) > 0) { + if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY) + return res; + + } else { + if(SIGN(a) == MP_NEG) + rem = d - DIGIT(a, 0); + else + rem = DIGIT(a, 0); + } + + if(c) + *c = rem; + + return MP_OKAY; + +} /* end mp_mod_d() */ + +/* }}} */ + +/* {{{ mp_sqrt(a, b) */ + +/* + mp_sqrt(a, b) + + Compute the integer square root of a, and store the result in b. + Uses an integer-arithmetic version of Newton's iterative linear + approximation technique to determine this value; the result has the + following two properties: + + b^2 <= a + (b+1)^2 >= a + + It is a range error to pass a negative value. + */ +mp_err mp_sqrt(mp_int *a, mp_int *b) +{ + mp_int x, t; + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + /* Cannot take square root of a negative value */ + if(SIGN(a) == MP_NEG) + return MP_RANGE; + + /* Special cases for zero and one, trivial */ + if(mp_cmp_d(a, 0) == MP_EQ || mp_cmp_d(a, 1) == MP_EQ) + return mp_copy(a, b); + + /* Initialize the temporaries we'll use below */ + if((res = mp_init_size(&t, USED(a))) != MP_OKAY) + return res; + + /* Compute an initial guess for the iteration as a itself */ + if((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + +s_mp_rshd(&x, (USED(&x)/2)+1); +mp_add_d(&x, 1, &x); + + for(;;) { + /* t = (x * x) - a */ + mp_copy(&x, &t); /* can't fail, t is big enough for original x */ + if((res = mp_sqr(&t, &t)) != MP_OKAY || + (res = mp_sub(&t, a, &t)) != MP_OKAY) + goto CLEANUP; + + /* t = t / 2x */ + s_mp_mul_2(&x); + if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY) + goto CLEANUP; + s_mp_div_2(&x); + + /* Terminate the loop, if the quotient is zero */ + if(mp_cmp_z(&t) == MP_EQ) + break; + + /* x = x - t */ + if((res = mp_sub(&x, &t, &x)) != MP_OKAY) + goto CLEANUP; + + } + + /* Copy result to output parameter */ + mp_sub_d(&x, 1, &x); + s_mp_exch(&x, b); + + CLEANUP: + mp_clear(&x); + X: + mp_clear(&t); + + return res; + +} /* end mp_sqrt() */ + +/* }}} */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Modular arithmetic */ + +#if MP_MODARITH +/* {{{ mp_addmod(a, b, m, c) */ + +/* + mp_addmod(a, b, m, c) + + Compute c = (a + b) mod m + */ + +mp_err mp_addmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); + + if((res = mp_add(a, b, c)) != MP_OKAY) + return res; + if((res = mp_mod(c, m, c)) != MP_OKAY) + return res; + + return MP_OKAY; + +} + +/* }}} */ + +/* {{{ mp_submod(a, b, m, c) */ + +/* + mp_submod(a, b, m, c) + + Compute c = (a - b) mod m + */ + +mp_err mp_submod(mp_int *a, mp_int *b, mp_int *m, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); + + if((res = mp_sub(a, b, c)) != MP_OKAY) + return res; + if((res = mp_mod(c, m, c)) != MP_OKAY) + return res; + + return MP_OKAY; + +} + +/* }}} */ + +/* {{{ mp_mulmod(a, b, m, c) */ + +/* + mp_mulmod(a, b, m, c) + + Compute c = (a * b) mod m + */ + +mp_err mp_mulmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); + + if((res = mp_mul(a, b, c)) != MP_OKAY) + return res; + if((res = mp_mod(c, m, c)) != MP_OKAY) + return res; + + return MP_OKAY; + +} + +/* }}} */ + +/* {{{ mp_sqrmod(a, m, c) */ + +#if MP_SQUARE +mp_err mp_sqrmod(mp_int *a, mp_int *m, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG); + + if((res = mp_sqr(a, c)) != MP_OKAY) + return res; + if((res = mp_mod(c, m, c)) != MP_OKAY) + return res; + + return MP_OKAY; + +} /* end mp_sqrmod() */ +#endif + +/* }}} */ + +/* {{{ mp_exptmod(a, b, m, c) */ + +/* + mp_exptmod(a, b, m, c) + + Compute c = (a ** b) mod m. Uses a standard square-and-multiply + method with modular reductions at each step. (This is basically the + same code as mp_expt(), except for the addition of the reductions) + + The modular reductions are done using Barrett's algorithm (see + s_mp_reduce() below for details) + */ + +mp_err mp_exptmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c) +{ + mp_int s, x, mu; + mp_err res; + mp_digit d, *db = DIGITS(b); + mp_size ub = USED(b); + int dig, bit; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0) + return MP_RANGE; + + if((res = mp_init(&s)) != MP_OKAY) + return res; + if((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + if((res = mp_mod(&x, m, &x)) != MP_OKAY || + (res = mp_init(&mu)) != MP_OKAY) + goto MU; + + mp_set(&s, 1); + + /* mu = b^2k / m */ + s_mp_add_d(&mu, 1); + s_mp_lshd(&mu, 2 * USED(m)); + if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY) + goto CLEANUP; + + /* Loop over digits of b in ascending order, except highest order */ + for(dig = 0; dig < (ub - 1); dig++) { + d = *db++; + + /* Loop over the bits of the lower-order digits */ + for(bit = 0; bit < DIGIT_BIT; bit++) { + if(d & 1) { + if((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + } + + /* Now do the last digit... */ + d = *db; + + while(d) { + if(d & 1) { + if((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + + s_mp_exch(&s, c); + + CLEANUP: + mp_clear(&mu); + MU: + mp_clear(&x); + X: + mp_clear(&s); + + return res; + +} /* end mp_exptmod() */ + +/* }}} */ + +/* {{{ mp_exptmod_d(a, d, m, c) */ + +mp_err mp_exptmod_d(mp_int *a, mp_digit d, mp_int *m, mp_int *c) +{ + mp_int s, x; + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if((res = mp_init(&s)) != MP_OKAY) + return res; + if((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + + mp_set(&s, 1); + + while(d != 0) { + if(d & 1) { + if((res = s_mp_mul(&s, &x)) != MP_OKAY || + (res = mp_mod(&s, m, &s)) != MP_OKAY) + goto CLEANUP; + } + + d /= 2; + + if((res = s_mp_sqr(&x)) != MP_OKAY || + (res = mp_mod(&x, m, &x)) != MP_OKAY) + goto CLEANUP; + } + + s_mp_exch(&s, c); + +CLEANUP: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end mp_exptmod_d() */ + +/* }}} */ +#endif /* if MP_MODARITH */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Comparison functions */ + +/* {{{ mp_cmp_z(a) */ + +/* + mp_cmp_z(a) + + Compare a <=> 0. Returns <0 if a<0, 0 if a=0, >0 if a>0. + */ + +int mp_cmp_z(mp_int *a) +{ + if(SIGN(a) == MP_NEG) + return MP_LT; + else if(USED(a) == 1 && DIGIT(a, 0) == 0) + return MP_EQ; + else + return MP_GT; + +} /* end mp_cmp_z() */ + +/* }}} */ + +/* {{{ mp_cmp_d(a, d) */ + +/* + mp_cmp_d(a, d) + + Compare a <=> d. Returns <0 if a0 if a>d + */ + +int mp_cmp_d(mp_int *a, mp_digit d) +{ + ARGCHK(a != NULL, MP_EQ); + + if(SIGN(a) == MP_NEG) + return MP_LT; + + return s_mp_cmp_d(a, d); + +} /* end mp_cmp_d() */ + +/* }}} */ + +/* {{{ mp_cmp(a, b) */ + +int mp_cmp(mp_int *a, mp_int *b) +{ + ARGCHK(a != NULL && b != NULL, MP_EQ); + + if(SIGN(a) == SIGN(b)) { + int mag; + + if((mag = s_mp_cmp(a, b)) == MP_EQ) + return MP_EQ; + + if(SIGN(a) == MP_ZPOS) + return mag; + else + return -mag; + + } else if(SIGN(a) == MP_ZPOS) { + return MP_GT; + } else { + return MP_LT; + } + +} /* end mp_cmp() */ + +/* }}} */ + +/* {{{ mp_cmp_mag(a, b) */ + +/* + mp_cmp_mag(a, b) + + Compares |a| <=> |b|, and returns an appropriate comparison result + */ + +int mp_cmp_mag(mp_int *a, mp_int *b) +{ + ARGCHK(a != NULL && b != NULL, MP_EQ); + + return s_mp_cmp(a, b); + +} /* end mp_cmp_mag() */ + +/* }}} */ + +/* {{{ mp_cmp_int(a, z) */ + +/* + This just converts z to an mp_int, and uses the existing comparison + routines. This is sort of inefficient, but it's not clear to me how + frequently this wil get used anyway. For small positive constants, + you can always use mp_cmp_d(), and for zero, there is mp_cmp_z(). + */ +int mp_cmp_int(mp_int *a, long z) +{ + mp_int tmp; + int out; + + ARGCHK(a != NULL, MP_EQ); + + mp_init(&tmp); mp_set_int(&tmp, z); + out = mp_cmp(a, &tmp); + mp_clear(&tmp); + + return out; + +} /* end mp_cmp_int() */ + +/* }}} */ + +/* {{{ mp_isodd(a) */ + +/* + mp_isodd(a) + + Returns a true (non-zero) value if a is odd, false (zero) otherwise. + */ +int mp_isodd(mp_int *a) +{ + ARGCHK(a != NULL, 0); + + return (DIGIT(a, 0) & 1); + +} /* end mp_isodd() */ + +/* }}} */ + +/* {{{ mp_iseven(a) */ + +int mp_iseven(mp_int *a) +{ + return !mp_isodd(a); + +} /* end mp_iseven() */ + +/* }}} */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Number theoretic functions */ + +#if MP_NUMTH +/* {{{ mp_gcd(a, b, c) */ + +/* + Like the old mp_gcd() function, except computes the GCD using the + binary algorithm due to Josef Stein in 1961 (via Knuth). + */ +mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c) +{ + mp_err res; + mp_int u, v, t; + mp_size k = 0; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ) + return MP_RANGE; + if(mp_cmp_z(a) == MP_EQ) { + return mp_copy(b, c); + } else if(mp_cmp_z(b) == MP_EQ) { + return mp_copy(a, c); + } + + if((res = mp_init(&t)) != MP_OKAY) + return res; + if((res = mp_init_copy(&u, a)) != MP_OKAY) + goto U; + if((res = mp_init_copy(&v, b)) != MP_OKAY) + goto V; + + SIGN(&u) = MP_ZPOS; + SIGN(&v) = MP_ZPOS; + + /* Divide out common factors of 2 until at least 1 of a, b is even */ + while(mp_iseven(&u) && mp_iseven(&v)) { + s_mp_div_2(&u); + s_mp_div_2(&v); + ++k; + } + + /* Initialize t */ + if(mp_isodd(&u)) { + if((res = mp_copy(&v, &t)) != MP_OKAY) + goto CLEANUP; + + /* t = -v */ + if(SIGN(&v) == MP_ZPOS) + SIGN(&t) = MP_NEG; + else + SIGN(&t) = MP_ZPOS; + + } else { + if((res = mp_copy(&u, &t)) != MP_OKAY) + goto CLEANUP; + + } + + for(;;) { + while(mp_iseven(&t)) { + s_mp_div_2(&t); + } + + if(mp_cmp_z(&t) == MP_GT) { + if((res = mp_copy(&t, &u)) != MP_OKAY) + goto CLEANUP; + + } else { + if((res = mp_copy(&t, &v)) != MP_OKAY) + goto CLEANUP; + + /* v = -t */ + if(SIGN(&t) == MP_ZPOS) + SIGN(&v) = MP_NEG; + else + SIGN(&v) = MP_ZPOS; + } + + if((res = mp_sub(&u, &v, &t)) != MP_OKAY) + goto CLEANUP; + + if(s_mp_cmp_d(&t, 0) == MP_EQ) + break; + } + + s_mp_2expt(&v, k); /* v = 2^k */ + res = mp_mul(&u, &v, c); /* c = u * v */ + + CLEANUP: + mp_clear(&v); + V: + mp_clear(&u); + U: + mp_clear(&t); + + return res; + +} /* end mp_bgcd() */ + +/* }}} */ + +/* {{{ mp_lcm(a, b, c) */ + +/* We compute the least common multiple using the rule: + + ab = [a, b](a, b) + + ... by computing the product, and dividing out the gcd. + */ + +mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int gcd, prod; + mp_err res; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + /* Set up temporaries */ + if((res = mp_init(&gcd)) != MP_OKAY) + return res; + if((res = mp_init(&prod)) != MP_OKAY) + goto GCD; + + if((res = mp_mul(a, b, &prod)) != MP_OKAY) + goto CLEANUP; + if((res = mp_gcd(a, b, &gcd)) != MP_OKAY) + goto CLEANUP; + + res = mp_div(&prod, &gcd, c, NULL); + + CLEANUP: + mp_clear(&prod); + GCD: + mp_clear(&gcd); + + return res; + +} /* end mp_lcm() */ + +/* }}} */ + +/* {{{ mp_xgcd(a, b, g, x, y) */ + +/* + mp_xgcd(a, b, g, x, y) + + Compute g = (a, b) and values x and y satisfying Bezout's identity + (that is, ax + by = g). This uses the extended binary GCD algorithm + based on the Stein algorithm used for mp_gcd() + */ + +mp_err mp_xgcd(mp_int *a, mp_int *b, mp_int *g, mp_int *x, mp_int *y) +{ + mp_int gx, xc, yc, u, v, A, B, C, D; + mp_int *clean[9]; + mp_err res; + int last = -1; + + if(mp_cmp_z(b) == 0) + return MP_RANGE; + + /* Initialize all these variables we need */ + if((res = mp_init(&u)) != MP_OKAY) goto CLEANUP; + clean[++last] = &u; + if((res = mp_init(&v)) != MP_OKAY) goto CLEANUP; + clean[++last] = &v; + if((res = mp_init(&gx)) != MP_OKAY) goto CLEANUP; + clean[++last] = &gx; + if((res = mp_init(&A)) != MP_OKAY) goto CLEANUP; + clean[++last] = &A; + if((res = mp_init(&B)) != MP_OKAY) goto CLEANUP; + clean[++last] = &B; + if((res = mp_init(&C)) != MP_OKAY) goto CLEANUP; + clean[++last] = &C; + if((res = mp_init(&D)) != MP_OKAY) goto CLEANUP; + clean[++last] = &D; + if((res = mp_init_copy(&xc, a)) != MP_OKAY) goto CLEANUP; + clean[++last] = &xc; + mp_abs(&xc, &xc); + if((res = mp_init_copy(&yc, b)) != MP_OKAY) goto CLEANUP; + clean[++last] = &yc; + mp_abs(&yc, &yc); + + mp_set(&gx, 1); + + /* Divide by two until at least one of them is even */ + while(mp_iseven(&xc) && mp_iseven(&yc)) { + s_mp_div_2(&xc); + s_mp_div_2(&yc); + if((res = s_mp_mul_2(&gx)) != MP_OKAY) + goto CLEANUP; + } + + mp_copy(&xc, &u); + mp_copy(&yc, &v); + mp_set(&A, 1); mp_set(&D, 1); + + /* Loop through binary GCD algorithm */ + for(;;) { + while(mp_iseven(&u)) { + s_mp_div_2(&u); + + if(mp_iseven(&A) && mp_iseven(&B)) { + s_mp_div_2(&A); s_mp_div_2(&B); + } else { + if((res = mp_add(&A, &yc, &A)) != MP_OKAY) goto CLEANUP; + s_mp_div_2(&A); + if((res = mp_sub(&B, &xc, &B)) != MP_OKAY) goto CLEANUP; + s_mp_div_2(&B); + } + } + + while(mp_iseven(&v)) { + s_mp_div_2(&v); + + if(mp_iseven(&C) && mp_iseven(&D)) { + s_mp_div_2(&C); s_mp_div_2(&D); + } else { + if((res = mp_add(&C, &yc, &C)) != MP_OKAY) goto CLEANUP; + s_mp_div_2(&C); + if((res = mp_sub(&D, &xc, &D)) != MP_OKAY) goto CLEANUP; + s_mp_div_2(&D); + } + } + + if(mp_cmp(&u, &v) >= 0) { + if((res = mp_sub(&u, &v, &u)) != MP_OKAY) goto CLEANUP; + if((res = mp_sub(&A, &C, &A)) != MP_OKAY) goto CLEANUP; + if((res = mp_sub(&B, &D, &B)) != MP_OKAY) goto CLEANUP; + + } else { + if((res = mp_sub(&v, &u, &v)) != MP_OKAY) goto CLEANUP; + if((res = mp_sub(&C, &A, &C)) != MP_OKAY) goto CLEANUP; + if((res = mp_sub(&D, &B, &D)) != MP_OKAY) goto CLEANUP; + + } + + /* If we're done, copy results to output */ + if(mp_cmp_z(&u) == 0) { + if(x) + if((res = mp_copy(&C, x)) != MP_OKAY) goto CLEANUP; + + if(y) + if((res = mp_copy(&D, y)) != MP_OKAY) goto CLEANUP; + + if(g) + if((res = mp_mul(&gx, &v, g)) != MP_OKAY) goto CLEANUP; + + break; + } + } + + CLEANUP: + while(last >= 0) + mp_clear(clean[last--]); + + return res; + +} /* end mp_xgcd() */ + +/* }}} */ + +/* {{{ mp_invmod(a, m, c) */ + +/* + mp_invmod(a, m, c) + + Compute c = a^-1 (mod m), if there is an inverse for a (mod m). + This is equivalent to the question of whether (a, m) = 1. If not, + MP_UNDEF is returned, and there is no inverse. + */ + +mp_err mp_invmod(mp_int *a, mp_int *m, mp_int *c) +{ + mp_int g, x; + mp_err res; + + ARGCHK(a && m && c, MP_BADARG); + + if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0) + return MP_RANGE; + + if((res = mp_init(&g)) != MP_OKAY) + return res; + if((res = mp_init(&x)) != MP_OKAY) + goto X; + + if((res = mp_xgcd(a, m, &g, &x, NULL)) != MP_OKAY) + goto CLEANUP; + + if(mp_cmp_d(&g, 1) != MP_EQ) { + res = MP_UNDEF; + goto CLEANUP; + } + + res = mp_mod(&x, m, c); + SIGN(c) = SIGN(a); + +CLEANUP: + mp_clear(&x); +X: + mp_clear(&g); + + return res; + +} /* end mp_invmod() */ + +/* }}} */ +#endif /* if MP_NUMTH */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ mp_print(mp, ofp) */ + +#if MP_IOFUNC +/* + mp_print(mp, ofp) + + Print a textual representation of the given mp_int on the output + stream 'ofp'. Output is generated using the internal radix. + */ + +void mp_print(mp_int *mp, FILE *ofp) +{ + int ix; + + if(mp == NULL || ofp == NULL) + return; + + fputc((SIGN(mp) == MP_NEG) ? '-' : '+', ofp); + + for(ix = USED(mp) - 1; ix >= 0; ix--) { + fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix)); + } + +} /* end mp_print() */ + +#endif /* if MP_IOFUNC */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ More I/O Functions */ + +/* {{{ mp_read_signed_bin(mp, str, len) */ + +/* + mp_read_signed_bin(mp, str, len) + + Read in a raw value (base 256) into the given mp_int + */ + +mp_err mp_read_signed_bin(mp_int *mp, unsigned char *str, int len) +{ + mp_err res; + + ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG); + + if((res = mp_read_unsigned_bin(mp, str + 1, len - 1)) == MP_OKAY) { + /* Get sign from first byte */ + if(str[0]) + SIGN(mp) = MP_NEG; + else + SIGN(mp) = MP_ZPOS; + } + + return res; + +} /* end mp_read_signed_bin() */ + +/* }}} */ + +/* {{{ mp_signed_bin_size(mp) */ + +int mp_signed_bin_size(mp_int *mp) +{ + ARGCHK(mp != NULL, 0); + + return mp_unsigned_bin_size(mp) + 1; + +} /* end mp_signed_bin_size() */ + +/* }}} */ + +/* {{{ mp_to_signed_bin(mp, str) */ + +mp_err mp_to_signed_bin(mp_int *mp, unsigned char *str) +{ + ARGCHK(mp != NULL && str != NULL, MP_BADARG); + + /* Caller responsible for allocating enough memory (use mp_raw_size(mp)) */ + str[0] = (char)SIGN(mp); + + return mp_to_unsigned_bin(mp, str + 1); + +} /* end mp_to_signed_bin() */ + +/* }}} */ + +/* {{{ mp_read_unsigned_bin(mp, str, len) */ + +/* + mp_read_unsigned_bin(mp, str, len) + + Read in an unsigned value (base 256) into the given mp_int + */ + +mp_err mp_read_unsigned_bin(mp_int *mp, unsigned char *str, int len) +{ + int ix; + mp_err res; + + ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG); + + mp_zero(mp); + + for(ix = 0; ix < len; ix++) { + if((res = s_mp_mul_2d(mp, CHAR_BIT)) != MP_OKAY) + return res; + + if((res = mp_add_d(mp, str[ix], mp)) != MP_OKAY) + return res; + } + + return MP_OKAY; + +} /* end mp_read_unsigned_bin() */ + +/* }}} */ + +/* {{{ mp_unsigned_bin_size(mp) */ + +int mp_unsigned_bin_size(mp_int *mp) +{ + mp_digit topdig; + int count; + + ARGCHK(mp != NULL, 0); + + /* Special case for the value zero */ + if(USED(mp) == 1 && DIGIT(mp, 0) == 0) + return 1; + + count = (USED(mp) - 1) * sizeof(mp_digit); + topdig = DIGIT(mp, USED(mp) - 1); + + while(topdig != 0) { + ++count; + topdig >>= CHAR_BIT; + } + + return count; + +} /* end mp_unsigned_bin_size() */ + +/* }}} */ + +/* {{{ mp_to_unsigned_bin(mp, str) */ + +mp_err mp_to_unsigned_bin(mp_int *mp, unsigned char *str) +{ + mp_digit *dp, *end, d; + unsigned char *spos; + + ARGCHK(mp != NULL && str != NULL, MP_BADARG); + + dp = DIGITS(mp); + end = dp + USED(mp) - 1; + spos = str; + + /* Special case for zero, quick test */ + if(dp == end && *dp == 0) { + *str = '\0'; + return MP_OKAY; + } + + /* Generate digits in reverse order */ + while(dp < end) { + int ix; + + d = *dp; + for(ix = 0; ix < sizeof(mp_digit); ++ix) { + *spos = d & UCHAR_MAX; + d >>= CHAR_BIT; + ++spos; + } + + ++dp; + } + + /* Now handle last digit specially, high order zeroes are not written */ + d = *end; + while(d != 0) { + *spos = d & UCHAR_MAX; + d >>= CHAR_BIT; + ++spos; + } + + /* Reverse everything to get digits in the correct order */ + while(--spos > str) { + unsigned char t = *str; + *str = *spos; + *spos = t; + + ++str; + } + + return MP_OKAY; + +} /* end mp_to_unsigned_bin() */ + +/* }}} */ + +/* {{{ mp_count_bits(mp) */ + +int mp_count_bits(mp_int *mp) +{ + int len; + mp_digit d; + + ARGCHK(mp != NULL, MP_BADARG); + + len = DIGIT_BIT * (USED(mp) - 1); + d = DIGIT(mp, USED(mp) - 1); + + while(d != 0) { + ++len; + d >>= 1; + } + + return len; + +} /* end mp_count_bits() */ + +/* }}} */ + +/* {{{ mp_read_radix(mp, str, radix) */ + +/* + mp_read_radix(mp, str, radix) + + Read an integer from the given string, and set mp to the resulting + value. The input is presumed to be in base 10. Leading non-digit + characters are ignored, and the function reads until a non-digit + character or the end of the string. + */ + +mp_err mp_read_radix(mp_int *mp, unsigned char *str, int radix) +{ + int ix = 0, val = 0; + mp_err res; + mp_sign sig = MP_ZPOS; + + ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX, + MP_BADARG); + + mp_zero(mp); + + /* Skip leading non-digit characters until a digit or '-' or '+' */ + while(str[ix] && + (s_mp_tovalue(str[ix], radix) < 0) && + str[ix] != '-' && + str[ix] != '+') { + ++ix; + } + + if(str[ix] == '-') { + sig = MP_NEG; + ++ix; + } else if(str[ix] == '+') { + sig = MP_ZPOS; /* this is the default anyway... */ + ++ix; + } + + while((val = s_mp_tovalue(str[ix], radix)) >= 0) { + if((res = s_mp_mul_d(mp, radix)) != MP_OKAY) + return res; + if((res = s_mp_add_d(mp, val)) != MP_OKAY) + return res; + ++ix; + } + + if(s_mp_cmp_d(mp, 0) == MP_EQ) + SIGN(mp) = MP_ZPOS; + else + SIGN(mp) = sig; + + return MP_OKAY; + +} /* end mp_read_radix() */ + +/* }}} */ + +/* {{{ mp_radix_size(mp, radix) */ + +int mp_radix_size(mp_int *mp, int radix) +{ + int len; + ARGCHK(mp != NULL, 0); + + len = s_mp_outlen(mp_count_bits(mp), radix) + 1; /* for NUL terminator */ + + if(mp_cmp_z(mp) < 0) + ++len; /* for sign */ + + return len; + +} /* end mp_radix_size() */ + +/* }}} */ + +/* {{{ mp_value_radix_size(num, qty, radix) */ + +/* num = number of digits + qty = number of bits per digit + radix = target base + + Return the number of digits in the specified radix that would be + needed to express 'num' digits of 'qty' bits each. + */ +int mp_value_radix_size(int num, int qty, int radix) +{ + ARGCHK(num >= 0 && qty > 0 && radix >= 2 && radix <= MAX_RADIX, 0); + + return s_mp_outlen(num * qty, radix); + +} /* end mp_value_radix_size() */ + +/* }}} */ + +/* {{{ mp_toradix(mp, str, radix) */ + +mp_err mp_toradix(mp_int *mp, unsigned char *str, int radix) +{ + int ix, pos = 0; + + ARGCHK(mp != NULL && str != NULL, MP_BADARG); + ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE); + + if(mp_cmp_z(mp) == MP_EQ) { + str[0] = '0'; + str[1] = '\0'; + } else { + mp_err res; + mp_int tmp; + mp_sign sgn; + mp_digit rem, rdx = (mp_digit)radix; + char ch; + + if((res = mp_init_copy(&tmp, mp)) != MP_OKAY) + return res; + + /* Save sign for later, and take absolute value */ + sgn = SIGN(&tmp); SIGN(&tmp) = MP_ZPOS; + + /* Generate output digits in reverse order */ + while(mp_cmp_z(&tmp) != 0) { + if((res = s_mp_div_d(&tmp, rdx, &rem)) != MP_OKAY) { + mp_clear(&tmp); + return res; + } + + /* Generate digits, use capital letters */ + ch = s_mp_todigit(rem, radix, 0); + + str[pos++] = ch; + } + + /* Add - sign if original value was negative */ + if(sgn == MP_NEG) + str[pos++] = '-'; + + /* Add trailing NUL to end the string */ + str[pos--] = '\0'; + + /* Reverse the digits and sign indicator */ + ix = 0; + while(ix < pos) { + char tmp = str[ix]; + + str[ix] = str[pos]; + str[pos] = tmp; + ++ix; + --pos; + } + + mp_clear(&tmp); + } + + return MP_OKAY; + +} /* end mp_toradix() */ + +/* }}} */ + +/* {{{ mp_char2value(ch, r) */ + +int mp_char2value(char ch, int r) +{ + return s_mp_tovalue(ch, r); + +} /* end mp_tovalue() */ + +/* }}} */ + +/* }}} */ + +/* {{{ mp_strerror(ec) */ + +/* + mp_strerror(ec) + + Return a string describing the meaning of error code 'ec'. The + string returned is allocated in static memory, so the caller should + not attempt to modify or free the memory associated with this + string. + */ +const char *mp_strerror(mp_err ec) +{ + int aec = (ec < 0) ? -ec : ec; + + /* Code values are negative, so the senses of these comparisons + are accurate */ + if(ec < MP_LAST_CODE || ec > MP_OKAY) { + return mp_err_string[0]; /* unknown error code */ + } else { + return mp_err_string[aec + 1]; + } + +} /* end mp_strerror() */ + +/* }}} */ + +/*========================================================================*/ +/*------------------------------------------------------------------------*/ +/* Static function definitions (internal use only) */ + +/* {{{ Memory management */ + +/* {{{ s_mp_grow(mp, min) */ + +/* Make sure there are at least 'min' digits allocated to mp */ +mp_err s_mp_grow(mp_int *mp, mp_size min) +{ + if(min > ALLOC(mp)) { + mp_digit *tmp; + + /* Set min to next nearest default precision block size */ + min = ((min + (s_mp_defprec - 1)) / s_mp_defprec) * s_mp_defprec; + + if((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL) + return MP_MEM; + + s_mp_copy(DIGITS(mp), tmp, USED(mp)); + +#if MP_CRYPTO + s_mp_setz(DIGITS(mp), ALLOC(mp)); +#endif + s_mp_free(DIGITS(mp)); + DIGITS(mp) = tmp; + ALLOC(mp) = min; + } + + return MP_OKAY; + +} /* end s_mp_grow() */ + +/* }}} */ + +/* {{{ s_mp_pad(mp, min) */ + +/* Make sure the used size of mp is at least 'min', growing if needed */ +mp_err s_mp_pad(mp_int *mp, mp_size min) +{ + if(min > USED(mp)) { + mp_err res; + + /* Make sure there is room to increase precision */ + if(min > ALLOC(mp) && (res = s_mp_grow(mp, min)) != MP_OKAY) + return res; + + /* Increase precision; should already be 0-filled */ + USED(mp) = min; + } + + return MP_OKAY; + +} /* end s_mp_pad() */ + +/* }}} */ + +/* {{{ s_mp_setz(dp, count) */ + +#if MP_MACRO == 0 +/* Set 'count' digits pointed to by dp to be zeroes */ +void s_mp_setz(mp_digit *dp, mp_size count) +{ +#if MP_MEMSET == 0 + int ix; + + for(ix = 0; ix < count; ix++) + dp[ix] = 0; +#else + memset(dp, 0, count * sizeof(mp_digit)); +#endif + +} /* end s_mp_setz() */ +#endif + +/* }}} */ + +/* {{{ s_mp_copy(sp, dp, count) */ + +#if MP_MACRO == 0 +/* Copy 'count' digits from sp to dp */ +void s_mp_copy(mp_digit *sp, mp_digit *dp, mp_size count) +{ +#if MP_MEMCPY == 0 + int ix; + + for(ix = 0; ix < count; ix++) + dp[ix] = sp[ix]; +#else + memcpy(dp, sp, count * sizeof(mp_digit)); +#endif + +} /* end s_mp_copy() */ +#endif + +/* }}} */ + +/* {{{ s_mp_alloc(nb, ni) */ + +#if MP_MACRO == 0 +/* Allocate ni records of nb bytes each, and return a pointer to that */ +void *s_mp_alloc(size_t nb, size_t ni) +{ + return calloc(nb, ni); + +} /* end s_mp_alloc() */ +#endif + +/* }}} */ + +/* {{{ s_mp_free(ptr) */ + +#if MP_MACRO == 0 +/* Free the memory pointed to by ptr */ +void s_mp_free(void *ptr) +{ + if(ptr) + free(ptr); + +} /* end s_mp_free() */ +#endif + +/* }}} */ + +/* {{{ s_mp_clamp(mp) */ + +/* Remove leading zeroes from the given value */ +void s_mp_clamp(mp_int *mp) +{ + mp_size du = USED(mp); + mp_digit *zp = DIGITS(mp) + du - 1; + + while(du > 1 && !*zp--) + --du; + + USED(mp) = du; + +} /* end s_mp_clamp() */ + + +/* }}} */ + +/* {{{ s_mp_exch(a, b) */ + +/* Exchange the data for a and b; (b, a) = (a, b) */ +void s_mp_exch(mp_int *a, mp_int *b) +{ + mp_int tmp; + + tmp = *a; + *a = *b; + *b = tmp; + +} /* end s_mp_exch() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Arithmetic helpers */ + +/* {{{ s_mp_lshd(mp, p) */ + +/* + Shift mp leftward by p digits, growing if needed, and zero-filling + the in-shifted digits at the right end. This is a convenient + alternative to multiplication by powers of the radix + */ + +mp_err s_mp_lshd(mp_int *mp, mp_size p) +{ + mp_err res; + mp_size pos; + mp_digit *dp; + int ix; + + if(p == 0) + return MP_OKAY; + + if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY) + return res; + + pos = USED(mp) - 1; + dp = DIGITS(mp); + + /* Shift all the significant figures over as needed */ + for(ix = pos - p; ix >= 0; ix--) + dp[ix + p] = dp[ix]; + + /* Fill the bottom digits with zeroes */ + for(ix = 0; ix < p; ix++) + dp[ix] = 0; + + return MP_OKAY; + +} /* end s_mp_lshd() */ + +/* }}} */ + +/* {{{ s_mp_rshd(mp, p) */ + +/* + Shift mp rightward by p digits. Maintains the invariant that + digits above the precision are all zero. Digits shifted off the + end are lost. Cannot fail. + */ + +void s_mp_rshd(mp_int *mp, mp_size p) +{ + mp_size ix; + mp_digit *dp; + + if(p == 0) + return; + + /* Shortcut when all digits are to be shifted off */ + if(p >= USED(mp)) { + s_mp_setz(DIGITS(mp), ALLOC(mp)); + USED(mp) = 1; + SIGN(mp) = MP_ZPOS; + return; + } + + /* Shift all the significant figures over as needed */ + dp = DIGITS(mp); + for(ix = p; ix < USED(mp); ix++) + dp[ix - p] = dp[ix]; + + /* Fill the top digits with zeroes */ + ix -= p; + while(ix < USED(mp)) + dp[ix++] = 0; + + /* Strip off any leading zeroes */ + s_mp_clamp(mp); + +} /* end s_mp_rshd() */ + +/* }}} */ + +/* {{{ s_mp_div_2(mp) */ + +/* Divide by two -- take advantage of radix properties to do it fast */ +void s_mp_div_2(mp_int *mp) +{ + s_mp_div_2d(mp, 1); + +} /* end s_mp_div_2() */ + +/* }}} */ + +/* {{{ s_mp_mul_2(mp) */ + +mp_err s_mp_mul_2(mp_int *mp) +{ + int ix; + mp_digit kin = 0, kout, *dp = DIGITS(mp); + mp_err res; + + /* Shift digits leftward by 1 bit */ + for(ix = 0; ix < USED(mp); ix++) { + kout = (dp[ix] >> (DIGIT_BIT - 1)) & 1; + dp[ix] = (dp[ix] << 1) | kin; + + kin = kout; + } + + /* Deal with rollover from last digit */ + if(kin) { + if(ix >= ALLOC(mp)) { + if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY) + return res; + dp = DIGITS(mp); + } + + dp[ix] = kin; + USED(mp) += 1; + } + + return MP_OKAY; + +} /* end s_mp_mul_2() */ + +/* }}} */ + +/* {{{ s_mp_mod_2d(mp, d) */ + +/* + Remainder the integer by 2^d, where d is a number of bits. This + amounts to a bitwise AND of the value, and does not require the full + division code + */ +void s_mp_mod_2d(mp_int *mp, mp_digit d) +{ + unsigned int ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT); + unsigned int ix; + mp_digit dmask, *dp = DIGITS(mp); + + if(ndig >= USED(mp)) + return; + + /* Flush all the bits above 2^d in its digit */ + dmask = (1 << nbit) - 1; + dp[ndig] &= dmask; + + /* Flush all digits above the one with 2^d in it */ + for(ix = ndig + 1; ix < USED(mp); ix++) + dp[ix] = 0; + + s_mp_clamp(mp); + +} /* end s_mp_mod_2d() */ + +/* }}} */ + +/* {{{ s_mp_mul_2d(mp, d) */ + +/* + Multiply by the integer 2^d, where d is a number of bits. This + amounts to a bitwise shift of the value, and does not require the + full multiplication code. + */ +mp_err s_mp_mul_2d(mp_int *mp, mp_digit d) +{ + mp_err res; + mp_digit save, next, mask, *dp; + mp_size used; + int ix; + + if((res = s_mp_lshd(mp, d / DIGIT_BIT)) != MP_OKAY) + return res; + + dp = DIGITS(mp); used = USED(mp); + d %= DIGIT_BIT; + + mask = (1 << d) - 1; + + /* If the shift requires another digit, make sure we've got one to + work with */ + if((dp[used - 1] >> (DIGIT_BIT - d)) & mask) { + if((res = s_mp_grow(mp, used + 1)) != MP_OKAY) + return res; + dp = DIGITS(mp); + } + + /* Do the shifting... */ + save = 0; + for(ix = 0; ix < used; ix++) { + next = (dp[ix] >> (DIGIT_BIT - d)) & mask; + dp[ix] = (dp[ix] << d) | save; + save = next; + } + + /* If, at this point, we have a nonzero carryout into the next + digit, we'll increase the size by one digit, and store it... + */ + if(save) { + dp[used] = save; + USED(mp) += 1; + } + + s_mp_clamp(mp); + return MP_OKAY; + +} /* end s_mp_mul_2d() */ + +/* }}} */ + +/* {{{ s_mp_div_2d(mp, d) */ + +/* + Divide the integer by 2^d, where d is a number of bits. This + amounts to a bitwise shift of the value, and does not require the + full division code (used in Barrett reduction, see below) + */ +void s_mp_div_2d(mp_int *mp, mp_digit d) +{ + int ix; + mp_digit save, next, mask, *dp = DIGITS(mp); + + s_mp_rshd(mp, d / DIGIT_BIT); + d %= DIGIT_BIT; + + mask = (1 << d) - 1; + + save = 0; + for(ix = USED(mp) - 1; ix >= 0; ix--) { + next = dp[ix] & mask; + dp[ix] = (dp[ix] >> d) | (save << (DIGIT_BIT - d)); + save = next; + } + + s_mp_clamp(mp); + +} /* end s_mp_div_2d() */ + +/* }}} */ + +/* {{{ s_mp_norm(a, b) */ + +/* + s_mp_norm(a, b) + + Normalize a and b for division, where b is the divisor. In order + that we might make good guesses for quotient digits, we want the + leading digit of b to be at least half the radix, which we + accomplish by multiplying a and b by a constant. This constant is + returned (so that it can be divided back out of the remainder at the + end of the division process). + + We multiply by the smallest power of 2 that gives us a leading digit + at least half the radix. By choosing a power of 2, we simplify the + multiplication and division steps to simple shifts. + */ +mp_digit s_mp_norm(mp_int *a, mp_int *b) +{ + mp_digit t, d = 0; + + t = DIGIT(b, USED(b) - 1); + while(t < (RADIX / 2)) { + t <<= 1; + ++d; + } + + if(d != 0) { + s_mp_mul_2d(a, d); + s_mp_mul_2d(b, d); + } + + return d; + +} /* end s_mp_norm() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Primitive digit arithmetic */ + +/* {{{ s_mp_add_d(mp, d) */ + +/* Add d to |mp| in place */ +mp_err s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */ +{ + mp_word w, k = 0; + mp_size ix = 1, used = USED(mp); + mp_digit *dp = DIGITS(mp); + + w = dp[0] + d; + dp[0] = ACCUM(w); + k = CARRYOUT(w); + + while(ix < used && k) { + w = dp[ix] + k; + dp[ix] = ACCUM(w); + k = CARRYOUT(w); + ++ix; + } + + if(k != 0) { + mp_err res; + + if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY) + return res; + + DIGIT(mp, ix) = k; + } + + return MP_OKAY; + +} /* end s_mp_add_d() */ + +/* }}} */ + +/* {{{ s_mp_sub_d(mp, d) */ + +/* Subtract d from |mp| in place, assumes |mp| > d */ +mp_err s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */ +{ + mp_word w, b = 0; + mp_size ix = 1, used = USED(mp); + mp_digit *dp = DIGITS(mp); + + /* Compute initial subtraction */ + w = (RADIX + dp[0]) - d; + b = CARRYOUT(w) ? 0 : 1; + dp[0] = ACCUM(w); + + /* Propagate borrows leftward */ + while(b && ix < used) { + w = (RADIX + dp[ix]) - b; + b = CARRYOUT(w) ? 0 : 1; + dp[ix] = ACCUM(w); + ++ix; + } + + /* Remove leading zeroes */ + s_mp_clamp(mp); + + /* If we have a borrow out, it's a violation of the input invariant */ + if(b) + return MP_RANGE; + else + return MP_OKAY; + +} /* end s_mp_sub_d() */ + +/* }}} */ + +/* {{{ s_mp_mul_d(a, d) */ + +/* Compute a = a * d, single digit multiplication */ +mp_err s_mp_mul_d(mp_int *a, mp_digit d) +{ + mp_word w, k = 0; + mp_size ix, max; + mp_err res; + mp_digit *dp = DIGITS(a); + + /* + Single-digit multiplication will increase the precision of the + output by at most one digit. However, we can detect when this + will happen -- if the high-order digit of a, times d, gives a + two-digit result, then the precision of the result will increase; + otherwise it won't. We use this fact to avoid calling s_mp_pad() + unless absolutely necessary. + */ + max = USED(a); + w = dp[max - 1] * d; + if(CARRYOUT(w) != 0) { + if((res = s_mp_pad(a, max + 1)) != MP_OKAY) + return res; + dp = DIGITS(a); + } + + for(ix = 0; ix < max; ix++) { + w = (dp[ix] * d) + k; + dp[ix] = ACCUM(w); + k = CARRYOUT(w); + } + + /* If there is a precision increase, take care of it here; the above + test guarantees we have enough storage to do this safely. + */ + if(k) { + dp[max] = k; + USED(a) = max + 1; + } + + s_mp_clamp(a); + + return MP_OKAY; + +} /* end s_mp_mul_d() */ + +/* }}} */ + +/* {{{ s_mp_div_d(mp, d, r) */ + +/* + s_mp_div_d(mp, d, r) + + Compute the quotient mp = mp / d and remainder r = mp mod d, for a + single digit d. If r is null, the remainder will be discarded. + */ + +mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r) +{ + mp_word w = 0, t; + mp_int quot; + mp_err res; + mp_digit *dp = DIGITS(mp), *qp; + int ix; + + if(d == 0) + return MP_RANGE; + + /* Make room for the quotient */ + if((res = mp_init_size(", USED(mp))) != MP_OKAY) + return res; + + USED(") = USED(mp); /* so clamping will work below */ + qp = DIGITS("); + + /* Divide without subtraction */ + for(ix = USED(mp) - 1; ix >= 0; ix--) { + w = (w << DIGIT_BIT) | dp[ix]; + + if(w >= d) { + t = w / d; + w = w % d; + } else { + t = 0; + } + + qp[ix] = t; + } + + /* Deliver the remainder, if desired */ + if(r) + *r = w; + + s_mp_clamp("); + mp_exch(", mp); + mp_clear("); + + return MP_OKAY; + +} /* end s_mp_div_d() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Primitive full arithmetic */ + +/* {{{ s_mp_add(a, b) */ + +/* Compute a = |a| + |b| */ +mp_err s_mp_add(mp_int *a, mp_int *b) /* magnitude addition */ +{ + mp_word w = 0; + mp_digit *pa, *pb; + mp_size ix, used = USED(b); + mp_err res; + + /* Make sure a has enough precision for the output value */ + if((used > USED(a)) && (res = s_mp_pad(a, used)) != MP_OKAY) + return res; + + /* + Add up all digits up to the precision of b. If b had initially + the same precision as a, or greater, we took care of it by the + padding step above, so there is no problem. If b had initially + less precision, we'll have to make sure the carry out is duly + propagated upward among the higher-order digits of the sum. + */ + pa = DIGITS(a); + pb = DIGITS(b); + for(ix = 0; ix < used; ++ix) { + w += *pa + *pb++; + *pa++ = ACCUM(w); + w = CARRYOUT(w); + } + + /* If we run out of 'b' digits before we're actually done, make + sure the carries get propagated upward... + */ + used = USED(a); + while(w && ix < used) { + w += *pa; + *pa++ = ACCUM(w); + w = CARRYOUT(w); + ++ix; + } + + /* If there's an overall carry out, increase precision and include + it. We could have done this initially, but why touch the memory + allocator unless we're sure we have to? + */ + if(w) { + if((res = s_mp_pad(a, used + 1)) != MP_OKAY) + return res; + + DIGIT(a, ix) = w; /* pa may not be valid after s_mp_pad() call */ + } + + return MP_OKAY; + +} /* end s_mp_add() */ + +/* }}} */ + +/* {{{ s_mp_sub(a, b) */ + +/* Compute a = |a| - |b|, assumes |a| >= |b| */ +mp_err s_mp_sub(mp_int *a, mp_int *b) /* magnitude subtract */ +{ + mp_word w = 0; + mp_digit *pa, *pb; + mp_size ix, used = USED(b); + + /* + Subtract and propagate borrow. Up to the precision of b, this + accounts for the digits of b; after that, we just make sure the + carries get to the right place. This saves having to pad b out to + the precision of a just to make the loops work right... + */ + pa = DIGITS(a); + pb = DIGITS(b); + + for(ix = 0; ix < used; ++ix) { + w = (RADIX + *pa) - w - *pb++; + *pa++ = ACCUM(w); + w = CARRYOUT(w) ? 0 : 1; + } + + used = USED(a); + while(ix < used) { + w = RADIX + *pa - w; + *pa++ = ACCUM(w); + w = CARRYOUT(w) ? 0 : 1; + ++ix; + } + + /* Clobber any leading zeroes we created */ + s_mp_clamp(a); + + /* + If there was a borrow out, then |b| > |a| in violation + of our input invariant. We've already done the work, + but we'll at least complain about it... + */ + if(w) + return MP_RANGE; + else + return MP_OKAY; + +} /* end s_mp_sub() */ + +/* }}} */ + +mp_err s_mp_reduce(mp_int *x, mp_int *m, mp_int *mu) +{ + mp_int q; + mp_err res; + mp_size um = USED(m); + + if((res = mp_init_copy(&q, x)) != MP_OKAY) + return res; + + s_mp_rshd(&q, um - 1); /* q1 = x / b^(k-1) */ + s_mp_mul(&q, mu); /* q2 = q1 * mu */ + s_mp_rshd(&q, um + 1); /* q3 = q2 / b^(k+1) */ + + /* x = x mod b^(k+1), quick (no division) */ + s_mp_mod_2d(x, (mp_digit)(DIGIT_BIT * (um + 1))); + + /* q = q * m mod b^(k+1), quick (no division), uses the short multiplier */ +#ifndef SHRT_MUL + s_mp_mul(&q, m); + s_mp_mod_2d(&q, (mp_digit)(DIGIT_BIT * (um + 1))); +#else + s_mp_mul_dig(&q, m, um + 1); +#endif + + /* x = x - q */ + if((res = mp_sub(x, &q, x)) != MP_OKAY) + goto CLEANUP; + + /* If x < 0, add b^(k+1) to it */ + if(mp_cmp_z(x) < 0) { + mp_set(&q, 1); + if((res = s_mp_lshd(&q, um + 1)) != MP_OKAY) + goto CLEANUP; + if((res = mp_add(x, &q, x)) != MP_OKAY) + goto CLEANUP; + } + + /* Back off if it's too big */ + while(mp_cmp(x, m) >= 0) { + if((res = s_mp_sub(x, m)) != MP_OKAY) + break; + } + + CLEANUP: + mp_clear(&q); + + return res; + +} /* end s_mp_reduce() */ + + + +/* {{{ s_mp_mul(a, b) */ + +/* Compute a = |a| * |b| */ +mp_err s_mp_mul(mp_int *a, mp_int *b) +{ + mp_word w, k = 0; + mp_int tmp; + mp_err res; + mp_size ix, jx, ua = USED(a), ub = USED(b); + mp_digit *pa, *pb, *pt, *pbt; + + if((res = mp_init_size(&tmp, ua + ub)) != MP_OKAY) + return res; + + /* This has the effect of left-padding with zeroes... */ + USED(&tmp) = ua + ub; + + /* We're going to need the base value each iteration */ + pbt = DIGITS(&tmp); + + /* Outer loop: Digits of b */ + + pb = DIGITS(b); + for(ix = 0; ix < ub; ++ix, ++pb) { + if(*pb == 0) + continue; + + /* Inner product: Digits of a */ + pa = DIGITS(a); + for(jx = 0; jx < ua; ++jx, ++pa) { + pt = pbt + ix + jx; + w = *pb * *pa + k + *pt; + *pt = ACCUM(w); + k = CARRYOUT(w); + } + + pbt[ix + jx] = k; + k = 0; + } + + s_mp_clamp(&tmp); + s_mp_exch(&tmp, a); + + mp_clear(&tmp); + + return MP_OKAY; + +} /* end s_mp_mul() */ + +/* }}} */ + +/* {{{ s_mp_kmul(a, b, out, len) */ + +#if 0 +void s_mp_kmul(mp_digit *a, mp_digit *b, mp_digit *out, mp_size len) +{ + mp_word w, k = 0; + mp_size ix, jx; + mp_digit *pa, *pt; + + for(ix = 0; ix < len; ++ix, ++b) { + if(*b == 0) + continue; + + pa = a; + for(jx = 0; jx < len; ++jx, ++pa) { + pt = out + ix + jx; + w = *b * *pa + k + *pt; + *pt = ACCUM(w); + k = CARRYOUT(w); + } + + out[ix + jx] = k; + k = 0; + } + +} /* end s_mp_kmul() */ +#endif + +/* }}} */ + +/* {{{ s_mp_sqr(a) */ + +/* + Computes the square of a, in place. This can be done more + efficiently than a general multiplication, because many of the + computation steps are redundant when squaring. The inner product + step is a bit more complicated, but we save a fair number of + iterations of the multiplication loop. + */ +#if MP_SQUARE +mp_err s_mp_sqr(mp_int *a) +{ + mp_word w, k = 0; + mp_int tmp; + mp_err res; + mp_size ix, jx, kx, used = USED(a); + mp_digit *pa1, *pa2, *pt, *pbt; + + if((res = mp_init_size(&tmp, 2 * used)) != MP_OKAY) + return res; + + /* Left-pad with zeroes */ + USED(&tmp) = 2 * used; + + /* We need the base value each time through the loop */ + pbt = DIGITS(&tmp); + + pa1 = DIGITS(a); + for(ix = 0; ix < used; ++ix, ++pa1) { + if(*pa1 == 0) + continue; + + w = DIGIT(&tmp, ix + ix) + (*pa1 * *pa1); + + pbt[ix + ix] = ACCUM(w); + k = CARRYOUT(w); + + /* + The inner product is computed as: + + (C, S) = t[i,j] + 2 a[i] a[j] + C + + This can overflow what can be represented in an mp_word, and + since C arithmetic does not provide any way to check for + overflow, we have to check explicitly for overflow conditions + before they happen. + */ + for(jx = ix + 1, pa2 = DIGITS(a) + jx; jx < used; ++jx, ++pa2) { + mp_word u = 0, v; + + /* Store this in a temporary to avoid indirections later */ + pt = pbt + ix + jx; + + /* Compute the multiplicative step */ + w = *pa1 * *pa2; + + /* If w is more than half MP_WORD_MAX, the doubling will + overflow, and we need to record a carry out into the next + word */ + u = (w >> (MP_WORD_BIT - 1)) & 1; + + /* Double what we've got, overflow will be ignored as defined + for C arithmetic (we've already noted if it is to occur) + */ + w *= 2; + + /* Compute the additive step */ + v = *pt + k; + + /* If we do not already have an overflow carry, check to see + if the addition will cause one, and set the carry out if so + */ + u |= ((MP_WORD_MAX - v) < w); + + /* Add in the rest, again ignoring overflow */ + w += v; + + /* Set the i,j digit of the output */ + *pt = ACCUM(w); + + /* Save carry information for the next iteration of the loop. + This is why k must be an mp_word, instead of an mp_digit */ + k = CARRYOUT(w) | (u << DIGIT_BIT); + + } /* for(jx ...) */ + + /* Set the last digit in the cycle and reset the carry */ + k = DIGIT(&tmp, ix + jx) + k; + pbt[ix + jx] = ACCUM(k); + k = CARRYOUT(k); + + /* If we are carrying out, propagate the carry to the next digit + in the output. This may cascade, so we have to be somewhat + circumspect -- but we will have enough precision in the output + that we won't overflow + */ + kx = 1; + while(k) { + k = pbt[ix + jx + kx] + 1; + pbt[ix + jx + kx] = ACCUM(k); + k = CARRYOUT(k); + ++kx; + } + } /* for(ix ...) */ + + s_mp_clamp(&tmp); + s_mp_exch(&tmp, a); + + mp_clear(&tmp); + + return MP_OKAY; + +} /* end s_mp_sqr() */ +#endif + +/* }}} */ + +/* {{{ s_mp_div(a, b) */ + +/* + s_mp_div(a, b) + + Compute a = a / b and b = a mod b. Assumes b > a. + */ + +mp_err s_mp_div(mp_int *a, mp_int *b) +{ + mp_int quot, rem, t; + mp_word q; + mp_err res; + mp_digit d; + int ix; + + if(mp_cmp_z(b) == 0) + return MP_RANGE; + + /* Shortcut if b is power of two */ + if((ix = s_mp_ispow2(b)) >= 0) { + mp_copy(a, b); /* need this for remainder */ + s_mp_div_2d(a, (mp_digit)ix); + s_mp_mod_2d(b, (mp_digit)ix); + + return MP_OKAY; + } + + /* Allocate space to store the quotient */ + if((res = mp_init_size(", USED(a))) != MP_OKAY) + return res; + + /* A working temporary for division */ + if((res = mp_init_size(&t, USED(a))) != MP_OKAY) + goto T; + + /* Allocate space for the remainder */ + if((res = mp_init_size(&rem, USED(a))) != MP_OKAY) + goto REM; + + /* Normalize to optimize guessing */ + d = s_mp_norm(a, b); + + /* Perform the division itself...woo! */ + ix = USED(a) - 1; + + while(ix >= 0) { + /* Find a partial substring of a which is at least b */ + while(s_mp_cmp(&rem, b) < 0 && ix >= 0) { + if((res = s_mp_lshd(&rem, 1)) != MP_OKAY) + goto CLEANUP; + + if((res = s_mp_lshd(", 1)) != MP_OKAY) + goto CLEANUP; + + DIGIT(&rem, 0) = DIGIT(a, ix); + s_mp_clamp(&rem); + --ix; + } + + /* If we didn't find one, we're finished dividing */ + if(s_mp_cmp(&rem, b) < 0) + break; + + /* Compute a guess for the next quotient digit */ + q = DIGIT(&rem, USED(&rem) - 1); + if(q <= DIGIT(b, USED(b) - 1) && USED(&rem) > 1) + q = (q << DIGIT_BIT) | DIGIT(&rem, USED(&rem) - 2); + + q /= DIGIT(b, USED(b) - 1); + + /* The guess can be as much as RADIX + 1 */ + if(q >= RADIX) + q = RADIX - 1; + + /* See what that multiplies out to */ + mp_copy(b, &t); + if((res = s_mp_mul_d(&t, q)) != MP_OKAY) + goto CLEANUP; + + /* + If it's too big, back it off. We should not have to do this + more than once, or, in rare cases, twice. Knuth describes a + method by which this could be reduced to a maximum of once, but + I didn't implement that here. + */ + while(s_mp_cmp(&t, &rem) > 0) { + --q; + s_mp_sub(&t, b); + } + + /* At this point, q should be the right next digit */ + if((res = s_mp_sub(&rem, &t)) != MP_OKAY) + goto CLEANUP; + + /* + Include the digit in the quotient. We allocated enough memory + for any quotient we could ever possibly get, so we should not + have to check for failures here + */ + DIGIT(", 0) = q; + } + + /* Denormalize remainder */ + if(d != 0) + s_mp_div_2d(&rem, d); + + s_mp_clamp("); + s_mp_clamp(&rem); + + /* Copy quotient back to output */ + s_mp_exch(", a); + + /* Copy remainder back to output */ + s_mp_exch(&rem, b); + +CLEANUP: + mp_clear(&rem); +REM: + mp_clear(&t); +T: + mp_clear("); + + return res; + +} /* end s_mp_div() */ + +/* }}} */ + +/* {{{ s_mp_2expt(a, k) */ + +mp_err s_mp_2expt(mp_int *a, mp_digit k) +{ + mp_err res; + mp_size dig, bit; + + dig = k / DIGIT_BIT; + bit = k % DIGIT_BIT; + + mp_zero(a); + if((res = s_mp_pad(a, dig + 1)) != MP_OKAY) + return res; + + DIGIT(a, dig) |= (1 << bit); + + return MP_OKAY; + +} /* end s_mp_2expt() */ + +/* }}} */ + + +/* }}} */ + +/* }}} */ + +/* {{{ Primitive comparisons */ + +/* {{{ s_mp_cmp(a, b) */ + +/* Compare |a| <=> |b|, return 0 if equal, <0 if a0 if a>b */ +int s_mp_cmp(mp_int *a, mp_int *b) +{ + mp_size ua = USED(a), ub = USED(b); + + if(ua > ub) + return MP_GT; + else if(ua < ub) + return MP_LT; + else { + int ix = ua - 1; + mp_digit *ap = DIGITS(a) + ix, *bp = DIGITS(b) + ix; + + while(ix >= 0) { + if(*ap > *bp) + return MP_GT; + else if(*ap < *bp) + return MP_LT; + + --ap; --bp; --ix; + } + + return MP_EQ; + } + +} /* end s_mp_cmp() */ + +/* }}} */ + +/* {{{ s_mp_cmp_d(a, d) */ + +/* Compare |a| <=> d, return 0 if equal, <0 if a0 if a>d */ +int s_mp_cmp_d(mp_int *a, mp_digit d) +{ + mp_size ua = USED(a); + mp_digit *ap = DIGITS(a); + + if(ua > 1) + return MP_GT; + + if(*ap < d) + return MP_LT; + else if(*ap > d) + return MP_GT; + else + return MP_EQ; + +} /* end s_mp_cmp_d() */ + +/* }}} */ + +/* {{{ s_mp_ispow2(v) */ + +/* + Returns -1 if the value is not a power of two; otherwise, it returns + k such that v = 2^k, i.e. lg(v). + */ +int s_mp_ispow2(mp_int *v) +{ + mp_digit d, *dp; + mp_size uv = USED(v); + int extra = 0, ix; + + d = DIGIT(v, uv - 1); /* most significant digit of v */ + + while(d && ((d & 1) == 0)) { + d >>= 1; + ++extra; + } + + if(d == 1) { + ix = uv - 2; + dp = DIGITS(v) + ix; + + while(ix >= 0) { + if(*dp) + return -1; /* not a power of two */ + + --dp; --ix; + } + + return ((uv - 1) * DIGIT_BIT) + extra; + } + + return -1; + +} /* end s_mp_ispow2() */ + +/* }}} */ + +/* {{{ s_mp_ispow2d(d) */ + +int s_mp_ispow2d(mp_digit d) +{ + int pow = 0; + + while((d & 1) == 0) { + ++pow; d >>= 1; + } + + if(d == 1) + return pow; + + return -1; + +} /* end s_mp_ispow2d() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Primitive I/O helpers */ + +/* {{{ s_mp_tovalue(ch, r) */ + +/* + Convert the given character to its digit value, in the given radix. + If the given character is not understood in the given radix, -1 is + returned. Otherwise the digit's numeric value is returned. + + The results will be odd if you use a radix < 2 or > 62, you are + expected to know what you're up to. + */ +int s_mp_tovalue(char ch, int r) +{ + int val, xch; + + if(r > 36) + xch = ch; + else + xch = toupper(ch); + + if(isdigit(xch)) + val = xch - '0'; + else if(isupper(xch)) + val = xch - 'A' + 10; + else if(islower(xch)) + val = xch - 'a' + 36; + else if(xch == '+') + val = 62; + else if(xch == '/') + val = 63; + else + return -1; + + if(val < 0 || val >= r) + return -1; + + return val; + +} /* end s_mp_tovalue() */ + +/* }}} */ + +/* {{{ s_mp_todigit(val, r, low) */ + +/* + Convert val to a radix-r digit, if possible. If val is out of range + for r, returns zero. Otherwise, returns an ASCII character denoting + the value in the given radix. + + The results may be odd if you use a radix < 2 or > 64, you are + expected to know what you're doing. + */ + +char s_mp_todigit(int val, int r, int low) +{ + char ch; + + if(val < 0 || val >= r) + return 0; + + ch = s_dmap_1[val]; + + if(r <= 36 && low) + ch = tolower(ch); + + return ch; + +} /* end s_mp_todigit() */ + +/* }}} */ + +/* {{{ s_mp_outlen(bits, radix) */ + +/* + Return an estimate for how long a string is needed to hold a radix + r representation of a number with 'bits' significant bits. + + Does not include space for a sign or a NUL terminator. + */ +int s_mp_outlen(int bits, int r) +{ + return (int)((double)bits * LOG_V_2(r)); + +} /* end s_mp_outlen() */ + +/* }}} */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* HERE THERE BE DRAGONS */ +/* crc==4242132123, version==2, Sat Feb 02 06:43:52 2002 */ diff --git a/mtest/mtest.c b/mtest/mtest.c index 33ff215fd..2c248256c 100644 --- a/mtest/mtest.c +++ b/mtest/mtest.c @@ -110,7 +110,7 @@ int main(void) t1 = clock(); for (;;) { if (clock() - t1 > CLOCKS_PER_SEC) { - sleep(1); + sleep(2); t1 = clock(); } diff --git a/pics/makefile~ b/pics/makefile~ deleted file mode 100644 index 4d9bd6bd6..000000000 --- a/pics/makefile~ +++ /dev/null @@ -1,35 +0,0 @@ -# makes the images... yeah - -default: pses - -design_process.ps: design_process.tif - tiff2ps -s -e design_process.tif > design_process.ps - -sliding_window.ps: sliding_window.tif - tiff2ps -e sliding_window.tif > sliding_window.ps - -expt_state.ps: expt_state.tif - tiff2ps -e expt_state.tif > expt_state.ps - -primality.ps: primality.tif - tiff2ps -e primality.tif > primality.ps - -design_process.pdf: design_process.ps - epstopdf design_process.ps - -sliding_window.pdf: sliding_window.ps - epstopdf sliding_window.ps - -expt_state.pdf: expt_state.ps - epstopdf expt_state.ps - -primality.pdf: primality.ps - epstopdf primality.ps - - -pses: sliding_window.ps expt_state.ps primality.ps design_process.ps -pdfes: sliding_window.pdf expt_state.pdf primality.pdf design_process.pdf - -clean: - rm -rf *.ps *.pdf .xvpics - \ No newline at end of file diff --git a/poster.pdf b/poster.pdf index dc71d8b9e..3731bd2bd 100644 Binary files a/poster.pdf and b/poster.pdf differ diff --git a/pre_gen/mpi.c b/pre_gen/mpi.c index 52288a97a..1f9997f1a 100644 --- a/pre_gen/mpi.c +++ b/pre_gen/mpi.c @@ -631,8 +631,7 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) * Based on Algorithm 14.16 on pp.597 of HAC. * */ -int -fast_s_mp_sqr (mp_int * a, mp_int * b) +int fast_s_mp_sqr (mp_int * a, mp_int * b) { int olduse, newused, res, ix, pa; mp_word W2[MP_WARRAY], W[MP_WARRAY]; @@ -1345,11 +1344,15 @@ int mp_cmp_mag (mp_int * a, mp_int * b) */ #include +static const int lnz[16] = { + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +}; + /* Counts the number of lsbs which are zero before the first zero bit */ int mp_cnt_lsb(mp_int *a) { int x; - mp_digit q; + mp_digit q, qq; /* easy out */ if (mp_iszero(a) == 1) { @@ -1362,11 +1365,13 @@ int mp_cnt_lsb(mp_int *a) x *= DIGIT_BIT; /* now scan this digit until a 1 is found */ - while ((q & 1) == 0) { - q >>= 1; - x += 1; + if ((q & 1) == 0) { + do { + qq = q & 15; + x += lnz[qq]; + q >>= 4; + } while (qq == 0); } - return x; } @@ -2665,75 +2670,75 @@ __RES:mp_clear (&res); /* End: bn_mp_exptmod_fast.c */ /* Start: bn_mp_exteuclid.c */ -/* LibTomMath, multiple-precision integer library -- Tom St Denis - * - * LibTomMath is a library that provides multiple-precision - * integer arithmetic as well as number theoretic functionality. - * - * The library was designed directly after the MPI library by - * Michael Fromberger but has been written from scratch with - * additional optimizations in place. - * - * The library is free for all purposes without any express - * guarantee it works. - * - * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org - */ -#include - -/* Extended euclidean algorithm of (a, b) produces - a*u1 + b*u2 = u3 - */ -int mp_exteuclid(mp_int *a, mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3) -{ - mp_int u1,u2,u3,v1,v2,v3,t1,t2,t3,q,tmp; - int err; - - if ((err = mp_init_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL)) != MP_OKAY) { - return err; - } - - /* initialize, (u1,u2,u3) = (1,0,a) */ - mp_set(&u1, 1); - if ((err = mp_copy(a, &u3)) != MP_OKAY) { goto _ERR; } - - /* initialize, (v1,v2,v3) = (0,1,b) */ - mp_set(&v2, 1); - if ((err = mp_copy(b, &v3)) != MP_OKAY) { goto _ERR; } - - /* loop while v3 != 0 */ - while (mp_iszero(&v3) == MP_NO) { - /* q = u3/v3 */ - if ((err = mp_div(&u3, &v3, &q, NULL)) != MP_OKAY) { goto _ERR; } - - /* (t1,t2,t3) = (u1,u2,u3) - (v1,v2,v3)q */ - if ((err = mp_mul(&v1, &q, &tmp)) != MP_OKAY) { goto _ERR; } - if ((err = mp_sub(&u1, &tmp, &t1)) != MP_OKAY) { goto _ERR; } - if ((err = mp_mul(&v2, &q, &tmp)) != MP_OKAY) { goto _ERR; } - if ((err = mp_sub(&u2, &tmp, &t2)) != MP_OKAY) { goto _ERR; } - if ((err = mp_mul(&v3, &q, &tmp)) != MP_OKAY) { goto _ERR; } - if ((err = mp_sub(&u3, &tmp, &t3)) != MP_OKAY) { goto _ERR; } - - /* (u1,u2,u3) = (v1,v2,v3) */ - if ((err = mp_copy(&v1, &u1)) != MP_OKAY) { goto _ERR; } - if ((err = mp_copy(&v2, &u2)) != MP_OKAY) { goto _ERR; } - if ((err = mp_copy(&v3, &u3)) != MP_OKAY) { goto _ERR; } - - /* (v1,v2,v3) = (t1,t2,t3) */ - if ((err = mp_copy(&t1, &v1)) != MP_OKAY) { goto _ERR; } - if ((err = mp_copy(&t2, &v2)) != MP_OKAY) { goto _ERR; } - if ((err = mp_copy(&t3, &v3)) != MP_OKAY) { goto _ERR; } - } - - /* copy result out */ - if (U1 != NULL) { mp_exch(U1, &u1); } - if (U2 != NULL) { mp_exch(U2, &u2); } - if (U3 != NULL) { mp_exch(U3, &u3); } - - err = MP_OKAY; -_ERR: mp_clear_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL); - return err; -} +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* Extended euclidean algorithm of (a, b) produces + a*u1 + b*u2 = u3 + */ +int mp_exteuclid(mp_int *a, mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3) +{ + mp_int u1,u2,u3,v1,v2,v3,t1,t2,t3,q,tmp; + int err; + + if ((err = mp_init_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL)) != MP_OKAY) { + return err; + } + + /* initialize, (u1,u2,u3) = (1,0,a) */ + mp_set(&u1, 1); + if ((err = mp_copy(a, &u3)) != MP_OKAY) { goto _ERR; } + + /* initialize, (v1,v2,v3) = (0,1,b) */ + mp_set(&v2, 1); + if ((err = mp_copy(b, &v3)) != MP_OKAY) { goto _ERR; } + + /* loop while v3 != 0 */ + while (mp_iszero(&v3) == MP_NO) { + /* q = u3/v3 */ + if ((err = mp_div(&u3, &v3, &q, NULL)) != MP_OKAY) { goto _ERR; } + + /* (t1,t2,t3) = (u1,u2,u3) - (v1,v2,v3)q */ + if ((err = mp_mul(&v1, &q, &tmp)) != MP_OKAY) { goto _ERR; } + if ((err = mp_sub(&u1, &tmp, &t1)) != MP_OKAY) { goto _ERR; } + if ((err = mp_mul(&v2, &q, &tmp)) != MP_OKAY) { goto _ERR; } + if ((err = mp_sub(&u2, &tmp, &t2)) != MP_OKAY) { goto _ERR; } + if ((err = mp_mul(&v3, &q, &tmp)) != MP_OKAY) { goto _ERR; } + if ((err = mp_sub(&u3, &tmp, &t3)) != MP_OKAY) { goto _ERR; } + + /* (u1,u2,u3) = (v1,v2,v3) */ + if ((err = mp_copy(&v1, &u1)) != MP_OKAY) { goto _ERR; } + if ((err = mp_copy(&v2, &u2)) != MP_OKAY) { goto _ERR; } + if ((err = mp_copy(&v3, &u3)) != MP_OKAY) { goto _ERR; } + + /* (v1,v2,v3) = (t1,t2,t3) */ + if ((err = mp_copy(&t1, &v1)) != MP_OKAY) { goto _ERR; } + if ((err = mp_copy(&t2, &v2)) != MP_OKAY) { goto _ERR; } + if ((err = mp_copy(&t3, &v3)) != MP_OKAY) { goto _ERR; } + } + + /* copy result out */ + if (U1 != NULL) { mp_exch(U1, &u1); } + if (U2 != NULL) { mp_exch(U2, &u2); } + if (U3 != NULL) { mp_exch(U3, &u3); } + + err = MP_OKAY; +_ERR: mp_clear_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL); + return err; +} /* End: bn_mp_exteuclid.c */ @@ -2828,7 +2833,7 @@ int mp_fwrite(mp_int *a, int radix, FILE *stream) return err; } - buf = XMALLOC (len); + buf = OPT_CAST(char) XMALLOC (len); if (buf == NULL) { return MP_MEM; } @@ -2963,6 +2968,49 @@ __U:mp_clear (&v); /* End: bn_mp_gcd.c */ +/* Start: bn_mp_get_int.c */ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* get the lower 32-bits of an mp_int */ +unsigned long mp_get_int(mp_int * a) +{ + int i; + unsigned long res; + + if (a->used == 0) { + return 0; + } + + /* get number of digits of the lsb we have to read */ + i = MIN(a->used,(int)((sizeof(unsigned long)*CHAR_BIT+DIGIT_BIT-1)/DIGIT_BIT))-1; + + /* get most significant digit of result */ + res = DIGIT(a,i); + + while (--i >= 0) { + res = (res << DIGIT_BIT) | DIGIT(a,i); + } + + /* force result to 32-bits always so it is consistent on non 32-bit platforms */ + return res & 0xFFFFFFFFUL; +} + +/* End: bn_mp_get_int.c */ + /* Start: bn_mp_grow.c */ /* LibTomMath, multiple-precision integer library -- Tom St Denis * @@ -2997,7 +3045,7 @@ int mp_grow (mp_int * a, int size) * in case the operation failed we don't want * to overwrite the dp member of a. */ - tmp = OPT_CAST XREALLOC (a->dp, sizeof (mp_digit) * size); + tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size); if (tmp == NULL) { /* reallocation failed but "a" is still valid [can be freed] */ return MP_MEM; @@ -3039,7 +3087,7 @@ int mp_grow (mp_int * a, int size) int mp_init (mp_int * a) { /* allocate memory required and clear it */ - a->dp = OPT_CAST XCALLOC (sizeof (mp_digit), MP_PREC); + a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), MP_PREC); if (a->dp == NULL) { return MP_MEM; } @@ -3142,6 +3190,65 @@ int mp_init_multi(mp_int *mp, ...) /* End: bn_mp_init_multi.c */ +/* Start: bn_mp_init_set.c */ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* initialize and set a digit */ +int mp_init_set (mp_int * a, mp_digit b) +{ + int err; + if ((err = mp_init(a)) != MP_OKAY) { + return err; + } + mp_set(a, b); + return err; +} + +/* End: bn_mp_init_set.c */ + +/* Start: bn_mp_init_set_int.c */ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* initialize and set a digit */ +int mp_init_set_int (mp_int * a, unsigned long b) +{ + int err; + if ((err = mp_init(a)) != MP_OKAY) { + return err; + } + return mp_set_int(a, b); +} + +/* End: bn_mp_init_set_int.c */ + /* Start: bn_mp_init_size.c */ /* LibTomMath, multiple-precision integer library -- Tom St Denis * @@ -3166,7 +3273,7 @@ int mp_init_size (mp_int * a, int size) size += (MP_PREC * 2) - (size % MP_PREC); /* alloc mem */ - a->dp = OPT_CAST XCALLOC (sizeof (mp_digit), size); + a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), size); if (a->dp == NULL) { return MP_MEM; } @@ -3357,6 +3464,113 @@ __ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL); /* End: bn_mp_invmod.c */ +/* Start: bn_mp_is_square.c */ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* Check if remainders are possible squares - fast exclude non-squares */ +static const char rem_128[128] = { + 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1 +}; + +static const char rem_105[105] = { + 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, + 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, + 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1 +}; + +/* Store non-zero to ret if arg is square, and zero if not */ +int mp_is_square(mp_int *arg,int *ret) +{ + int res; + mp_digit c; + mp_int t; + unsigned long r; + + /* Default to Non-square :) */ + *ret = MP_NO; + + if (arg->sign == MP_NEG) { + return MP_VAL; + } + + /* digits used? (TSD) */ + if (arg->used == 0) { + return MP_OKAY; + } + + /* First check mod 128 (suppose that DIGIT_BIT is at least 7) */ + if (rem_128[127 & DIGIT(arg,0)] == 1) { + return MP_OKAY; + } + + /* Next check mod 105 (3*5*7) */ + if ((res = mp_mod_d(arg,105,&c)) != MP_OKAY) { + return res; + } + if (rem_105[c] == 1) { + return MP_OKAY; + } + + /* product of primes less than 2^31 */ + if ((res = mp_init_set_int(&t,11L*13L*17L*19L*23L*29L*31L)) != MP_OKAY) { + return res; + } + if ((res = mp_mod(arg,&t,&t)) != MP_OKAY) { + goto ERR; + } + r = mp_get_int(&t); + /* Check for other prime modules, note it's not an ERROR but we must + * free "t" so the easiest way is to goto ERR. We know that res + * is already equal to MP_OKAY from the mp_mod call + */ + if ( (1L<<(r%11)) & 0x5C4L ) goto ERR; + if ( (1L<<(r%13)) & 0x9E4L ) goto ERR; + if ( (1L<<(r%17)) & 0x5CE8L ) goto ERR; + if ( (1L<<(r%19)) & 0x4F50CL ) goto ERR; + if ( (1L<<(r%23)) & 0x7ACCA0L ) goto ERR; + if ( (1L<<(r%29)) & 0xC2EDD0CL ) goto ERR; + if ( (1L<<(r%31)) & 0x6DE2B848L ) goto ERR; + + /* Final check - is sqr(sqrt(arg)) == arg ? */ + if ((res = mp_sqrt(arg,&t)) != MP_OKAY) { + goto ERR; + } + if ((res = mp_sqr(&t,&t)) != MP_OKAY) { + goto ERR; + } + + *ret = (mp_cmp_mag(&t,arg) == MP_EQ) ? MP_YES : MP_NO; +ERR:mp_clear(&t); + return res; +} + +/* End: bn_mp_is_square.c */ + /* Start: bn_mp_jacobi.c */ /* LibTomMath, multiple-precision integer library -- Tom St Denis * @@ -3506,8 +3720,7 @@ __A1:mp_clear (&a1); * Generally though the overhead of this method doesn't pay off * until a certain size (N ~ 80) is reached. */ -int -mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) +int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) { mp_int x0, x1, y0, y1, t1, x0y0, x1y1; int B, err; @@ -3519,7 +3732,7 @@ mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) B = MIN (a->used, b->used); /* now divide in two */ - B = B / 2; + B = B >> 1; /* init copy all the temps */ if (mp_init_size (&x0, B) != MP_OKAY) @@ -3653,8 +3866,7 @@ X0:mp_clear (&x0); * is essentially the same algorithm but merely * tuned to perform recursive squarings. */ -int -mp_karatsuba_sqr (mp_int * a, mp_int * b) +int mp_karatsuba_sqr (mp_int * a, mp_int * b) { mp_int x0, x1, t1, t2, x0x0, x1x1; int B, err; @@ -3665,7 +3877,7 @@ mp_karatsuba_sqr (mp_int * a, mp_int * b) B = a->used; /* now divide in two */ - B = B / 2; + B = B >> 1; /* init copy all the temps */ if (mp_init_size (&x0, B) != MP_OKAY) @@ -3896,7 +4108,6 @@ mp_mod (mp_int * a, mp_int * b, mp_int * c) mp_int t; int res; - if ((res = mp_init (&t)) != MP_OKAY) { return res; } @@ -3906,7 +4117,7 @@ mp_mod (mp_int * a, mp_int * b, mp_int * c) return res; } - if (t.sign == MP_NEG) { + if (t.sign != b->sign) { res = mp_add (b, &t, c); } else { res = MP_OKAY; @@ -4661,7 +4872,7 @@ int mp_n_root (mp_int * a, mp_digit b, mp_int * c) if (mp_cmp (&t2, a) == MP_GT) { if ((res = mp_sub_d (&t1, 1, &t1)) != MP_OKAY) { - goto __T3; + goto __T3; } } else { break; @@ -4711,7 +4922,7 @@ int mp_neg (mp_int * a, mp_int * b) if ((res = mp_copy (a, b)) != MP_OKAY) { return res; } - if (mp_iszero(b) != 1) { + if (mp_iszero(b) != MP_YES) { b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS; } return MP_OKAY; @@ -5225,7 +5436,7 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style) /* End: bn_mp_prime_next_prime.c */ -/* Start: bn_mp_prime_random.c */ +/* Start: bn_mp_prime_random_ex.c */ /* LibTomMath, multiple-precision integer library -- Tom St Denis * * LibTomMath is a library that provides multiple-precision @@ -5242,57 +5453,101 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style) */ #include -/* makes a truly random prime of a given size (bytes), - * call with bbs = 1 if you want it to be congruent to 3 mod 4 +/* makes a truly random prime of a given size (bits), + * + * Flags are as follows: + * + * LTM_PRIME_BBS - make prime congruent to 3 mod 4 + * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS) + * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero + * LTM_PRIME_2MSB_ON - make the 2nd highest bit one * * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself * so it can be NULL * - * The prime generated will be larger than 2^(8*size). */ -/* this sole function may hold the key to enslaving all mankind! */ -int mp_prime_random(mp_int *a, int t, int size, int bbs, ltm_prime_callback cb, void *dat) +/* This is possibly the mother of all prime generation functions, muahahahahaha! */ +int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat) { - unsigned char *tmp; - int res, err; + unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb; + int res, err, bsize, maskOR_msb_offset; /* sanity check the input */ - if (size <= 0) { + if (size <= 1 || t <= 0) { return MP_VAL; } - /* we need a buffer of size+1 bytes */ - tmp = XMALLOC(size+1); + /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */ + if (flags & LTM_PRIME_SAFE) { + flags |= LTM_PRIME_BBS; + } + + /* calc the byte size */ + bsize = (size>>3)+(size&7?1:0); + + /* we need a buffer of bsize bytes */ + tmp = OPT_CAST(unsigned char) XMALLOC(bsize); if (tmp == NULL) { return MP_MEM; } - /* fix MSB */ - tmp[0] = 1; + /* calc the maskAND value for the MSbyte*/ + maskAND = 0xFF >> (8 - (size & 7)); + + /* calc the maskOR_msb */ + maskOR_msb = 0; + maskOR_msb_offset = (size - 2) >> 3; + if (flags & LTM_PRIME_2MSB_ON) { + maskOR_msb |= 1 << ((size - 2) & 7); + } else if (flags & LTM_PRIME_2MSB_OFF) { + maskAND &= ~(1 << ((size - 2) & 7)); + } + + /* get the maskOR_lsb */ + maskOR_lsb = 0; + if (flags & LTM_PRIME_BBS) { + maskOR_lsb |= 3; + } do { /* read the bytes */ - if (cb(tmp+1, size, dat) != size) { + if (cb(tmp, bsize, dat) != bsize) { err = MP_VAL; goto error; } - /* fix the LSB */ - tmp[size] |= (bbs ? 3 : 1); + /* work over the MSbyte */ + tmp[0] &= maskAND; + tmp[0] |= 1 << ((size - 1) & 7); + + /* mix in the maskORs */ + tmp[maskOR_msb_offset] |= maskOR_msb; + tmp[bsize-1] |= maskOR_lsb; /* read it in */ - if ((err = mp_read_unsigned_bin(a, tmp, size+1)) != MP_OKAY) { - goto error; - } + if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) { goto error; } /* is it prime? */ - if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { - goto error; + if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; } + + if (flags & LTM_PRIME_SAFE) { + /* see if (a-1)/2 is prime */ + if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) { goto error; } + if ((err = mp_div_2(a, a)) != MP_OKAY) { goto error; } + + /* is it prime? */ + if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; } } } while (res == MP_NO); + if (flags & LTM_PRIME_SAFE) { + /* restore a to the original value */ + if ((err = mp_mul_2(a, a)) != MP_OKAY) { goto error; } + if ((err = mp_add_d(a, 1, a)) != MP_OKAY) { goto error; } + } + err = MP_OKAY; error: XFREE(tmp); @@ -5301,7 +5556,7 @@ int mp_prime_random(mp_int *a, int t, int size, int bbs, ltm_prime_callback cb, -/* End: bn_mp_prime_random.c */ +/* End: bn_mp_prime_random_ex.c */ /* Start: bn_mp_radix_size.c */ /* LibTomMath, multiple-precision integer library -- Tom St Denis @@ -5726,9 +5981,9 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu) */ #include -/* reduces a modulo n where n is of the form 2**p - k */ +/* reduces a modulo n where n is of the form 2**p - d */ int -mp_reduce_2k(mp_int *a, mp_int *n, mp_digit k) +mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d) { mp_int q; int p, res; @@ -5744,9 +5999,9 @@ mp_reduce_2k(mp_int *a, mp_int *n, mp_digit k) goto ERR; } - if (k != 1) { - /* q = q * k */ - if ((res = mp_mul_d(&q, k, &q)) != MP_OKAY) { + if (d != 1) { + /* q = q * d */ + if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) { goto ERR; } } @@ -6062,7 +6317,7 @@ int mp_shrink (mp_int * a) { mp_digit *tmp; if (a->alloc != a->used && a->used > 0) { - if ((tmp = OPT_CAST XREALLOC (a->dp, sizeof (mp_digit) * a->used)) == NULL) { + if ((tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * a->used)) == NULL) { return MP_MEM; } a->dp = tmp; @@ -6182,6 +6437,85 @@ mp_sqrmod (mp_int * a, mp_int * b, mp_int * c) /* End: bn_mp_sqrmod.c */ +/* Start: bn_mp_sqrt.c */ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* this function is less generic than mp_n_root, simpler and faster */ +int mp_sqrt(mp_int *arg, mp_int *ret) +{ + int res; + mp_int t1,t2; + + /* must be positive */ + if (arg->sign == MP_NEG) { + return MP_VAL; + } + + /* easy out */ + if (mp_iszero(arg) == MP_YES) { + mp_zero(ret); + return MP_OKAY; + } + + if ((res = mp_init_copy(&t1, arg)) != MP_OKAY) { + return res; + } + + if ((res = mp_init(&t2)) != MP_OKAY) { + goto E2; + } + + /* First approx. (not very bad for large arg) */ + mp_rshd (&t1,t1.used/2); + + /* t1 > 0 */ + if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) { + goto E1; + } + if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) { + goto E1; + } + if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) { + goto E1; + } + /* And now t1 > sqrt(arg) */ + do { + if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) { + goto E1; + } + if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) { + goto E1; + } + if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) { + goto E1; + } + /* t1 >= sqrt(arg) >= t2 at this point */ + } while (mp_cmp_mag(&t1,&t2) == MP_GT); + + mp_exch(&t1,ret); + +E1: mp_clear(&t2); +E2: mp_clear(&t1); + return res; +} + + +/* End: bn_mp_sqrt.c */ + /* Start: bn_mp_sub.c */ /* LibTomMath, multiple-precision integer library -- Tom St Denis * @@ -6463,8 +6797,7 @@ mp_to_unsigned_bin (mp_int * a, unsigned char *b) #include /* multiplication using the Toom-Cook 3-way algorithm */ -int -mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) +int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) { mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; int res, B; @@ -7019,6 +7352,93 @@ int mp_toradix (mp_int * a, char *str, int radix) /* End: bn_mp_toradix.c */ +/* Start: bn_mp_toradix_n.c */ +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ +#include + +/* stores a bignum as a ASCII string in a given radix (2..64) + * + * Stores upto maxlen-1 chars and always a NULL byte + */ +int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen) +{ + int res, digs; + mp_int t; + mp_digit d; + char *_s = str; + + /* check range of the maxlen, radix */ + if (maxlen < 3 || radix < 2 || radix > 64) { + return MP_VAL; + } + + /* quick out if its zero */ + if (mp_iszero(a) == 1) { + *str++ = '0'; + *str = '\0'; + return MP_OKAY; + } + + if ((res = mp_init_copy (&t, a)) != MP_OKAY) { + return res; + } + + /* if it is negative output a - */ + if (t.sign == MP_NEG) { + /* we have to reverse our digits later... but not the - sign!! */ + ++_s; + + /* store the flag and mark the number as positive */ + *str++ = '-'; + t.sign = MP_ZPOS; + + /* subtract a char */ + --maxlen; + } + + digs = 0; + while (mp_iszero (&t) == 0) { + if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { + mp_clear (&t); + return res; + } + *str++ = mp_s_rmap[d]; + ++digs; + + if (--maxlen == 1) { + /* no more room */ + break; + } + } + + /* reverse the digits of the string. In this case _s points + * to the first digit [exluding the sign] of the number] + */ + bn_reverse ((unsigned char *)_s, digs); + + /* append a NULL so the string is properly terminated */ + *str = '\0'; + + mp_clear (&t); + return MP_OKAY; +} + + +/* End: bn_mp_toradix_n.c */ + /* Start: bn_mp_unsigned_bin_size.c */ /* LibTomMath, multiple-precision integer library -- Tom St Denis * @@ -7814,8 +8234,8 @@ s_mp_sqr (mp_int * a, mp_int * b) pa = a->used; if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) { return res; - } - + } + /* default used is maximum possible size */ t.used = 2*pa + 1; diff --git a/tommath.h b/tommath.h index 6cc9bb07c..00299947f 100644 --- a/tommath.h +++ b/tommath.h @@ -30,12 +30,12 @@ extern "C" { /* C++ compilers don't like assigning void * to mp_digit * */ -#define OPT_CAST (mp_digit *) +#define OPT_CAST(x) (x *) #else /* C on the other hand doesn't care */ -#define OPT_CAST +#define OPT_CAST(x) #endif @@ -99,13 +99,13 @@ extern "C" { #define XFREE free #define XREALLOC realloc #define XCALLOC calloc + #else + /* prototypes for our heap functions */ + extern void *XMALLOC(size_t n); + extern void *REALLOC(void *p, size_t n); + extern void *XCALLOC(size_t n, size_t s); + extern void XFREE(void *p); #endif - - /* prototypes for our heap functions */ - extern void *XMALLOC(size_t n); - extern void *REALLOC(void *p, size_t n); - extern void *XCALLOC(size_t n, size_t s); - extern void XFREE(void *p); #endif @@ -134,6 +134,12 @@ extern "C" { #define MP_YES 1 /* yes response */ #define MP_NO 0 /* no response */ +/* Primality generation flags */ +#define LTM_PRIME_BBS 0x0001 /* BBS style prime */ +#define LTM_PRIME_SAFE 0x0002 /* Safe prime (p-1)/2 == prime */ +#define LTM_PRIME_2MSB_OFF 0x0004 /* force 2nd MSB to 0 */ +#define LTM_PRIME_2MSB_ON 0x0008 /* force 2nd MSB to 1 */ + typedef int mp_err; /* you'll have to tune these... */ @@ -142,12 +148,18 @@ extern int KARATSUBA_MUL_CUTOFF, TOOM_MUL_CUTOFF, TOOM_SQR_CUTOFF; -/* various build options */ -#define MP_PREC 64 /* default digits of precision */ - /* define this to use lower memory usage routines (exptmods mostly) */ /* #define MP_LOW_MEM */ +/* default precision */ +#ifndef MP_PREC + #ifdef MP_LOW_MEM + #define MP_PREC 64 /* default digits of precision */ + #else + #define MP_PREC 8 /* default digits of precision */ + #endif +#endif + /* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */ #define MP_WARRAY (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1)) @@ -207,6 +219,15 @@ void mp_set(mp_int *a, mp_digit b); /* set a 32-bit const */ int mp_set_int(mp_int *a, unsigned long b); +/* get a 32-bit value */ +unsigned long mp_get_int(mp_int * a); + +/* initialize and set a digit */ +int mp_init_set (mp_int * a, mp_digit b); + +/* initialize and set 32-bit value */ +int mp_init_set_int (mp_int * a, unsigned long b); + /* copy, b = a */ int mp_copy(mp_int *a, mp_int *b); @@ -350,8 +371,11 @@ int mp_lcm(mp_int *a, mp_int *b, mp_int *c); */ int mp_n_root(mp_int *a, mp_digit b, mp_int *c); -/* shortcut for square root */ -#define mp_sqrt(a, b) mp_n_root(a, 2, b) +/* special sqrt algo */ +int mp_sqrt(mp_int *arg, mp_int *ret); + +/* is number a square? */ +int mp_is_square(mp_int *arg, int *ret); /* computes the jacobi c = (a | n) (or Legendre if b is prime) */ int mp_jacobi(mp_int *a, mp_int *n, int *c); @@ -393,7 +417,7 @@ int mp_reduce_is_2k(mp_int *a); int mp_reduce_2k_setup(mp_int *a, mp_digit *d); /* reduces a modulo b where b is of the form 2**p - k [0 <= a] */ -int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit k); +int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d); /* d = a**b (mod c) */ int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d); @@ -453,8 +477,23 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style); * * The prime generated will be larger than 2^(8*size). */ -int mp_prime_random(mp_int *a, int t, int size, int bbs, ltm_prime_callback cb, void *dat); +#define mp_prime_random(a, t, size, bbs, cb, dat) mp_prime_random_ex(a, t, ((size) * 8) + 1, (bbs==1)?LTM_PRIME_BBS:0, cb, dat) +/* makes a truly random prime of a given size (bits), + * + * Flags are as follows: + * + * LTM_PRIME_BBS - make prime congruent to 3 mod 4 + * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS) + * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero + * LTM_PRIME_2MSB_ON - make the 2nd highest bit one + * + * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can + * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself + * so it can be NULL + * + */ +int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat); /* ---> radix conversion <--- */ int mp_count_bits(mp_int *a); @@ -469,6 +508,7 @@ int mp_to_signed_bin(mp_int *a, unsigned char *b); int mp_read_radix(mp_int *a, char *str, int radix); int mp_toradix(mp_int *a, char *str, int radix); +int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen); int mp_radix_size(mp_int *a, int radix, int *size); int mp_fread(mp_int *a, int radix, FILE *stream); diff --git a/tommath.pdf b/tommath.pdf index 08d658bc7..fc3301a30 100644 Binary files a/tommath.pdf and b/tommath.pdf differ diff --git a/tommath.src b/tommath.src index bfe7f5fb5..0389831c8 100644 --- a/tommath.src +++ b/tommath.src @@ -49,7 +49,7 @@ \begin{document} \frontmatter \pagestyle{empty} -\title{Implementing Multiple Precision Arithmetic \\ ~ \\ Holiday Draft Edition } +\title{Implementing Multiple Precision Arithmetic \\ ~ \\ Draft Edition } \author{\mbox{ %\begin{small} \begin{tabular}{c} @@ -66,7 +66,7 @@ QUALCOMM Australia \\ } } \maketitle -This text has been placed in the public domain. This text corresponds to the v0.28 release of the +This text has been placed in the public domain. This text corresponds to the v0.30 release of the LibTomMath project. \begin{alltt} @@ -85,7 +85,7 @@ This text is formatted to the international B5 paper size of 176mm wide by 250mm \tableofcontents \listoffigures -\chapter*{Prefaces to the Holiday Draft Edition} +\chapter*{Prefaces to the Draft Edition} I started this text in April 2003 to complement my LibTomMath library. That is, explain how to implement the functions contained in LibTomMath. The goal is to have a textbook that any Computer Science student can use when implementing their own multiple precision arithmetic. The plan I wanted to follow was flesh out all the @@ -100,7 +100,7 @@ to read it. I had Jean-Luc Cooke print copies for me and I brought them to Cryp managed to grab a certain level of attention having people from around the world ask me for copies of the text was certain rewarding. -Now we are in December 2003. By this time I had pictured that I would have at least finished my second draft of the text. +Now we are past December 2003. By this time I had pictured that I would have at least finished my second draft of the text. Currently I am far off from this goal. I've done partial re-writes of chapters one, two and three but they are not even finished yet. I haven't given up on the project, only had some setbacks. First O'Reilly declined to publish the text then Addison-Wesley and Greg is tried another which I don't know the name of. However, at this point I want to focus my energy @@ -146,9 +146,6 @@ plan is to edit one chapter every two weeks starting January 4th. It seems insa should provide ample time. By Crypto'04 I plan to have a 2nd draft of the text polished and ready to hand out to as many people who will take it. -Finally, again, I'd like to thank my parents Vern and Katie St Denis for giving me a place to stay, food, clothes and -word of encouragement whenever I seemed to need it. Thanks! - \begin{flushright} Tom St Denis \end{flushright} \newpage @@ -485,7 +482,7 @@ exponentiation and Montgomery reduction have been provided to make the library m Even with the nearly optimal and specialized algorithms that have been included the Application Programing Interface (\textit{API}) has been kept as simple as possible. Often generic place holder routines will make use of specialized algorithms automatically without the developer's specific attention. One such example is the generic multiplication -algorithm \textbf{mp\_mul()} which will automatically use Karatsuba, Toom-Cook, Comba or baseline multiplication +algorithm \textbf{mp\_mul()} which will automatically use Toom--Cook, Karatsuba, Comba or baseline multiplication based on the magnitude of the inputs and the configuration of the library. Making LibTomMath as efficient as possible is not the only goal of the LibTomMath project. Ideally the library should @@ -510,12 +507,12 @@ segments of code littered throughout the source. This clean and uncluttered app developer can more readily discern the true intent of a given section of source code without trying to keep track of what conditional code will be used. -The code base of LibTomMath is also well organized. Each function is in its own separate source code file -which allows the reader to find a given function very quickly. On average there are about $76$ lines of code per source +The code base of LibTomMath is well organized. Each function is in its own separate source code file +which allows the reader to find a given function very quickly. On average there are $76$ lines of code per source file which makes the source very easily to follow. By comparison MPI and LIP are single file projects making code tracing very hard. GMP has many conditional code segments which also hinder tracing. -When compiled with GCC for the x86 processor and optimized for speed the entire library is approximately $66$KiB\footnote{The notation ``KiB'' means $2^{10}$ octets, similarly ``MiB'' means $2^{20}$ octets.} +When compiled with GCC for the x86 processor and optimized for speed the entire library is approximately $100$KiB\footnote{The notation ``KiB'' means $2^{10}$ octets, similarly ``MiB'' means $2^{20}$ octets.} which is fairly small compared to GMP (over $250$KiB). LibTomMath is slightly larger than MPI (which compiles to about $50$KiB) but LibTomMath is also much faster and more complete than MPI. @@ -2736,7 +2733,7 @@ general purpose multiplication. Given two polynomial basis representations $f(x light algebra \cite{KARAP} that the following polynomial is equivalent to multiplication of the two integers the polynomials represent. \begin{equation} -f(x) \cdot g(x) = acx^2 + ((a - b)(c - d) + ac + bd)x + bd +f(x) \cdot g(x) = acx^2 + ((a - b)(c - d) - (ac + bd))x + bd \end{equation} Using the observation that $ac$ and $bd$ could be re-used only three half sized multiplications would be required to produce the product. Applying @@ -3196,7 +3193,7 @@ Upon closer inspection this equation only requires the calculation of three half Karatsuba multiplication, this algorithm can be applied recursively on the input and will achieve an asymptotic running time of $O \left ( n^{lg(3)} \right )$. -You might ask yourself, if the asymptotic time of Karatsuba squaring and multiplication is the same, why not simply use the multiplication algorithm +If the asymptotic times of Karatsuba squaring and multiplication are the same, why not simply use the multiplication algorithm instead? The answer to this arises from the cutoff point for squaring. As in multiplication there exists a cutoff point, at which the time required for a Comba based squaring and a Karatsuba based squaring meet. Due to the overhead inherent in the Karatsuba method, the cutoff point is fairly high. For example, on an AMD Athlon XP processor with $\beta = 2^{28}$, the cutoff point is around 127 digits. diff --git a/tommath.tex b/tommath.tex index 488c87120..629edbad3 100644 --- a/tommath.tex +++ b/tommath.tex @@ -49,7 +49,7 @@ \begin{document} \frontmatter \pagestyle{empty} -\title{Implementing Multiple Precision Arithmetic \\ ~ \\ Holiday Draft Edition } +\title{Implementing Multiple Precision Arithmetic \\ ~ \\ Draft Edition } \author{\mbox{ %\begin{small} \begin{tabular}{c} @@ -66,7 +66,7 @@ } } \maketitle -This text has been placed in the public domain. This text corresponds to the v0.28 release of the +This text has been placed in the public domain. This text corresponds to the v0.30 release of the LibTomMath project. \begin{alltt} @@ -85,7 +85,7 @@ \tableofcontents \listoffigures -\chapter*{Prefaces to the Holiday Draft Edition} +\chapter*{Prefaces to the Draft Edition} I started this text in April 2003 to complement my LibTomMath library. That is, explain how to implement the functions contained in LibTomMath. The goal is to have a textbook that any Computer Science student can use when implementing their own multiple precision arithmetic. The plan I wanted to follow was flesh out all the @@ -100,7 +100,7 @@ \chapter*{Prefaces to the Holiday Draft Edition} managed to grab a certain level of attention having people from around the world ask me for copies of the text was certain rewarding. -Now we are in December 2003. By this time I had pictured that I would have at least finished my second draft of the text. +Now we are past December 2003. By this time I had pictured that I would have at least finished my second draft of the text. Currently I am far off from this goal. I've done partial re-writes of chapters one, two and three but they are not even finished yet. I haven't given up on the project, only had some setbacks. First O'Reilly declined to publish the text then Addison-Wesley and Greg is tried another which I don't know the name of. However, at this point I want to focus my energy @@ -146,9 +146,6 @@ \chapter*{Prefaces to the Holiday Draft Edition} should provide ample time. By Crypto'04 I plan to have a 2nd draft of the text polished and ready to hand out to as many people who will take it. -Finally, again, I'd like to thank my parents Vern and Katie St Denis for giving me a place to stay, food, clothes and -word of encouragement whenever I seemed to need it. Thanks! - \begin{flushright} Tom St Denis \end{flushright} \newpage @@ -485,7 +482,7 @@ \subsection{Goals of LibTomMath} Even with the nearly optimal and specialized algorithms that have been included the Application Programing Interface (\textit{API}) has been kept as simple as possible. Often generic place holder routines will make use of specialized algorithms automatically without the developer's specific attention. One such example is the generic multiplication -algorithm \textbf{mp\_mul()} which will automatically use Karatsuba, Toom-Cook, Comba or baseline multiplication +algorithm \textbf{mp\_mul()} which will automatically use Toom--Cook, Karatsuba, Comba or baseline multiplication based on the magnitude of the inputs and the configuration of the library. Making LibTomMath as efficient as possible is not the only goal of the LibTomMath project. Ideally the library should @@ -510,12 +507,12 @@ \subsection{Code Base} developer can more readily discern the true intent of a given section of source code without trying to keep track of what conditional code will be used. -The code base of LibTomMath is also well organized. Each function is in its own separate source code file -which allows the reader to find a given function very quickly. On average there are about $76$ lines of code per source +The code base of LibTomMath is well organized. Each function is in its own separate source code file +which allows the reader to find a given function very quickly. On average there are $76$ lines of code per source file which makes the source very easily to follow. By comparison MPI and LIP are single file projects making code tracing very hard. GMP has many conditional code segments which also hinder tracing. -When compiled with GCC for the x86 processor and optimized for speed the entire library is approximately $66$KiB\footnote{The notation ``KiB'' means $2^{10}$ octets, similarly ``MiB'' means $2^{20}$ octets.} +When compiled with GCC for the x86 processor and optimized for speed the entire library is approximately $100$KiB\footnote{The notation ``KiB'' means $2^{10}$ octets, similarly ``MiB'' means $2^{20}$ octets.} which is fairly small compared to GMP (over $250$KiB). LibTomMath is slightly larger than MPI (which compiles to about $50$KiB) but LibTomMath is also much faster and more complete than MPI. @@ -584,7 +581,7 @@ \section{Library Basics} \begin{center} \begin{figure}[here] -\includegraphics{pics/design_process} +\includegraphics{pics/design_process.ps} \caption{Design Flow of the First Few Original LibTomMath Functions.} \label{pic:design_process} \end{figure} @@ -808,7 +805,7 @@ \subsection{Initializing an mp\_int} 018 int mp_init (mp_int * a) 019 \{ 020 /* allocate memory required and clear it */ -021 a->dp = OPT_CAST XCALLOC (sizeof (mp_digit), MP_PREC); +021 a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), MP_PREC); 022 if (a->dp == NULL) \{ 023 return MP_MEM; 024 \} @@ -990,7 +987,7 @@ \subsection{Augmenting an mp\_int's Precision} 031 * in case the operation failed we don't want 032 * to overwrite the dp member of a. 033 */ -034 tmp = OPT_CAST XREALLOC (a->dp, sizeof (mp_digit) * size); +034 tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size); 035 if (tmp == NULL) \{ 036 /* reallocation failed but "a" is still valid [can be freed] */ 037 return MP_MEM; @@ -1072,7 +1069,7 @@ \subsection{Initializing Variable Precision mp\_ints} 021 size += (MP_PREC * 2) - (size % MP_PREC); 022 023 /* alloc mem */ -024 a->dp = OPT_CAST XCALLOC (sizeof (mp_digit), size); +024 a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), size); 025 if (a->dp == NULL) \{ 026 return MP_MEM; 027 \} @@ -1651,7 +1648,7 @@ \subsection{Integer Negation} 021 if ((res = mp_copy (a, b)) != MP_OKAY) \{ 022 return res; 023 \} -024 if (mp_iszero(b) != 1) \{ +024 if (mp_iszero(b) != MP_YES) \{ 025 b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS; 026 \} 027 return MP_OKAY; @@ -2860,7 +2857,7 @@ \subsection{Multiplication by $x$} \newpage \begin{center} \begin{figure}[here] -\includegraphics{pics/sliding_window} +\includegraphics{pics/sliding_window.ps} \caption{Sliding Window Movement} \label{pic:sliding_window} \end{figure} @@ -4024,7 +4021,7 @@ \subsection{Karatsuba Multiplication} light algebra \cite{KARAP} that the following polynomial is equivalent to multiplication of the two integers the polynomials represent. \begin{equation} -f(x) \cdot g(x) = acx^2 + ((a - b)(c - d) + ac + bd)x + bd +f(x) \cdot g(x) = acx^2 + ((a - b)(c - d) - (ac + bd))x + bd \end{equation} Using the observation that $ac$ and $bd$ could be re-used only three half sized multiplications would be required to produce the product. Applying @@ -4138,132 +4135,131 @@ \subsection{Karatsuba Multiplication} 043 * Generally though the overhead of this method doesn't pay off 044 * until a certain size (N ~ 80) is reached. 045 */ -046 int -047 mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) -048 \{ -049 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; -050 int B, err; -051 -052 /* default the return code to an error */ -053 err = MP_MEM; -054 -055 /* min # of digits */ -056 B = MIN (a->used, b->used); -057 -058 /* now divide in two */ -059 B = B / 2; -060 -061 /* init copy all the temps */ -062 if (mp_init_size (&x0, B) != MP_OKAY) -063 goto ERR; -064 if (mp_init_size (&x1, a->used - B) != MP_OKAY) -065 goto X0; -066 if (mp_init_size (&y0, B) != MP_OKAY) -067 goto X1; -068 if (mp_init_size (&y1, b->used - B) != MP_OKAY) -069 goto Y0; -070 -071 /* init temps */ -072 if (mp_init_size (&t1, B * 2) != MP_OKAY) -073 goto Y1; -074 if (mp_init_size (&x0y0, B * 2) != MP_OKAY) -075 goto T1; -076 if (mp_init_size (&x1y1, B * 2) != MP_OKAY) -077 goto X0Y0; -078 -079 /* now shift the digits */ -080 x0.sign = x1.sign = a->sign; -081 y0.sign = y1.sign = b->sign; -082 -083 x0.used = y0.used = B; -084 x1.used = a->used - B; -085 y1.used = b->used - B; -086 -087 \{ -088 register int x; -089 register mp_digit *tmpa, *tmpb, *tmpx, *tmpy; -090 -091 /* we copy the digits directly instead of using higher level functions -092 * since we also need to shift the digits -093 */ -094 tmpa = a->dp; -095 tmpb = b->dp; -096 -097 tmpx = x0.dp; -098 tmpy = y0.dp; -099 for (x = 0; x < B; x++) \{ -100 *tmpx++ = *tmpa++; -101 *tmpy++ = *tmpb++; -102 \} -103 -104 tmpx = x1.dp; -105 for (x = B; x < a->used; x++) \{ -106 *tmpx++ = *tmpa++; -107 \} -108 -109 tmpy = y1.dp; -110 for (x = B; x < b->used; x++) \{ -111 *tmpy++ = *tmpb++; -112 \} -113 \} -114 -115 /* only need to clamp the lower words since by definition the -116 * upper words x1/y1 must have a known number of digits -117 */ -118 mp_clamp (&x0); -119 mp_clamp (&y0); -120 -121 /* now calc the products x0y0 and x1y1 */ -122 /* after this x0 is no longer required, free temp [x0==t2]! */ -123 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) -124 goto X1Y1; /* x0y0 = x0*y0 */ -125 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) -126 goto X1Y1; /* x1y1 = x1*y1 */ -127 -128 /* now calc x1-x0 and y1-y0 */ -129 if (mp_sub (&x1, &x0, &t1) != MP_OKAY) -130 goto X1Y1; /* t1 = x1 - x0 */ -131 if (mp_sub (&y1, &y0, &x0) != MP_OKAY) -132 goto X1Y1; /* t2 = y1 - y0 */ -133 if (mp_mul (&t1, &x0, &t1) != MP_OKAY) -134 goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */ -135 -136 /* add x0y0 */ -137 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) -138 goto X1Y1; /* t2 = x0y0 + x1y1 */ -139 if (mp_sub (&x0, &t1, &t1) != MP_OKAY) -140 goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */ -141 -142 /* shift by B */ -143 if (mp_lshd (&t1, B) != MP_OKAY) -144 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<used, b->used); +056 +057 /* now divide in two */ +058 B = B >> 1; +059 +060 /* init copy all the temps */ +061 if (mp_init_size (&x0, B) != MP_OKAY) +062 goto ERR; +063 if (mp_init_size (&x1, a->used - B) != MP_OKAY) +064 goto X0; +065 if (mp_init_size (&y0, B) != MP_OKAY) +066 goto X1; +067 if (mp_init_size (&y1, b->used - B) != MP_OKAY) +068 goto Y0; +069 +070 /* init temps */ +071 if (mp_init_size (&t1, B * 2) != MP_OKAY) +072 goto Y1; +073 if (mp_init_size (&x0y0, B * 2) != MP_OKAY) +074 goto T1; +075 if (mp_init_size (&x1y1, B * 2) != MP_OKAY) +076 goto X0Y0; +077 +078 /* now shift the digits */ +079 x0.sign = x1.sign = a->sign; +080 y0.sign = y1.sign = b->sign; +081 +082 x0.used = y0.used = B; +083 x1.used = a->used - B; +084 y1.used = b->used - B; +085 +086 \{ +087 register int x; +088 register mp_digit *tmpa, *tmpb, *tmpx, *tmpy; +089 +090 /* we copy the digits directly instead of using higher level functions +091 * since we also need to shift the digits +092 */ +093 tmpa = a->dp; +094 tmpb = b->dp; +095 +096 tmpx = x0.dp; +097 tmpy = y0.dp; +098 for (x = 0; x < B; x++) \{ +099 *tmpx++ = *tmpa++; +100 *tmpy++ = *tmpb++; +101 \} +102 +103 tmpx = x1.dp; +104 for (x = B; x < a->used; x++) \{ +105 *tmpx++ = *tmpa++; +106 \} +107 +108 tmpy = y1.dp; +109 for (x = B; x < b->used; x++) \{ +110 *tmpy++ = *tmpb++; +111 \} +112 \} +113 +114 /* only need to clamp the lower words since by definition the +115 * upper words x1/y1 must have a known number of digits +116 */ +117 mp_clamp (&x0); +118 mp_clamp (&y0); +119 +120 /* now calc the products x0y0 and x1y1 */ +121 /* after this x0 is no longer required, free temp [x0==t2]! */ +122 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) +123 goto X1Y1; /* x0y0 = x0*y0 */ +124 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) +125 goto X1Y1; /* x1y1 = x1*y1 */ +126 +127 /* now calc x1-x0 and y1-y0 */ +128 if (mp_sub (&x1, &x0, &t1) != MP_OKAY) +129 goto X1Y1; /* t1 = x1 - x0 */ +130 if (mp_sub (&y1, &y0, &x0) != MP_OKAY) +131 goto X1Y1; /* t2 = y1 - y0 */ +132 if (mp_mul (&t1, &x0, &t1) != MP_OKAY) +133 goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */ +134 +135 /* add x0y0 */ +136 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) +137 goto X1Y1; /* t2 = x0y0 + x1y1 */ +138 if (mp_sub (&x0, &t1, &t1) != MP_OKAY) +139 goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */ +140 +141 /* shift by B */ +142 if (mp_lshd (&t1, B) != MP_OKAY) +143 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<used, b->used) / 3; -033 -034 /* a = a2 * B**2 + a1 * B + a0 */ -035 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) \{ -036 goto ERR; -037 \} -038 -039 if ((res = mp_copy(a, &a1)) != MP_OKAY) \{ -040 goto ERR; -041 \} -042 mp_rshd(&a1, B); -043 mp_mod_2d(&a1, DIGIT_BIT * B, &a1); -044 -045 if ((res = mp_copy(a, &a2)) != MP_OKAY) \{ -046 goto ERR; -047 \} -048 mp_rshd(&a2, B*2); -049 -050 /* b = b2 * B**2 + b1 * B + b0 */ -051 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) \{ -052 goto ERR; -053 \} -054 -055 if ((res = mp_copy(b, &b1)) != MP_OKAY) \{ -056 goto ERR; -057 \} -058 mp_rshd(&b1, B); -059 mp_mod_2d(&b1, DIGIT_BIT * B, &b1); -060 -061 if ((res = mp_copy(b, &b2)) != MP_OKAY) \{ -062 goto ERR; -063 \} -064 mp_rshd(&b2, B*2); -065 -066 /* w0 = a0*b0 */ -067 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) \{ -068 goto ERR; -069 \} -070 -071 /* w4 = a2 * b2 */ -072 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) \{ -073 goto ERR; -074 \} -075 -076 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ -077 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) \{ -078 goto ERR; -079 \} -080 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) \{ -081 goto ERR; -082 \} -083 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) \{ -084 goto ERR; -085 \} -086 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) \{ -087 goto ERR; -088 \} -089 -090 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) \{ -091 goto ERR; -092 \} -093 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) \{ -094 goto ERR; -095 \} -096 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) \{ -097 goto ERR; -098 \} -099 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) \{ -100 goto ERR; -101 \} -102 -103 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) \{ -104 goto ERR; -105 \} -106 -107 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ -108 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) \{ -109 goto ERR; -110 \} -111 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) \{ -112 goto ERR; -113 \} -114 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) \{ -115 goto ERR; -116 \} -117 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) \{ -118 goto ERR; -119 \} -120 -121 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) \{ -122 goto ERR; -123 \} -124 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) \{ -125 goto ERR; -126 \} -127 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) \{ -128 goto ERR; -129 \} -130 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) \{ -131 goto ERR; -132 \} -133 -134 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) \{ -135 goto ERR; -136 \} -137 -138 -139 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ -140 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) \{ -141 goto ERR; -142 \} -143 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) \{ -144 goto ERR; -145 \} -146 if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) \{ -147 goto ERR; -148 \} -149 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) \{ -150 goto ERR; -151 \} -152 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) \{ -153 goto ERR; -154 \} -155 -156 /* now solve the matrix -157 -158 0 0 0 0 1 -159 1 2 4 8 16 -160 1 1 1 1 1 -161 16 8 4 2 1 -162 1 0 0 0 0 -163 -164 using 12 subtractions, 4 shifts, -165 2 small divisions and 1 small multiplication -166 */ -167 -168 /* r1 - r4 */ -169 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) \{ -170 goto ERR; -171 \} -172 /* r3 - r0 */ -173 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) \{ -174 goto ERR; -175 \} -176 /* r1/2 */ -177 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) \{ -178 goto ERR; -179 \} -180 /* r3/2 */ -181 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) \{ -182 goto ERR; -183 \} -184 /* r2 - r0 - r4 */ -185 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) \{ -186 goto ERR; -187 \} -188 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) \{ -189 goto ERR; -190 \} -191 /* r1 - r2 */ -192 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) \{ -193 goto ERR; -194 \} -195 /* r3 - r2 */ -196 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) \{ -197 goto ERR; -198 \} -199 /* r1 - 8r0 */ -200 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) \{ -201 goto ERR; -202 \} -203 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) \{ -204 goto ERR; -205 \} -206 /* r3 - 8r4 */ -207 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) \{ -208 goto ERR; -209 \} -210 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) \{ -211 goto ERR; -212 \} -213 /* 3r2 - r1 - r3 */ -214 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) \{ -215 goto ERR; -216 \} -217 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) \{ -218 goto ERR; -219 \} -220 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) \{ -221 goto ERR; -222 \} -223 /* r1 - r2 */ -224 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) \{ -225 goto ERR; -226 \} -227 /* r3 - r2 */ -228 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) \{ -229 goto ERR; -230 \} -231 /* r1/3 */ -232 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) \{ -233 goto ERR; -234 \} -235 /* r3/3 */ -236 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) \{ -237 goto ERR; -238 \} -239 -240 /* at this point shift W[n] by B*n */ -241 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) \{ -242 goto ERR; -243 \} -244 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) \{ -245 goto ERR; -246 \} -247 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) \{ -248 goto ERR; -249 \} -250 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) \{ -251 goto ERR; -252 \} -253 -254 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) \{ -255 goto ERR; -256 \} -257 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) \{ -258 goto ERR; -259 \} -260 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) \{ -261 goto ERR; -262 \} -263 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) \{ -264 goto ERR; -265 \} -266 -267 ERR: -268 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, -269 &a0, &a1, &a2, &b0, &b1, -270 &b2, &tmp1, &tmp2, NULL); -271 return res; -272 \} -273 +018 int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) +019 \{ +020 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; +021 int res, B; +022 +023 /* init temps */ +024 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, +025 &a0, &a1, &a2, &b0, &b1, +026 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) \{ +027 return res; +028 \} +029 +030 /* B */ +031 B = MIN(a->used, b->used) / 3; +032 +033 /* a = a2 * B**2 + a1 * B + a0 */ +034 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) \{ +035 goto ERR; +036 \} +037 +038 if ((res = mp_copy(a, &a1)) != MP_OKAY) \{ +039 goto ERR; +040 \} +041 mp_rshd(&a1, B); +042 mp_mod_2d(&a1, DIGIT_BIT * B, &a1); +043 +044 if ((res = mp_copy(a, &a2)) != MP_OKAY) \{ +045 goto ERR; +046 \} +047 mp_rshd(&a2, B*2); +048 +049 /* b = b2 * B**2 + b1 * B + b0 */ +050 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) \{ +051 goto ERR; +052 \} +053 +054 if ((res = mp_copy(b, &b1)) != MP_OKAY) \{ +055 goto ERR; +056 \} +057 mp_rshd(&b1, B); +058 mp_mod_2d(&b1, DIGIT_BIT * B, &b1); +059 +060 if ((res = mp_copy(b, &b2)) != MP_OKAY) \{ +061 goto ERR; +062 \} +063 mp_rshd(&b2, B*2); +064 +065 /* w0 = a0*b0 */ +066 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) \{ +067 goto ERR; +068 \} +069 +070 /* w4 = a2 * b2 */ +071 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) \{ +072 goto ERR; +073 \} +074 +075 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ +076 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) \{ +077 goto ERR; +078 \} +079 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) \{ +080 goto ERR; +081 \} +082 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) \{ +083 goto ERR; +084 \} +085 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) \{ +086 goto ERR; +087 \} +088 +089 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) \{ +090 goto ERR; +091 \} +092 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) \{ +093 goto ERR; +094 \} +095 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) \{ +096 goto ERR; +097 \} +098 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) \{ +099 goto ERR; +100 \} +101 +102 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) \{ +103 goto ERR; +104 \} +105 +106 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ +107 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) \{ +108 goto ERR; +109 \} +110 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) \{ +111 goto ERR; +112 \} +113 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) \{ +114 goto ERR; +115 \} +116 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) \{ +117 goto ERR; +118 \} +119 +120 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) \{ +121 goto ERR; +122 \} +123 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) \{ +124 goto ERR; +125 \} +126 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) \{ +127 goto ERR; +128 \} +129 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) \{ +130 goto ERR; +131 \} +132 +133 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) \{ +134 goto ERR; +135 \} +136 +137 +138 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ +139 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) \{ +140 goto ERR; +141 \} +142 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) \{ +143 goto ERR; +144 \} +145 if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) \{ +146 goto ERR; +147 \} +148 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) \{ +149 goto ERR; +150 \} +151 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) \{ +152 goto ERR; +153 \} +154 +155 /* now solve the matrix +156 +157 0 0 0 0 1 +158 1 2 4 8 16 +159 1 1 1 1 1 +160 16 8 4 2 1 +161 1 0 0 0 0 +162 +163 using 12 subtractions, 4 shifts, +164 2 small divisions and 1 small multiplication +165 */ +166 +167 /* r1 - r4 */ +168 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) \{ +169 goto ERR; +170 \} +171 /* r3 - r0 */ +172 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) \{ +173 goto ERR; +174 \} +175 /* r1/2 */ +176 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) \{ +177 goto ERR; +178 \} +179 /* r3/2 */ +180 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) \{ +181 goto ERR; +182 \} +183 /* r2 - r0 - r4 */ +184 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) \{ +185 goto ERR; +186 \} +187 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) \{ +188 goto ERR; +189 \} +190 /* r1 - r2 */ +191 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) \{ +192 goto ERR; +193 \} +194 /* r3 - r2 */ +195 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) \{ +196 goto ERR; +197 \} +198 /* r1 - 8r0 */ +199 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) \{ +200 goto ERR; +201 \} +202 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) \{ +203 goto ERR; +204 \} +205 /* r3 - 8r4 */ +206 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) \{ +207 goto ERR; +208 \} +209 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) \{ +210 goto ERR; +211 \} +212 /* 3r2 - r1 - r3 */ +213 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) \{ +214 goto ERR; +215 \} +216 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) \{ +217 goto ERR; +218 \} +219 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) \{ +220 goto ERR; +221 \} +222 /* r1 - r2 */ +223 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) \{ +224 goto ERR; +225 \} +226 /* r3 - r2 */ +227 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) \{ +228 goto ERR; +229 \} +230 /* r1/3 */ +231 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) \{ +232 goto ERR; +233 \} +234 /* r3/3 */ +235 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) \{ +236 goto ERR; +237 \} +238 +239 /* at this point shift W[n] by B*n */ +240 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) \{ +241 goto ERR; +242 \} +243 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) \{ +244 goto ERR; +245 \} +246 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) \{ +247 goto ERR; +248 \} +249 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) \{ +250 goto ERR; +251 \} +252 +253 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) \{ +254 goto ERR; +255 \} +256 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) \{ +257 goto ERR; +258 \} +259 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) \{ +260 goto ERR; +261 \} +262 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) \{ +263 goto ERR; +264 \} +265 +266 ERR: +267 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, +268 &a0, &a1, &a2, &b0, &b1, +269 &b2, &tmp1, &tmp2, NULL); +270 return res; +271 \} +272 \end{alltt} \end{small} @@ -4855,8 +4850,8 @@ \subsection{The Baseline Squaring Algorithm} 026 pa = a->used; 027 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) \{ 028 return res; -029 \} -030 +029 \} +030 031 /* default used is maximum possible size */ 032 t.used = 2*pa + 1; 033 @@ -5008,113 +5003,112 @@ \subsection{Faster Squaring by the ``Comba'' Method} 031 * Based on Algorithm 14.16 on pp.597 of HAC. 032 * 033 */ -034 int -035 fast_s_mp_sqr (mp_int * a, mp_int * b) -036 \{ -037 int olduse, newused, res, ix, pa; -038 mp_word W2[MP_WARRAY], W[MP_WARRAY]; -039 -040 /* calculate size of product and allocate as required */ -041 pa = a->used; -042 newused = pa + pa + 1; -043 if (b->alloc < newused) \{ -044 if ((res = mp_grow (b, newused)) != MP_OKAY) \{ -045 return res; -046 \} -047 \} -048 -049 /* zero temp buffer (columns) -050 * Note that there are two buffers. Since squaring requires -051 * a outer and inner product and the inner product requires -052 * computing a product and doubling it (a relatively expensive -053 * op to perform n**2 times if you don't have to) the inner and -054 * outer products are computed in different buffers. This way -055 * the inner product can be doubled using n doublings instead of -056 * n**2 -057 */ -058 memset (W, 0, newused * sizeof (mp_word)); -059 memset (W2, 0, newused * sizeof (mp_word)); -060 -061 /* This computes the inner product. To simplify the inner N**2 loop -062 * the multiplication by two is done afterwards in the N loop. -063 */ -064 for (ix = 0; ix < pa; ix++) \{ -065 /* compute the outer product -066 * -067 * Note that every outer product is computed -068 * for a particular column only once which means that -069 * there is no need todo a double precision addition -070 * into the W2[] array. -071 */ -072 W2[ix + ix] = ((mp_word)a->dp[ix]) * ((mp_word)a->dp[ix]); -073 -074 \{ -075 register mp_digit tmpx, *tmpy; -076 register mp_word *_W; -077 register int iy; -078 -079 /* copy of left side */ -080 tmpx = a->dp[ix]; -081 -082 /* alias for right side */ -083 tmpy = a->dp + (ix + 1); -084 -085 /* the column to store the result in */ -086 _W = W + (ix + ix + 1); -087 -088 /* inner products */ -089 for (iy = ix + 1; iy < pa; iy++) \{ -090 *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++); -091 \} -092 \} -093 \} -094 -095 /* setup dest */ -096 olduse = b->used; -097 b->used = newused; -098 -099 /* now compute digits -100 * -101 * We have to double the inner product sums, add in the -102 * outer product sums, propagate carries and convert -103 * to single precision. -104 */ -105 \{ -106 register mp_digit *tmpb; -107 -108 /* double first value, since the inner products are -109 * half of what they should be -110 */ -111 W[0] += W[0] + W2[0]; -112 -113 tmpb = b->dp; -114 for (ix = 1; ix < newused; ix++) \{ -115 /* double/add next digit */ -116 W[ix] += W[ix] + W2[ix]; -117 -118 /* propagate carry forwards [from the previous digit] */ -119 W[ix] = W[ix] + (W[ix - 1] >> ((mp_word) DIGIT_BIT)); -120 -121 /* store the current digit now that the carry isn't -122 * needed -123 */ -124 *tmpb++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK)); -125 \} -126 /* set the last value. Note even if the carry is zero -127 * this is required since the next step will not zero -128 * it if b originally had a value at b->dp[2*a.used] -129 */ -130 *tmpb++ = (mp_digit) (W[(newused) - 1] & ((mp_word) MP_MASK)); -131 -132 /* clear high digits of b if there were any originally */ -133 for (; ix < olduse; ix++) \{ -134 *tmpb++ = 0; -135 \} -136 \} -137 -138 mp_clamp (b); -139 return MP_OKAY; -140 \} +034 int fast_s_mp_sqr (mp_int * a, mp_int * b) +035 \{ +036 int olduse, newused, res, ix, pa; +037 mp_word W2[MP_WARRAY], W[MP_WARRAY]; +038 +039 /* calculate size of product and allocate as required */ +040 pa = a->used; +041 newused = pa + pa + 1; +042 if (b->alloc < newused) \{ +043 if ((res = mp_grow (b, newused)) != MP_OKAY) \{ +044 return res; +045 \} +046 \} +047 +048 /* zero temp buffer (columns) +049 * Note that there are two buffers. Since squaring requires +050 * a outer and inner product and the inner product requires +051 * computing a product and doubling it (a relatively expensive +052 * op to perform n**2 times if you don't have to) the inner and +053 * outer products are computed in different buffers. This way +054 * the inner product can be doubled using n doublings instead of +055 * n**2 +056 */ +057 memset (W, 0, newused * sizeof (mp_word)); +058 memset (W2, 0, newused * sizeof (mp_word)); +059 +060 /* This computes the inner product. To simplify the inner N**2 loop +061 * the multiplication by two is done afterwards in the N loop. +062 */ +063 for (ix = 0; ix < pa; ix++) \{ +064 /* compute the outer product +065 * +066 * Note that every outer product is computed +067 * for a particular column only once which means that +068 * there is no need todo a double precision addition +069 * into the W2[] array. +070 */ +071 W2[ix + ix] = ((mp_word)a->dp[ix]) * ((mp_word)a->dp[ix]); +072 +073 \{ +074 register mp_digit tmpx, *tmpy; +075 register mp_word *_W; +076 register int iy; +077 +078 /* copy of left side */ +079 tmpx = a->dp[ix]; +080 +081 /* alias for right side */ +082 tmpy = a->dp + (ix + 1); +083 +084 /* the column to store the result in */ +085 _W = W + (ix + ix + 1); +086 +087 /* inner products */ +088 for (iy = ix + 1; iy < pa; iy++) \{ +089 *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++); +090 \} +091 \} +092 \} +093 +094 /* setup dest */ +095 olduse = b->used; +096 b->used = newused; +097 +098 /* now compute digits +099 * +100 * We have to double the inner product sums, add in the +101 * outer product sums, propagate carries and convert +102 * to single precision. +103 */ +104 \{ +105 register mp_digit *tmpb; +106 +107 /* double first value, since the inner products are +108 * half of what they should be +109 */ +110 W[0] += W[0] + W2[0]; +111 +112 tmpb = b->dp; +113 for (ix = 1; ix < newused; ix++) \{ +114 /* double/add next digit */ +115 W[ix] += W[ix] + W2[ix]; +116 +117 /* propagate carry forwards [from the previous digit] */ +118 W[ix] = W[ix] + (W[ix - 1] >> ((mp_word) DIGIT_BIT)); +119 +120 /* store the current digit now that the carry isn't +121 * needed +122 */ +123 *tmpb++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK)); +124 \} +125 /* set the last value. Note even if the carry is zero +126 * this is required since the next step will not zero +127 * it if b originally had a value at b->dp[2*a.used] +128 */ +129 *tmpb++ = (mp_digit) (W[(newused) - 1] & ((mp_word) MP_MASK)); +130 +131 /* clear high digits of b if there were any originally */ +132 for (; ix < olduse; ix++) \{ +133 *tmpb++ = 0; +134 \} +135 \} +136 +137 mp_clamp (b); +138 return MP_OKAY; +139 \} \end{alltt} \end{small} @@ -5138,7 +5132,7 @@ \subsection{Karatsuba Squaring} Karatsuba multiplication, this algorithm can be applied recursively on the input and will achieve an asymptotic running time of $O \left ( n^{lg(3)} \right )$. -You might ask yourself, if the asymptotic time of Karatsuba squaring and multiplication is the same, why not simply use the multiplication algorithm +If the asymptotic times of Karatsuba squaring and multiplication are the same, why not simply use the multiplication algorithm instead? The answer to this arises from the cutoff point for squaring. As in multiplication there exists a cutoff point, at which the time required for a Comba based squaring and a Karatsuba based squaring meet. Due to the overhead inherent in the Karatsuba method, the cutoff point is fairly high. For example, on an AMD Athlon XP processor with $\beta = 2^{28}$, the cutoff point is around 127 digits. @@ -5231,104 +5225,103 @@ \subsection{Karatsuba Squaring} 021 * is essentially the same algorithm but merely 022 * tuned to perform recursive squarings. 023 */ -024 int -025 mp_karatsuba_sqr (mp_int * a, mp_int * b) -026 \{ -027 mp_int x0, x1, t1, t2, x0x0, x1x1; -028 int B, err; -029 -030 err = MP_MEM; -031 -032 /* min # of digits */ -033 B = a->used; -034 -035 /* now divide in two */ -036 B = B / 2; -037 -038 /* init copy all the temps */ -039 if (mp_init_size (&x0, B) != MP_OKAY) -040 goto ERR; -041 if (mp_init_size (&x1, a->used - B) != MP_OKAY) -042 goto X0; -043 -044 /* init temps */ -045 if (mp_init_size (&t1, a->used * 2) != MP_OKAY) -046 goto X1; -047 if (mp_init_size (&t2, a->used * 2) != MP_OKAY) -048 goto T1; -049 if (mp_init_size (&x0x0, B * 2) != MP_OKAY) -050 goto T2; -051 if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY) -052 goto X0X0; -053 -054 \{ -055 register int x; -056 register mp_digit *dst, *src; -057 -058 src = a->dp; -059 -060 /* now shift the digits */ -061 dst = x0.dp; -062 for (x = 0; x < B; x++) \{ -063 *dst++ = *src++; -064 \} -065 -066 dst = x1.dp; -067 for (x = B; x < a->used; x++) \{ -068 *dst++ = *src++; -069 \} -070 \} -071 -072 x0.used = B; -073 x1.used = a->used - B; -074 -075 mp_clamp (&x0); -076 -077 /* now calc the products x0*x0 and x1*x1 */ -078 if (mp_sqr (&x0, &x0x0) != MP_OKAY) -079 goto X1X1; /* x0x0 = x0*x0 */ -080 if (mp_sqr (&x1, &x1x1) != MP_OKAY) -081 goto X1X1; /* x1x1 = x1*x1 */ -082 -083 /* now calc (x1-x0)**2 */ -084 if (mp_sub (&x1, &x0, &t1) != MP_OKAY) -085 goto X1X1; /* t1 = x1 - x0 */ -086 if (mp_sqr (&t1, &t1) != MP_OKAY) -087 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ -088 -089 /* add x0y0 */ -090 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY) -091 goto X1X1; /* t2 = x0x0 + x1x1 */ -092 if (mp_sub (&t2, &t1, &t1) != MP_OKAY) -093 goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */ -094 -095 /* shift by B */ -096 if (mp_lshd (&t1, B) != MP_OKAY) -097 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<used; +033 +034 /* now divide in two */ +035 B = B >> 1; +036 +037 /* init copy all the temps */ +038 if (mp_init_size (&x0, B) != MP_OKAY) +039 goto ERR; +040 if (mp_init_size (&x1, a->used - B) != MP_OKAY) +041 goto X0; +042 +043 /* init temps */ +044 if (mp_init_size (&t1, a->used * 2) != MP_OKAY) +045 goto X1; +046 if (mp_init_size (&t2, a->used * 2) != MP_OKAY) +047 goto T1; +048 if (mp_init_size (&x0x0, B * 2) != MP_OKAY) +049 goto T2; +050 if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY) +051 goto X0X0; +052 +053 \{ +054 register int x; +055 register mp_digit *dst, *src; +056 +057 src = a->dp; +058 +059 /* now shift the digits */ +060 dst = x0.dp; +061 for (x = 0; x < B; x++) \{ +062 *dst++ = *src++; +063 \} +064 +065 dst = x1.dp; +066 for (x = B; x < a->used; x++) \{ +067 *dst++ = *src++; +068 \} +069 \} +070 +071 x0.used = B; +072 x1.used = a->used - B; +073 +074 mp_clamp (&x0); +075 +076 /* now calc the products x0*x0 and x1*x1 */ +077 if (mp_sqr (&x0, &x0x0) != MP_OKAY) +078 goto X1X1; /* x0x0 = x0*x0 */ +079 if (mp_sqr (&x1, &x1x1) != MP_OKAY) +080 goto X1X1; /* x1x1 = x1*x1 */ +081 +082 /* now calc (x1-x0)**2 */ +083 if (mp_sub (&x1, &x0, &t1) != MP_OKAY) +084 goto X1X1; /* t1 = x1 - x0 */ +085 if (mp_sqr (&t1, &t1) != MP_OKAY) +086 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ +087 +088 /* add x0y0 */ +089 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY) +090 goto X1X1; /* t2 = x0x0 + x1x1 */ +091 if (mp_sub (&t2, &t1, &t1) != MP_OKAY) +092 goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */ +093 +094 /* shift by B */ +095 if (mp_lshd (&t1, B) != MP_OKAY) +096 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<