diff --git a/README.md b/README.md index 79fffd2..30cffd7 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Issues on this repository are strictly for problems or questions concerning the This code package requires a C++11 compiler. The code uses OpenMP directives, so compiler support for OpenMP is expected. GCC is preferred (and the only platform tested). There is one method that involves a GCC built-in function (`chi_square_tests.h -> binary_goodness_of_fit() -> __builtin_popcount()`). To run this you will need some compiler that supplies this GCC built-in function (GCC and clang both do so). -The resulting binary is linked with bzlib and divsufsort, so these libraries (and their associated include files) must be installed and accessible to the compiler. +The resulting binary is linked with bzlib, divsufsort, GMP MP and GNU MPFR, so these libraries (and their associated include files) must be installed and accessible to the compiler. See [the wiki](https://github.com/usnistgov/SP800-90B_EntropyAssessment/wiki/Installing-libdivsufsort) for some distribution-specific instructions on installing divsufsort. @@ -55,6 +55,7 @@ You may specify either `-i` or `-c`, and either `-a` or `-t`. These correspond t * Note: When testing binary data, no `H_bitstring` assessment is produced, so the `-a` and `-t` options produce the same results for the initial assessment of binary data. * `-l`: Reads (at most) `samples` data samples after indexing into the file by `index*samples` bytes. * `-v`: Optional verbosity flag for more output. Can be used multiple times. +======= * bits_per_symbol are the number of bits per symbol. Each symbol is expected to fit within a single byte. To run the non-IID tests, use the Makefile to compile: diff --git a/cpp/Makefile b/cpp/Makefile index d5ac13f..405cec2 100644 --- a/cpp/Makefile +++ b/cpp/Makefile @@ -30,7 +30,7 @@ restart_main.o: restart_main.cpp conditioning: conditioning_main.o conditioning_main.o: conditioning_main.cpp - $(CXX) $(CXXFLAGS) $(INC) conditioning_main.cpp -o ea_conditioning $(LIB) + $(CXX) $(CXXFLAGS) $(INC) conditioning_main.cpp -o ea_conditioning $(LIB) -lmpfr -lgmp transpose: transpose_main.o transpose_main.o: transpose_main.cpp diff --git a/cpp/conditioning_main.cpp b/cpp/conditioning_main.cpp index 9bb9f2a..24e37c7 100644 --- a/cpp/conditioning_main.cpp +++ b/cpp/conditioning_main.cpp @@ -5,8 +5,9 @@ #include #include +#include -[[ noreturn ]] void print_usage(){ +[[ noreturn ]] void print_usage() { printf("Usage is: ea_conditioning [-v] \n"); printf("\tor \n\tea_conditioning -n \n\n"); printf("\t : input number of bits to conditioning function.\n"); @@ -26,11 +27,20 @@ exit(-1); } -int main(int argc, char* argv[]){ +int main(int argc, char* argv[]) { bool vetted; - double h_p = -1.0; - double h_in, h_out, n_in, n_out, nw, n, p_high, p_low, psi, omega, output_entropy, power_term; + long double h_p = -1.0; + long double h_in, h_out; + long double outputEntropy; + unsigned int n_in, n_out, nw, n; + mpfr_prec_t precision; int opt; + bool adaquatePrecision = false; + bool closeToFullEntropy = false; + bool fullEntropy = false; + unsigned int maxval; + long double epsilonExp = -1.0; + mpfr_t ap_h_in, ap_p_high, ap_p_low, ap_denom, ap_power_term, ap_psi, ap_omega, ap_outputEntropy, ap_log2epsilon; vetted = true; @@ -57,69 +67,170 @@ int main(int argc, char* argv[]){ } else{ // get n_in - n_in = atof(argv[0]); - if((n_in <= 0) || (floor(n_in) != n_in)){ - printf("n_in must be a positive integer.\n"); + n_in = strtoul(argv[0], NULL, 0); + if(n_in < 1) { + printf("n_in must be greater than 0.\n"); print_usage(); - } + } // get n_out - n_out = atof(argv[1]); - if((n_out <= 0) || (floor(n_out) != n_out)){ - printf("n_out must be a positive integer.\n"); - print_usage(); - } + n_out = strtoul(argv[1], NULL, 0); // get n_out - nw = atof(argv[2]); - if((nw <= 0) || (floor(nw) != nw)){ - printf("n_out must be a positive integer.\n"); - print_usage(); - } + nw = strtoul(argv[2], NULL, 0); // get h_in - h_in = atof(argv[3]); - if((h_in <= 0) || (h_in > n_in)){ + h_in = strtold(argv[3], NULL); + if((h_in <= 0) || (h_in > (long double)n_in)){ printf("h_in must be positive and at most n_in.\n"); print_usage(); } if(!vetted){ - h_p = atof(argv[4]); - if((h_p < 0) || (h_p > 1.0)){ - printf("h' must be between 0 and 1 inclusive.\n"); + h_p = strtold(argv[4], NULL); + if((h_p <= 0) || (h_p > 1.0)){ + printf("h' must be the interval (0 and 1].\n"); print_usage(); } - } + } } - if(nw > n_in) nw = n_in; - - printf("n_in: %f\n", n_in); - printf("n_out: %f\n", n_out); - printf("nw: %f\n", nw); - printf("h_in: %f\n", h_in); - if(!vetted) printf("h': %f\n", h_p); - // compute Output Entropy (Section 3.1.5.1.2) - p_high = pow(2.0, -h_in); - p_low = (1.0-p_high)/(pow(2.0, n_in)-1); + //Step 2 is invariant, and not subject to precision problems. + if(nw > n_in) nw = n_in; //By 90B Appendix E n = std::min(n_out, nw); - power_term = pow(2.0, n_in - n); - psi = power_term*p_low + p_high; - omega = (power_term + sqrt(2*n*power_term*log(2)))*p_low; - output_entropy = -log2(std::max(psi, omega)); - if(output_entropy > n_out) output_entropy = n_out; - if(output_entropy < 0) output_entropy = 0; - - if(vetted){ - printf("\n(Vetted) "); - h_out = output_entropy; + + //Print out the inputs + printf("n_in: %u\n", n_in); + printf("n_out: %u\n", n_out); + printf("nw: %u\n", nw); + printf("h_in: %.22Lg\n", h_in); + if(!vetted) printf("h': %.22Lg\n", h_p); + + // Establish the maximum precision that ought to be necessary + // If something goes wrong, we can increase this precision automatically. + maxval = (n_in>n_out)?n_in:n_out; + maxval = (maxval>nw)?maxval:nw; + precision = 2*maxval; + + //Initialize all the arbitrary precision values + mpfr_inits2(precision, ap_h_in, ap_p_high, ap_p_low, ap_denom, ap_power_term, ap_psi, ap_omega, ap_outputEntropy, ap_log2epsilon, NULL); + + adaquatePrecision = false; + + while(!adaquatePrecision) { + adaquatePrecision = true; + //Initialize arbitrary precision versions of h_in and needed constants. + mpfr_set_ld(ap_h_in, h_in, MPFR_RNDN); + + // compute Output Entropy (Section 3.1.5.1.2) + // Step 1. + // P_high + mpfr_neg(ap_h_in, ap_h_in, MPFR_RNDN); + mpfr_ui_pow(ap_p_high, 2UL, ap_h_in, MPFR_RNDN); + + //P_low + mpfr_ui_sub(ap_p_low, 1UL, ap_p_high, MPFR_RNDN); + mpfr_ui_pow_ui (ap_denom, 2UL, n_in, MPFR_RNDN); + mpfr_sub_ui(ap_denom, ap_denom, 1UL, MPFR_RNDN); + mpfr_div (ap_p_low, ap_p_low, ap_denom, MPFR_RNDN); + + //Prior to moving on, calculate a reused power term + mpfr_ui_pow_ui(ap_power_term, 2UL, n_in - n, MPFR_RNDN); + + //Step 3: Calculate Psi + mpfr_mul(ap_psi, ap_power_term, ap_p_low, MPFR_RNDN); + mpfr_add(ap_psi, ap_psi, ap_p_high, MPFR_RNDN); + + //Step 4: Calculate U (goes into the ap_omega variable) + mpfr_log_ui(ap_omega, 2UL, MPFR_RNDN); + mpfr_mul(ap_omega, ap_omega, ap_power_term, MPFR_RNDN); + mpfr_mul_ui(ap_omega, ap_omega, 2UL*n, MPFR_RNDN); + mpfr_sqrt(ap_omega, ap_omega, MPFR_RNDN); + mpfr_add(ap_omega, ap_omega, ap_power_term, MPFR_RNDN); + + //Step 5: Calculate omega + mpfr_mul(ap_omega, ap_omega, ap_p_low, MPFR_RNDN); + + //Step 6: Compare the values + if(mpfr_cmp(ap_omega, ap_psi) > 0) { + //omega > psi + mpfr_log2(ap_outputEntropy, ap_omega, MPFR_RNDN); + } else { + //omega <= psi + mpfr_log2(ap_outputEntropy, ap_psi, MPFR_RNDN); + } + + //Keep -outputEntropy for calculation of log2epsilon + mpfr_set(ap_log2epsilon, ap_outputEntropy, MPFR_RNDN); + + //finalize outputEntropy + mpfr_neg(ap_outputEntropy, ap_outputEntropy, MPFR_RNDN); + + + if(mpfr_cmp_ui(ap_outputEntropy, n_out) >= 0) { + //Double the precision of everything and try again + adaquatePrecision = false; + precision = 2*precision; + fprintf(stderr, "Indeterminate result. Increasing precision to %ld bits and trying again.\n", precision); + mpfr_set_prec(ap_h_in, precision); + mpfr_set_prec(ap_p_high, precision); + mpfr_set_prec(ap_p_low, precision); + mpfr_set_prec(ap_denom, precision); + mpfr_set_prec(ap_power_term, precision); + mpfr_set_prec(ap_psi, precision); + mpfr_set_prec(ap_omega, precision); + mpfr_set_prec(ap_outputEntropy, precision); + mpfr_set_prec(ap_log2epsilon, precision); + continue; + } + + outputEntropy = mpfr_get_ld(ap_outputEntropy, MPFR_RNDN); + + if(outputEntropy > 0.999 * (long double)n_out) { + closeToFullEntropy = true; + //Calculate -log2(epsilon) + mpfr_div_ui(ap_log2epsilon, ap_log2epsilon, n_out, MPFR_RNDN); + mpfr_log1p(ap_log2epsilon, ap_log2epsilon, MPFR_RNDN); + //Reuse ap_denom to hold log(2) + mpfr_log_ui(ap_denom, 2UL, MPFR_RNDN); + mpfr_div(ap_log2epsilon, ap_log2epsilon, ap_denom, MPFR_RNDN); + //Make it positive + mpfr_neg(ap_log2epsilon, ap_log2epsilon, MPFR_RNDN); + + //Now convert back to a long double + epsilonExp = mpfr_get_ld(ap_log2epsilon, MPFR_RNDN); + + //Should this qualify as "full entropy" under the 2012 draft of SP800-90B? + if(mpfr_cmp_ui(ap_log2epsilon, 64) >= 0) { + fullEntropy = true; + } + } } - else{ + + //We're done with the calculation. Now print results. + if(vetted) { + printf("\n(Vetted) "); + if(closeToFullEntropy) { + if(outputEntropy == (long double)n_out) { + printf("h_out: Close to %.22Lg\n", outputEntropy); + } else { + printf("h_out: %.22Lg\n", outputEntropy); + } + printf("epsilon = 2^(-%.22Lg)", epsilonExp); + if(fullEntropy) { + printf(": SP800-90B 2012 Full Entropy\n"); + } else { + printf("\n"); + } + } else { + printf("h_out: %.22Lg\n", outputEntropy); + } + } else { printf("\n(Non-vetted) "); - h_out = std::min(output_entropy, std::min(0.999*n_out, h_p*n_out)); + h_out = std::min(outputEntropy, std::min(0.999L*((long double)n_out), h_p*(long double)n_out)); + printf("h_out: %.22Lg\n", h_out); } - printf("h_out: %f\n", h_out); + return 0; }