Makes conditioning_main.cpp use arbitrary precision floating point. M…

…akes conditioner render verdict with respect to the 2012 SP800-90B draft's definition of 'full entropy'. Resolves usnistgov#128
joshuaehill · Aug 1, 2020 · e869e3a · e869e3a
1 parent 303e7dd
commit e869e3a
Show file tree

Hide file tree

Showing 3 changed files with 161 additions and 49 deletions.
diff --git a/README.md b/README.md
@@ -14,7 +14,7 @@ Issues on this repository are strictly for problems or questions concerning the
 
 This code package requires a C++11 compiler. The code uses OpenMP directives, so compiler support for OpenMP is expected. GCC is preferred (and the only platform tested). There is one method that involves a GCC built-in function (`chi_square_tests.h -> binary_goodness_of_fit() -> __builtin_popcount()`). To run this you will need some compiler that supplies this GCC built-in function (GCC and clang both do so).
 
-The resulting binary is linked with bzlib and divsufsort, so these libraries (and their associated include files) must be installed and accessible to the compiler.
+The resulting binary is linked with bzlib, divsufsort, GMP MP and GNU MPFR, so these libraries (and their associated include files) must be installed and accessible to the compiler.
 
 See [the wiki](https://github.com/usnistgov/SP800-90B_EntropyAssessment/wiki/Installing-libdivsufsort) for some distribution-specific instructions on installing divsufsort.
 
@@ -55,6 +55,7 @@ You may specify either `-i` or `-c`, and either `-a` or `-t`. These correspond t
 * Note: When testing binary data, no `H_bitstring` assessment is produced, so the `-a` and `-t` options produce the same results for the initial assessment of binary data.
 * `-l`: Reads (at most) `samples` data samples after indexing into the file by `index*samples` bytes.
 * `-v`: Optional verbosity flag for more output. Can be used multiple times.
+=======
 * bits_per_symbol are the number of bits per symbol. Each symbol is expected to fit within a single byte.
 
 To run the non-IID tests, use the Makefile to compile:

diff --git a/cpp/Makefile b/cpp/Makefile
@@ -30,7 +30,7 @@ restart_main.o: restart_main.cpp
 
 conditioning: conditioning_main.o
 conditioning_main.o: conditioning_main.cpp
-	$(CXX) $(CXXFLAGS) $(INC) conditioning_main.cpp -o ea_conditioning $(LIB)
+	$(CXX) $(CXXFLAGS) $(INC) conditioning_main.cpp -o ea_conditioning $(LIB) -lmpfr -lgmp
 
 transpose: transpose_main.o
 transpose_main.o: transpose_main.cpp

diff --git a/cpp/conditioning_main.cpp b/cpp/conditioning_main.cpp
@@ -5,8 +5,9 @@
 #include <algorithm>
 
 #include <getopt.h>
+#include <mpfr.h>
 
-[[ noreturn ]] void print_usage(){
+[[ noreturn ]] void print_usage() {
 	printf("Usage is: ea_conditioning [-v] <n_in> <n_out> <nw> <h_in>\n");
 	printf("\tor \n\tea_conditioning -n <n_in> <n_out> <nw> <h_in> <h'>\n\n");
 	printf("\t <n_in>: input number of bits to conditioning function.\n");
@@ -26,11 +27,20 @@
 	exit(-1);
 }
 
-int main(int argc, char* argv[]){
+int main(int argc, char* argv[]) {
 	bool vetted;
-	double h_p = -1.0;
-	double h_in, h_out, n_in, n_out, nw, n, p_high, p_low, psi, omega, output_entropy, power_term;
+	long double h_p = -1.0;
+	long double h_in, h_out; 
+	long double outputEntropy;
+	unsigned int n_in, n_out, nw, n;
+	mpfr_prec_t precision;
 	int opt;
+	bool adaquatePrecision = false;
+	bool closeToFullEntropy = false;
+	bool fullEntropy = false;
+	unsigned int maxval;
+	long double epsilonExp = -1.0;
+	mpfr_t ap_h_in, ap_p_high, ap_p_low, ap_denom, ap_power_term, ap_psi, ap_omega, ap_outputEntropy, ap_log2epsilon;
 
 	vetted = true;
 
@@ -57,69 +67,170 @@ int main(int argc, char* argv[]){
 	}
 	else{
                 // get n_in     
-                n_in = atof(argv[0]);
-                if((n_in <= 0) || (floor(n_in) != n_in)){
-                        printf("n_in must be a positive integer.\n");
+                n_in = strtoul(argv[0], NULL, 0);
+		if(n_in < 1) {
+                        printf("n_in must be greater than 0.\n");
                         print_usage();
-                }
+		}
 
 	    	// get n_out     
-                n_out = atof(argv[1]);
-                if((n_out <= 0) || (floor(n_out) != n_out)){
-                        printf("n_out must be a positive integer.\n");
-                        print_usage();
-                }
+                n_out = strtoul(argv[1], NULL, 0);
 
                 // get n_out     
-                nw = atof(argv[2]);
-                if((nw <= 0) || (floor(nw) != nw)){
-                        printf("n_out must be a positive integer.\n");
-                        print_usage();
-                }
+                nw = strtoul(argv[2], NULL, 0);
 
                 // get h_in     
-                h_in = atof(argv[3]);
-                if((h_in <= 0) || (h_in > n_in)){
+                h_in = strtold(argv[3], NULL);
+                if((h_in <= 0) || (h_in > (long double)n_in)){
                         printf("h_in must be positive and at most n_in.\n");
                         print_usage();
                 }
 
 		if(!vetted){
-			h_p = atof(argv[4]);
-			if((h_p < 0) || (h_p > 1.0)){
-				printf("h' must be between 0 and 1 inclusive.\n");
+			h_p = strtold(argv[4], NULL);
+			if((h_p <= 0) || (h_p > 1.0)){
+				printf("h' must be the interval (0 and 1].\n");
 				print_usage();
 			}
-		}
+		} 
 	}
 
-	if(nw > n_in) nw = n_in;
-
-	printf("n_in: %f\n", n_in);
-	printf("n_out: %f\n", n_out);
-	printf("nw: %f\n", nw);
-	printf("h_in: %f\n", h_in);
-	if(!vetted) printf("h': %f\n", h_p);
 
-	// compute Output Entropy (Section 3.1.5.1.2)
-	p_high = pow(2.0, -h_in);
-	p_low = (1.0-p_high)/(pow(2.0, n_in)-1);
+	//Step 2 is invariant, and not subject to precision problems.
+	if(nw > n_in) nw = n_in; //By 90B Appendix E
 	n = std::min(n_out, nw);
-	power_term = pow(2.0, n_in - n);
-	psi = power_term*p_low + p_high;
-	omega = (power_term + sqrt(2*n*power_term*log(2)))*p_low;
-	output_entropy = -log2(std::max(psi, omega));
-	if(output_entropy > n_out) output_entropy = n_out;
-	if(output_entropy < 0) output_entropy = 0;
-
-	if(vetted){
-		printf("\n(Vetted) ");
-		h_out = output_entropy;
+
+	//Print out the inputs
+	printf("n_in: %u\n", n_in);
+	printf("n_out: %u\n", n_out);
+	printf("nw: %u\n", nw);
+	printf("h_in: %.22Lg\n", h_in);
+	if(!vetted) printf("h': %.22Lg\n", h_p);
+
+	// Establish the maximum precision that ought to be necessary
+	// If something goes wrong, we can increase this precision automatically.
+	maxval = (n_in>n_out)?n_in:n_out;
+	maxval = (maxval>nw)?maxval:nw;
+	precision = 2*maxval;
+
+	//Initialize all the arbitrary precision values
+	mpfr_inits2(precision, ap_h_in, ap_p_high, ap_p_low, ap_denom, ap_power_term, ap_psi, ap_omega, ap_outputEntropy, ap_log2epsilon, NULL);
+
+	adaquatePrecision = false;
+
+	while(!adaquatePrecision) {
+		adaquatePrecision = true;
+		//Initialize arbitrary precision versions of h_in and needed constants.
+		mpfr_set_ld(ap_h_in, h_in, MPFR_RNDN);
+
+		// compute Output Entropy (Section 3.1.5.1.2)
+		// Step 1.
+		// P_high
+		mpfr_neg(ap_h_in, ap_h_in, MPFR_RNDN);
+		mpfr_ui_pow(ap_p_high, 2UL, ap_h_in, MPFR_RNDN);
+
+		//P_low
+		mpfr_ui_sub(ap_p_low, 1UL, ap_p_high, MPFR_RNDN);
+		mpfr_ui_pow_ui (ap_denom, 2UL, n_in, MPFR_RNDN);
+		mpfr_sub_ui(ap_denom, ap_denom, 1UL, MPFR_RNDN);
+		mpfr_div (ap_p_low, ap_p_low, ap_denom, MPFR_RNDN);
+
+		//Prior to moving on, calculate a reused power term
+		mpfr_ui_pow_ui(ap_power_term, 2UL, n_in - n, MPFR_RNDN);
+
+		//Step 3: Calculate Psi
+		mpfr_mul(ap_psi, ap_power_term, ap_p_low, MPFR_RNDN);
+		mpfr_add(ap_psi, ap_psi, ap_p_high,  MPFR_RNDN);
+
+		//Step 4: Calculate U (goes into the ap_omega variable)
+		mpfr_log_ui(ap_omega, 2UL, MPFR_RNDN);
+		mpfr_mul(ap_omega, ap_omega, ap_power_term, MPFR_RNDN);
+		mpfr_mul_ui(ap_omega, ap_omega, 2UL*n, MPFR_RNDN);
+		mpfr_sqrt(ap_omega, ap_omega, MPFR_RNDN);
+		mpfr_add(ap_omega, ap_omega, ap_power_term,  MPFR_RNDN);
+
+		//Step 5: Calculate omega
+		mpfr_mul(ap_omega, ap_omega, ap_p_low, MPFR_RNDN);
+
+		//Step 6: Compare the values
+		if(mpfr_cmp(ap_omega, ap_psi) > 0) {
+			//omega > psi
+			mpfr_log2(ap_outputEntropy, ap_omega, MPFR_RNDN);
+		} else {
+			//omega <= psi
+			mpfr_log2(ap_outputEntropy, ap_psi, MPFR_RNDN);
+		}
+
+		//Keep -outputEntropy for calculation of log2epsilon
+		mpfr_set(ap_log2epsilon, ap_outputEntropy, MPFR_RNDN);
+
+		//finalize outputEntropy
+		mpfr_neg(ap_outputEntropy, ap_outputEntropy, MPFR_RNDN);
+
+
+		if(mpfr_cmp_ui(ap_outputEntropy, n_out) >= 0) {
+			//Double the precision of everything and try again
+			adaquatePrecision = false;
+			precision = 2*precision;
+			fprintf(stderr, "Indeterminate result. Increasing precision to %ld bits and trying again.\n", precision);
+			mpfr_set_prec(ap_h_in, precision);
+			mpfr_set_prec(ap_p_high, precision);
+			mpfr_set_prec(ap_p_low, precision);
+			mpfr_set_prec(ap_denom, precision);
+			mpfr_set_prec(ap_power_term, precision);
+			mpfr_set_prec(ap_psi, precision);
+			mpfr_set_prec(ap_omega, precision);
+			mpfr_set_prec(ap_outputEntropy, precision);
+			mpfr_set_prec(ap_log2epsilon, precision);
+			continue;
+		}
+
+		outputEntropy = mpfr_get_ld(ap_outputEntropy, MPFR_RNDN);
+
+		if(outputEntropy > 0.999 * (long double)n_out) {
+			closeToFullEntropy = true;
+			//Calculate -log2(epsilon)
+			mpfr_div_ui(ap_log2epsilon, ap_log2epsilon, n_out, MPFR_RNDN);
+			mpfr_log1p(ap_log2epsilon, ap_log2epsilon, MPFR_RNDN);
+			//Reuse ap_denom to hold log(2)
+			mpfr_log_ui(ap_denom, 2UL, MPFR_RNDN);
+			mpfr_div(ap_log2epsilon, ap_log2epsilon, ap_denom, MPFR_RNDN);
+			//Make it positive
+			mpfr_neg(ap_log2epsilon, ap_log2epsilon, MPFR_RNDN);
+
+			//Now convert back to a long double
+			epsilonExp = mpfr_get_ld(ap_log2epsilon, MPFR_RNDN);
+
+			//Should this qualify as "full entropy" under the 2012 draft of SP800-90B?
+			if(mpfr_cmp_ui(ap_log2epsilon, 64) >= 0) {
+				fullEntropy = true;
+			}
+		}
 	}
-	else{
+
+	//We're done with the calculation. Now print results.
+	if(vetted) {
+		printf("\n(Vetted) ");
+		if(closeToFullEntropy) {
+			if(outputEntropy == (long double)n_out) {
+				printf("h_out: Close to %.22Lg\n", outputEntropy);
+			} else {
+				printf("h_out: %.22Lg\n", outputEntropy);
+			}
+			printf("epsilon = 2^(-%.22Lg)", epsilonExp);
+			if(fullEntropy) {
+				printf(": SP800-90B 2012 Full Entropy\n");
+			} else {
+				printf("\n");
+			}
+		} else {
+			printf("h_out: %.22Lg\n", outputEntropy);
+		}
+	} else {
 		printf("\n(Non-vetted) ");
-		h_out = std::min(output_entropy, std::min(0.999*n_out, h_p*n_out));
+		h_out = std::min(outputEntropy, std::min(0.999L*((long double)n_out), h_p*(long double)n_out));
+		printf("h_out: %.22Lg\n", h_out);
 	}
 
-	printf("h_out: %f\n", h_out);
+	return 0;
 }