Skip to content

Commit

Permalink
Makes conditioning_main.cpp use arbitrary precision floating point. M…
Browse files Browse the repository at this point in the history
…akes conditioner render verdict with respect to the 2012 SP800-90B draft's definition of 'full entropy'. Resolves usnistgov#128
  • Loading branch information
joshuaehill committed Aug 1, 2020
1 parent 303e7dd commit e869e3a
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 49 deletions.
3 changes: 2 additions & 1 deletion README.md
Expand Up @@ -14,7 +14,7 @@ Issues on this repository are strictly for problems or questions concerning the

This code package requires a C++11 compiler. The code uses OpenMP directives, so compiler support for OpenMP is expected. GCC is preferred (and the only platform tested). There is one method that involves a GCC built-in function (`chi_square_tests.h -> binary_goodness_of_fit() -> __builtin_popcount()`). To run this you will need some compiler that supplies this GCC built-in function (GCC and clang both do so).

The resulting binary is linked with bzlib and divsufsort, so these libraries (and their associated include files) must be installed and accessible to the compiler.
The resulting binary is linked with bzlib, divsufsort, GMP MP and GNU MPFR, so these libraries (and their associated include files) must be installed and accessible to the compiler.

See [the wiki](https://github.com/usnistgov/SP800-90B_EntropyAssessment/wiki/Installing-libdivsufsort) for some distribution-specific instructions on installing divsufsort.

Expand Down Expand Up @@ -55,6 +55,7 @@ You may specify either `-i` or `-c`, and either `-a` or `-t`. These correspond t
* Note: When testing binary data, no `H_bitstring` assessment is produced, so the `-a` and `-t` options produce the same results for the initial assessment of binary data.
* `-l`: Reads (at most) `samples` data samples after indexing into the file by `index*samples` bytes.
* `-v`: Optional verbosity flag for more output. Can be used multiple times.
=======
* bits_per_symbol are the number of bits per symbol. Each symbol is expected to fit within a single byte.

To run the non-IID tests, use the Makefile to compile:
Expand Down
2 changes: 1 addition & 1 deletion cpp/Makefile
Expand Up @@ -30,7 +30,7 @@ restart_main.o: restart_main.cpp

conditioning: conditioning_main.o
conditioning_main.o: conditioning_main.cpp
$(CXX) $(CXXFLAGS) $(INC) conditioning_main.cpp -o ea_conditioning $(LIB)
$(CXX) $(CXXFLAGS) $(INC) conditioning_main.cpp -o ea_conditioning $(LIB) -lmpfr -lgmp

transpose: transpose_main.o
transpose_main.o: transpose_main.cpp
Expand Down
205 changes: 158 additions & 47 deletions cpp/conditioning_main.cpp
Expand Up @@ -5,8 +5,9 @@
#include <algorithm>

#include <getopt.h>
#include <mpfr.h>

[[ noreturn ]] void print_usage(){
[[ noreturn ]] void print_usage() {
printf("Usage is: ea_conditioning [-v] <n_in> <n_out> <nw> <h_in>\n");
printf("\tor \n\tea_conditioning -n <n_in> <n_out> <nw> <h_in> <h'>\n\n");
printf("\t <n_in>: input number of bits to conditioning function.\n");
Expand All @@ -26,11 +27,20 @@
exit(-1);
}

int main(int argc, char* argv[]){
int main(int argc, char* argv[]) {
bool vetted;
double h_p = -1.0;
double h_in, h_out, n_in, n_out, nw, n, p_high, p_low, psi, omega, output_entropy, power_term;
long double h_p = -1.0;
long double h_in, h_out;
long double outputEntropy;
unsigned int n_in, n_out, nw, n;
mpfr_prec_t precision;
int opt;
bool adaquatePrecision = false;
bool closeToFullEntropy = false;
bool fullEntropy = false;
unsigned int maxval;
long double epsilonExp = -1.0;
mpfr_t ap_h_in, ap_p_high, ap_p_low, ap_denom, ap_power_term, ap_psi, ap_omega, ap_outputEntropy, ap_log2epsilon;

vetted = true;

Expand All @@ -57,69 +67,170 @@ int main(int argc, char* argv[]){
}
else{
// get n_in
n_in = atof(argv[0]);
if((n_in <= 0) || (floor(n_in) != n_in)){
printf("n_in must be a positive integer.\n");
n_in = strtoul(argv[0], NULL, 0);
if(n_in < 1) {
printf("n_in must be greater than 0.\n");
print_usage();
}
}

// get n_out
n_out = atof(argv[1]);
if((n_out <= 0) || (floor(n_out) != n_out)){
printf("n_out must be a positive integer.\n");
print_usage();
}
n_out = strtoul(argv[1], NULL, 0);

// get n_out
nw = atof(argv[2]);
if((nw <= 0) || (floor(nw) != nw)){
printf("n_out must be a positive integer.\n");
print_usage();
}
nw = strtoul(argv[2], NULL, 0);

// get h_in
h_in = atof(argv[3]);
if((h_in <= 0) || (h_in > n_in)){
h_in = strtold(argv[3], NULL);
if((h_in <= 0) || (h_in > (long double)n_in)){
printf("h_in must be positive and at most n_in.\n");
print_usage();
}

if(!vetted){
h_p = atof(argv[4]);
if((h_p < 0) || (h_p > 1.0)){
printf("h' must be between 0 and 1 inclusive.\n");
h_p = strtold(argv[4], NULL);
if((h_p <= 0) || (h_p > 1.0)){
printf("h' must be the interval (0 and 1].\n");
print_usage();
}
}
}
}

if(nw > n_in) nw = n_in;

printf("n_in: %f\n", n_in);
printf("n_out: %f\n", n_out);
printf("nw: %f\n", nw);
printf("h_in: %f\n", h_in);
if(!vetted) printf("h': %f\n", h_p);

// compute Output Entropy (Section 3.1.5.1.2)
p_high = pow(2.0, -h_in);
p_low = (1.0-p_high)/(pow(2.0, n_in)-1);
//Step 2 is invariant, and not subject to precision problems.
if(nw > n_in) nw = n_in; //By 90B Appendix E
n = std::min(n_out, nw);
power_term = pow(2.0, n_in - n);
psi = power_term*p_low + p_high;
omega = (power_term + sqrt(2*n*power_term*log(2)))*p_low;
output_entropy = -log2(std::max(psi, omega));
if(output_entropy > n_out) output_entropy = n_out;
if(output_entropy < 0) output_entropy = 0;

if(vetted){
printf("\n(Vetted) ");
h_out = output_entropy;

//Print out the inputs
printf("n_in: %u\n", n_in);
printf("n_out: %u\n", n_out);
printf("nw: %u\n", nw);
printf("h_in: %.22Lg\n", h_in);
if(!vetted) printf("h': %.22Lg\n", h_p);

// Establish the maximum precision that ought to be necessary
// If something goes wrong, we can increase this precision automatically.
maxval = (n_in>n_out)?n_in:n_out;
maxval = (maxval>nw)?maxval:nw;
precision = 2*maxval;

//Initialize all the arbitrary precision values
mpfr_inits2(precision, ap_h_in, ap_p_high, ap_p_low, ap_denom, ap_power_term, ap_psi, ap_omega, ap_outputEntropy, ap_log2epsilon, NULL);

adaquatePrecision = false;

while(!adaquatePrecision) {
adaquatePrecision = true;
//Initialize arbitrary precision versions of h_in and needed constants.
mpfr_set_ld(ap_h_in, h_in, MPFR_RNDN);

// compute Output Entropy (Section 3.1.5.1.2)
// Step 1.
// P_high
mpfr_neg(ap_h_in, ap_h_in, MPFR_RNDN);
mpfr_ui_pow(ap_p_high, 2UL, ap_h_in, MPFR_RNDN);

//P_low
mpfr_ui_sub(ap_p_low, 1UL, ap_p_high, MPFR_RNDN);
mpfr_ui_pow_ui (ap_denom, 2UL, n_in, MPFR_RNDN);
mpfr_sub_ui(ap_denom, ap_denom, 1UL, MPFR_RNDN);
mpfr_div (ap_p_low, ap_p_low, ap_denom, MPFR_RNDN);

//Prior to moving on, calculate a reused power term
mpfr_ui_pow_ui(ap_power_term, 2UL, n_in - n, MPFR_RNDN);

//Step 3: Calculate Psi
mpfr_mul(ap_psi, ap_power_term, ap_p_low, MPFR_RNDN);
mpfr_add(ap_psi, ap_psi, ap_p_high, MPFR_RNDN);

//Step 4: Calculate U (goes into the ap_omega variable)
mpfr_log_ui(ap_omega, 2UL, MPFR_RNDN);
mpfr_mul(ap_omega, ap_omega, ap_power_term, MPFR_RNDN);
mpfr_mul_ui(ap_omega, ap_omega, 2UL*n, MPFR_RNDN);
mpfr_sqrt(ap_omega, ap_omega, MPFR_RNDN);
mpfr_add(ap_omega, ap_omega, ap_power_term, MPFR_RNDN);

//Step 5: Calculate omega
mpfr_mul(ap_omega, ap_omega, ap_p_low, MPFR_RNDN);

//Step 6: Compare the values
if(mpfr_cmp(ap_omega, ap_psi) > 0) {
//omega > psi
mpfr_log2(ap_outputEntropy, ap_omega, MPFR_RNDN);
} else {
//omega <= psi
mpfr_log2(ap_outputEntropy, ap_psi, MPFR_RNDN);
}

//Keep -outputEntropy for calculation of log2epsilon
mpfr_set(ap_log2epsilon, ap_outputEntropy, MPFR_RNDN);

//finalize outputEntropy
mpfr_neg(ap_outputEntropy, ap_outputEntropy, MPFR_RNDN);


if(mpfr_cmp_ui(ap_outputEntropy, n_out) >= 0) {
//Double the precision of everything and try again
adaquatePrecision = false;
precision = 2*precision;
fprintf(stderr, "Indeterminate result. Increasing precision to %ld bits and trying again.\n", precision);
mpfr_set_prec(ap_h_in, precision);
mpfr_set_prec(ap_p_high, precision);
mpfr_set_prec(ap_p_low, precision);
mpfr_set_prec(ap_denom, precision);
mpfr_set_prec(ap_power_term, precision);
mpfr_set_prec(ap_psi, precision);
mpfr_set_prec(ap_omega, precision);
mpfr_set_prec(ap_outputEntropy, precision);
mpfr_set_prec(ap_log2epsilon, precision);
continue;
}

outputEntropy = mpfr_get_ld(ap_outputEntropy, MPFR_RNDN);

if(outputEntropy > 0.999 * (long double)n_out) {
closeToFullEntropy = true;
//Calculate -log2(epsilon)
mpfr_div_ui(ap_log2epsilon, ap_log2epsilon, n_out, MPFR_RNDN);
mpfr_log1p(ap_log2epsilon, ap_log2epsilon, MPFR_RNDN);
//Reuse ap_denom to hold log(2)
mpfr_log_ui(ap_denom, 2UL, MPFR_RNDN);
mpfr_div(ap_log2epsilon, ap_log2epsilon, ap_denom, MPFR_RNDN);
//Make it positive
mpfr_neg(ap_log2epsilon, ap_log2epsilon, MPFR_RNDN);

//Now convert back to a long double
epsilonExp = mpfr_get_ld(ap_log2epsilon, MPFR_RNDN);

//Should this qualify as "full entropy" under the 2012 draft of SP800-90B?
if(mpfr_cmp_ui(ap_log2epsilon, 64) >= 0) {
fullEntropy = true;
}
}
}
else{

//We're done with the calculation. Now print results.
if(vetted) {
printf("\n(Vetted) ");
if(closeToFullEntropy) {
if(outputEntropy == (long double)n_out) {
printf("h_out: Close to %.22Lg\n", outputEntropy);
} else {
printf("h_out: %.22Lg\n", outputEntropy);
}
printf("epsilon = 2^(-%.22Lg)", epsilonExp);
if(fullEntropy) {
printf(": SP800-90B 2012 Full Entropy\n");
} else {
printf("\n");
}
} else {
printf("h_out: %.22Lg\n", outputEntropy);
}
} else {
printf("\n(Non-vetted) ");
h_out = std::min(output_entropy, std::min(0.999*n_out, h_p*n_out));
h_out = std::min(outputEntropy, std::min(0.999L*((long double)n_out), h_p*(long double)n_out));
printf("h_out: %.22Lg\n", h_out);
}

printf("h_out: %f\n", h_out);
return 0;
}

0 comments on commit e869e3a

Please sign in to comment.