Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
150 lines (125 sloc) 4.54 KB
/*
* Copyright (c) 2019 Trail of Bits, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdint.h>
#include <stdbool.h>
#include "output.h"
#include <emmintrin.h>
#include <immintrin.h>
#include <smmintrin.h>
#include "avx512.h"
#ifndef __FILE_NAME__
#define __FILE_NAME__ "AVX512"
#endif
/*
 * Report whether the running CPU advertises the AVX-512BW feature.
 *
 * Returns true when __builtin_cpu_supports("avx512bw") yields a non-zero
 * value, false otherwise.
 */
bool avx512_check(void) {
    const int has_avx512bw = __builtin_cpu_supports("avx512bw");
    return has_avx512bw != 0;
}
// use AVX512 instructions to compare 8 64-bit quantities at once
// The inner loop is unrolled to maximize SIMD execution performance
// Eight 64-bit lanes, i.e. exactly one 512-bit register's worth of data.
// The alignment is 64 bytes (not 32) so that a VECTOR is always a valid
// operand for aligned 512-bit loads such as _mm512_load_si512, which may
// raise a general-protection fault on addresses that are not 64-byte aligned.
typedef uint64_t VECTOR[8] __attribute__ ((aligned (64)));
/*
 * Search the inclusive range [start, end] for `secret`, testing 64 candidate
 * values (8 vectors of 8 x 64-bit lanes) per unrolled loop iteration.
 *
 * start, end  bounds of the range to scan.
 *             NOTE(review): start <= end is assumed and not checked here.
 * secret      value being searched for.
 * found       out-parameter; set to true when `secret` lies in the range,
 *             false otherwise.
 *
 * Returns the number of candidates covered by fully completed SIMD
 * iterations (always a multiple of 64). The batch in which the match
 * occurred and the scalar tail are not counted.
 *
 * The target attribute lets the AVX-512F intrinsics compile even when the
 * translation unit is not built with -mavx512f; callers are expected to
 * gate execution on a runtime CPU check.
 */
__attribute__((target("avx512f")))
uint64_t avx512_do_range(
    uint64_t start, uint64_t end,
    uint64_t secret, bool *found)
{
    // how many comparisons are done per loop: 8 vectors of 8 lanes each
    const unsigned OPS_PER_LOOP = 8*8;
    const uint64_t max_iterations = (end - start) / OPS_PER_LOOP;

    *found = false;

    // Build the vectors directly in registers. This avoids going through a
    // memory buffer, whose alignment an aligned 512-bit load would depend on.
    // every vector step advances each lane by 8, since 8 values are compared at once
    const __m512i avx_increment = _mm512_set1_epi64(8);
    // the value every lane is tested against
    const __m512i avx_secret = _mm512_set1_epi64((long long)secret);
    // lanes hold start+0 .. start+7
    __m512i avx_cursor = _mm512_setr_epi64(
        (long long)(start + 0), (long long)(start + 1),
        (long long)(start + 2), (long long)(start + 3),
        (long long)(start + 4), (long long)(start + 5),
        (long long)(start + 6), (long long)(start + 7));

    // number of unrolled iterations that finished without seeing the secret
    uint64_t completed = 0;

    // loop unrolled by 8
    while (completed < max_iterations) {
        // the eight candidate vectors of this batch
        const __m512i v0 = avx_cursor;
        const __m512i v1 = _mm512_add_epi64(v0, avx_increment);
        const __m512i v2 = _mm512_add_epi64(v1, avx_increment);
        const __m512i v3 = _mm512_add_epi64(v2, avx_increment);
        const __m512i v4 = _mm512_add_epi64(v3, avx_increment);
        const __m512i v5 = _mm512_add_epi64(v4, avx_increment);
        const __m512i v6 = _mm512_add_epi64(v5, avx_increment);
        const __m512i v7 = _mm512_add_epi64(v6, avx_increment);

        // each compare yields one bit per lane; collect them as plain integers
        unsigned hits = _mm512_cmpeq_epi64_mask(v0, avx_secret);
        hits |= _mm512_cmpeq_epi64_mask(v1, avx_secret);
        hits |= _mm512_cmpeq_epi64_mask(v2, avx_secret);
        hits |= _mm512_cmpeq_epi64_mask(v3, avx_secret);
        hits |= _mm512_cmpeq_epi64_mask(v4, avx_secret);
        hits |= _mm512_cmpeq_epi64_mask(v5, avx_secret);
        hits |= _mm512_cmpeq_epi64_mask(v6, avx_secret);
        hits |= _mm512_cmpeq_epi64_mask(v7, avx_secret);

        if (hits != 0) {
            // the secret is somewhere in this batch; do not count it as completed
            break;
        }

        avx_cursor = _mm512_add_epi64(v7, avx_increment);
        ++completed;
    }

    // First value of the batch the loop stopped at: either the batch that
    // contained the match, or the start of the sub-64 remainder of the range.
    const uint64_t batch_base = start + completed * OPS_PER_LOOP;

    // Decide whether the secret lies in [batch_base, batch_base + 63] and not
    // past `end`. Comparing the offset (secret - batch_base) instead of
    // forming batch_base + i keeps the check free of 64-bit wrap-around when
    // `end` is close to UINT64_MAX.
    if (secret >= batch_base &&
        secret <= end &&
        secret - batch_base < OPS_PER_LOOP)
    {
        *found = true;
    }

    // explicit match on the inclusive upper bound, kept from the original logic
    if (secret == end) {
        *found = true;
    }

    return completed * OPS_PER_LOOP;
}
/*
 * Entry point for the AVX-512 search method.
 *
 * Checks CPU support through avx512_check(); when it is missing, an error is
 * logged and the process exits with status -1. Otherwise the search over
 * [h_start, h_end] is delegated to avx512_do_range().
 *
 * secret   value to look for.
 * found    out-parameter, filled in by avx512_do_range().
 * h_start  first value of the range.
 * h_end    last value of the range.
 *
 * Returns whatever avx512_do_range() returns.
 */
uint64_t avx512_method(uint64_t secret, bool *found, uint64_t h_start, uint64_t h_end) {
    const bool cpu_ok = avx512_check();

    if (cpu_ok == false) {
        log_error(
            __FILE_NAME__,
            "Requested to use AVX512 method but your CPU doesn't support AVX512\n");
        exit(-1);
    }

    const uint64_t ops_done = avx512_do_range(h_start, h_end, secret, found);
    return ops_done;
}
You can’t perform that action at this time.