Skip to content

Commit

Permalink
use vpaddb in the domain-crossing test, fix test text
Browse files Browse the repository at this point in the history
  • Loading branch information
travisdowns committed Feb 12, 2018
1 parent f38db8d commit 0c4e467
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
8 changes: 4 additions & 4 deletions vector-benches.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ void register_vector(GroupList& list) {
using default_maker = BenchmarkMaker<TIMER>;

auto benches = std::vector<Benchmark> {
default_maker::template make_bench<bypass_vmovdqa_latency>(vector_group.get(), "movdqa", "movdqa [mem] -> pxor latency", 1, []{ return nullptr; }, 100000),
default_maker::template make_bench<bypass_vmovdqu_latency>(vector_group.get(), "movdqu", "movdqu [mem] -> pxor latency", 1, []{ return nullptr; }, 100000),
default_maker::template make_bench<bypass_vmovups_latency>(vector_group.get(), "movups", "movups [mem] -> pxor latency", 1, []{ return nullptr; }, 100000),
default_maker::template make_bench<bypass_vmovupd_latency>(vector_group.get(), "movupd", "movupd [mem] -> pxor latency", 1, []{ return nullptr; }, 100000),
default_maker::template make_bench<bypass_vmovdqa_latency>(vector_group.get(), "movdqa", "movdqa [mem] -> paddb latency", 1, []{ return nullptr; }, 100000),
default_maker::template make_bench<bypass_vmovdqu_latency>(vector_group.get(), "movdqu", "movdqu [mem] -> paddb latency", 1, []{ return nullptr; }, 100000),
default_maker::template make_bench<bypass_vmovups_latency>(vector_group.get(), "movups", "movups [mem] -> paddb latency", 1, []{ return nullptr; }, 100000),
default_maker::template make_bench<bypass_vmovupd_latency>(vector_group.get(), "movupd", "movupd [mem] -> paddb latency", 1, []{ return nullptr; }, 100000),

default_maker::template make_bench<bypass_movd_latency>(vector_group.get(), "movd", "movq rax,xmm0 -> xmm0,rax lat", 1, []{ return nullptr; }, 100000),
default_maker::template make_bench<bypass_movq_latency>(vector_group.get(), "movq", "movq rax,xmm0 -> xmm0,rax lat", 1, []{ return nullptr; }, 100000)
Expand Down
7 changes: 4 additions & 3 deletions x86_methods.asm
Original file line number Diff line number Diff line change
Expand Up @@ -675,10 +675,11 @@ define_bench bypass_%1_latency
sub rsp, 120
xor eax, eax
vpxor xmm1, xmm1, xmm1
vpsubb xmm1, xmm1, [rsp + rax] ; to exactly cancel out the vpaddb below
.top:
%1 xmm0, [rsp + rax] ; 7 cycles
vpand xmm0, xmm0, xmm1 ; 1 cycle
vmovq rax, xmm0 ; 1 cycle
%1 xmm0, [rsp + rax] ; 6 cycles
vpaddb xmm0, xmm0, xmm1 ; 1 cycle
vmovq rax, xmm0 ; 2 cycles
dec rdi
jnz .top
add rsp, 120
Expand Down

0 comments on commit 0c4e467

Please sign in to comment.