Skip to content
Permalink
Browse files

Add String#blank? variants with STTNI

================== Test String Length: 0 ==================
Warming up --------------------------------------
          Fast Blank    14.259k i/100ms
  Fast ActiveSupport    13.329k i/100ms
          Slow Blank    10.174k i/100ms
      New Slow Blank    13.816k i/100ms
  New 2.4 Slow Blank    11.670k i/100ms
         STTNI Blank    13.824k i/100ms
     OPT STTNI Blank    13.775k i/100ms
Calculating -------------------------------------
          Fast Blank     20.035M (±14.8%) i/s -     94.623M in   4.935799s
  Fast ActiveSupport     19.737M (±16.0%) i/s -     93.383M in   4.935819s
          Slow Blank      1.476M (±16.5%) i/s -      7.091M in   4.994757s
      New Slow Blank     17.087M (±10.4%) i/s -     82.523M in   4.943837s
  New 2.4 Slow Blank     16.800M (±12.1%) i/s -     80.535M in   4.937920s
         STTNI Blank     23.243M (±12.4%) i/s -    111.269M in   4.932225s
     OPT STTNI Blank     30.283M (±15.6%) i/s -    142.847M in   4.903891s

Comparison:
     OPT STTNI Blank: 30283481.3 i/s
         STTNI Blank: 23243024.2 i/s - same-ish: difference falls within error
          Fast Blank: 20034811.1 i/s - 1.51x slower
  Fast ActiveSupport: 19736537.6 i/s - 1.53x slower
      New Slow Blank: 17087440.1 i/s - 1.77x slower
  New 2.4 Slow Blank: 16799664.3 i/s - 1.80x slower
          Slow Blank:  1476016.0 i/s - 20.52x slower

================== Test String Length: 6 ==================
Warming up --------------------------------------
          Fast Blank    13.419k i/100ms
  Fast ActiveSupport    12.957k i/100ms
          Slow Blank    10.153k i/100ms
      New Slow Blank    11.879k i/100ms
  New 2.4 Slow Blank    11.852k i/100ms
         STTNI Blank    13.421k i/100ms
     OPT STTNI Blank    14.059k i/100ms
Calculating -------------------------------------
          Fast Blank      9.756M (±14.9%) i/s -     46.537M in   4.961562s
  Fast ActiveSupport      9.421M (±15.3%) i/s -     44.948M in   4.964374s
          Slow Blank      1.389M (±13.6%) i/s -      6.762M in   4.996323s
      New Slow Blank      3.171M (±15.5%) i/s -     15.253M in   4.982404s
  New 2.4 Slow Blank      5.386M (±11.4%) i/s -     26.228M in   4.974770s
         STTNI Blank     21.376M (±14.1%) i/s -    101.329M in   4.931268s
     OPT STTNI Blank     29.402M (±12.2%) i/s -    139.817M in   4.913342s

Comparison:
     OPT STTNI Blank: 29402424.7 i/s
         STTNI Blank: 21376387.7 i/s - 1.38x slower
          Fast Blank:  9756198.0 i/s - 3.01x slower
  Fast ActiveSupport:  9421186.7 i/s - 3.12x slower
  New 2.4 Slow Blank:  5386232.3 i/s - 5.46x slower
      New Slow Blank:  3171289.9 i/s - 9.27x slower
          Slow Blank:  1388642.2 i/s - 21.17x slower

================== Test String Length: 14 ==================
Warming up --------------------------------------
          Fast Blank    13.596k i/100ms
  Fast ActiveSupport    12.724k i/100ms
          Slow Blank    11.755k i/100ms
      New Slow Blank    10.147k i/100ms
  New 2.4 Slow Blank    12.317k i/100ms
         STTNI Blank    14.073k i/100ms
     OPT STTNI Blank    14.081k i/100ms
Calculating -------------------------------------
          Fast Blank     17.438M (±15.6%) i/s -     82.256M in   4.940746s
  Fast ActiveSupport     16.874M (±19.8%) i/s -     77.782M in   4.942805s
          Slow Blank      2.955M (±14.4%) i/s -     14.294M in   4.986853s
      New Slow Blank      1.440M (±13.5%) i/s -      7.012M in   4.995461s
  New 2.4 Slow Blank      4.934M (± 9.9%) i/s -     24.215M in   4.981225s
         STTNI Blank     21.319M (±13.4%) i/s -    101.565M in   4.934347s
     OPT STTNI Blank     27.009M (±12.0%) i/s -    129.278M in   4.924119s

Comparison:
     OPT STTNI Blank: 27009192.4 i/s
         STTNI Blank: 21319235.6 i/s - same-ish: difference falls within error
          Fast Blank: 17438446.4 i/s - 1.55x slower
  Fast ActiveSupport: 16873628.1 i/s - 1.60x slower
  New 2.4 Slow Blank:  4933634.7 i/s - 5.47x slower
          Slow Blank:  2954686.6 i/s - 9.14x slower
      New Slow Blank:  1439843.4 i/s - 18.76x slower

================== Test String Length: 24 ==================
Warming up --------------------------------------
          Fast Blank    14.126k i/100ms
  Fast ActiveSupport    13.068k i/100ms
          Slow Blank    11.753k i/100ms
      New Slow Blank     9.572k i/100ms
  New 2.4 Slow Blank    11.343k i/100ms
         STTNI Blank    14.232k i/100ms
     OPT STTNI Blank    14.753k i/100ms
Calculating -------------------------------------
          Fast Blank     12.329M (±12.3%) i/s -     59.541M in   4.959606s
  Fast ActiveSupport     11.996M (±12.0%) i/s -     57.813M in   4.960360s
          Slow Blank      2.501M (±11.9%) i/s -     12.211M in   4.987997s
      New Slow Blank      1.646M (±11.6%) i/s -      8.060M in   4.992641s
  New 2.4 Slow Blank      4.708M (±11.7%) i/s -     22.958M in   4.979817s
         STTNI Blank     21.717M (±12.4%) i/s -    103.922M in   4.939324s
     OPT STTNI Blank     27.836M (±10.1%) i/s -    134.533M in   4.931179s

Comparison:
     OPT STTNI Blank: 27835864.4 i/s
         STTNI Blank: 21717318.6 i/s - 1.28x slower
          Fast Blank: 12328898.2 i/s - 2.26x slower
  Fast ActiveSupport: 11995526.6 i/s - 2.32x slower
  New 2.4 Slow Blank:  4708149.9 i/s - 5.91x slower
          Slow Blank:  2501065.4 i/s - 11.13x slower
      New Slow Blank:  1645770.4 i/s - 16.91x slower

================== Test String Length: 136 ==================
Calculating -------------------------------------
          Fast Blank     12.142M (±11.3%) i/s -     58.680M in   4.961993s
  Fast ActiveSupport     11.938M (±14.5%) i/s -     57.122M in   4.965141s
          Slow Blank      2.491M (±12.8%) i/s -     12.127M in   4.989460s
      New Slow Blank      1.565M (±15.9%) i/s -      7.522M in   4.993845s
  New 2.4 Slow Blank      4.547M (±14.3%) i/s -     21.930M in   4.979022s
         STTNI Blank     21.340M (±12.6%) i/s -    101.527M in   4.934000s
     OPT STTNI Blank     27.229M (±11.9%) i/s -    130.019M in   4.917849s

Comparison:
     OPT STTNI Blank: 27228862.5 i/s
         STTNI Blank: 21339739.6 i/s - same-ish: difference falls within error
          Fast Blank: 12141560.8 i/s - 2.24x slower
  Fast ActiveSupport: 11937595.6 i/s - 2.28x slower
  New 2.4 Slow Blank:  4547475.6 i/s - 5.99x slower
          Slow Blank:  2491178.2 i/s - 10.93x slower
      New Slow Blank:  1564905.7 i/s - 17.40x slower

================== Test String Length: 145 ==================
Warming up --------------------------------------
          Fast Blank    12.007k i/100ms
  Fast ActiveSupport    11.345k i/100ms
          Slow Blank     9.003k i/100ms
      New Slow Blank    11.446k i/100ms
  New 2.4 Slow Blank    12.120k i/100ms
         STTNI Blank    13.067k i/100ms
     OPT STTNI Blank    13.041k i/100ms
Calculating -------------------------------------
          Fast Blank    699.634k (± 8.9%) i/s -      3.470M in   5.010048s
  Fast ActiveSupport    675.785k (± 6.8%) i/s -      3.358M in   4.996146s
          Slow Blank    541.659k (±13.3%) i/s -      2.647M in   5.007648s
      New Slow Blank      1.939M (±10.2%) i/s -      9.546M in   4.996150s
  New 2.4 Slow Blank      2.726M (±11.3%) i/s -     13.356M in   4.987874s
         STTNI Blank     13.335M (±10.3%) i/s -     64.708M in   4.955954s
     OPT STTNI Blank     15.856M (±12.0%) i/s -     75.964M in   4.948017s

Comparison:
     OPT STTNI Blank: 15855856.1 i/s
         STTNI Blank: 13334696.1 i/s - same-ish: difference falls within error
  New 2.4 Slow Blank:  2725737.8 i/s - 5.82x slower
      New Slow Blank:  1938651.9 i/s - 8.18x slower
          Fast Blank:   699633.6 i/s - 22.66x slower
  Fast ActiveSupport:   675784.9 i/s - 23.46x slower
          Slow Blank:   541658.6 i/s - 29.27x slower
  • Loading branch information...
nurse committed May 19, 2016
1 parent c8fd8d3 commit e6bc209abf81d53c2e3374dc52c2a128570c6055
Showing with 258 additions and 0 deletions.
  1. +107 −0 bench.rb
  2. +1 −0 compile.c
  3. +1 −0 defs/id.def
  4. +28 −0 insns.def
  5. +119 −0 string.c
  6. +1 −0 vm.c
  7. +1 −0 vm_core.h
107 bench.rb
@@ -0,0 +1,107 @@
#!/usr/bin/env ruby
$: << File.dirname(__FILE__)+'/lib'
require 'benchmark/ips'
require 'fast_blank'

# Pure-Ruby reference implementations of a "blank?" predicate, added to
# String so the benchmark below can compare them against the C versions.
# Each returns true when the receiver is empty or contains only characters
# matched by the POSIX bracket class [[:space:]].
class String
# active support implementation
# Matches the whole string (\A ... \z) against zero or more [[:space:]].
def slow_blank?
/\A[[:space:]]*\z/ === self
end

# Short-circuits on empty?, otherwise reports whether no non-space
# character ([[:^space:]]) can be found anywhere in the string.
def new_slow_blank?
empty? || !(/[[:^space:]]/ === self)
end

# Same test as new_slow_blank?, but via Regexp#match? instead of ===
# (the benchmark labels this variant "New 2.4 Slow Blank").
def n24_slow_blank?
empty? || !(/[[:^space:]]/.match? self)
end
end

# Benchmark inputs, from the empty string up to a long multi-line string.
# NOTE(review): the benchmark output recorded with this script reports
# lengths 0, 6, 14, 24, 136 and 145, which do not match the literals as they
# appear here -- whitespace inside them has presumably been collapsed in this
# copy. Confirm against the original file before relying on these values.
test_strings = [
"",
"\r\n\r\n ",
"this is a test",
" this is a longer test",
" this is a longer test
this is a longer test
this is a longer test
this is a longer test
this is a longer test",
"
"
]

# Sanity check before measuring: every alternative implementation must agree
# with String#blank? (presumably the one provided by fast_blank) on every
# input. Note that blank_as?, which is benchmarked below, is not checked here.
test_strings.each do |s|
raise "failed on #{s.inspect}" if s.blank? != s.slow_blank?
raise "failed on #{s.inspect}" if s.blank? != s.new_slow_blank?
raise "failed on #{s.inspect}" if s.blank? != s.n24_slow_blank?
raise "failed on #{s.inspect}" if s.blank? != s.sttni_blank?
raise "failed on #{s.inspect}" if s.blank? != s.opt_sttni_blank?
end

# Run one Benchmark.ips session per input string and print a comparison.
# Every report uses the block form that receives an iteration count (`times`)
# and drives its own `while` loop, so all candidates are measured with the
# same hand-written loop around a single method call.
test_strings.each do |s|
puts "\n================== Test String Length: #{s.length} =================="
Benchmark.ips do |x|
# String#blank? -- presumably the fast_blank C implementation.
x.report("Fast Blank") do |times|
i = 0
while i < times
s.blank?
i += 1
end
end

# String#blank_as? -- presumably fast_blank's ActiveSupport-compatible variant.
x.report("Fast ActiveSupport") do |times|
i = 0
while i < times
s.blank_as?
i += 1
end
end

# Anchored-regexp implementation defined above.
x.report("Slow Blank") do |times|
i = 0
while i < times
s.slow_blank?
i += 1
end
end

# empty? short-circuit plus negated-class regexp via ===.
x.report("New Slow Blank") do |times|
i = 0
while i < times
s.new_slow_blank?
i += 1
end
end

# Same as above but using Regexp#match?.
x.report("New 2.4 Slow Blank") do |times|
i = 0
while i < times
s.n24_slow_blank?
i += 1
end
end

# C implementation (rb_str_blank) called through an ordinary method name.
x.report("STTNI Blank") do |times|
i = 0
while i < times
s.sttni_blank?
i += 1
end
end

# Same C function, but under the method name that the compiler maps to the
# specialized opt_opt_sttni_blank_p instruction.
x.report("OPT STTNI Blank") do |times|
i = 0
while i < times
s.opt_sttni_blank?
i += 1
end
end

# Print the relative ranking of all reports for this input.
x.compare!
end
end
@@ -2371,6 +2371,7 @@ iseq_specialized_instruction(rb_iseq_t *iseq, INSN *iobj)
case idLength: SP_INSN(length); return COMPILE_OK;
case idSize: SP_INSN(size); return COMPILE_OK;
case idEmptyP: SP_INSN(empty_p);return COMPILE_OK;
case idOpt_sttni_blankP: SP_INSN(opt_sttni_blank_p);return COMPILE_OK;
case idSucc: SP_INSN(succ); return COMPILE_OK;
case idNot: SP_INSN(not); return COMPILE_OK;
}
@@ -46,6 +46,7 @@ firstline, predefined = __LINE__+1, %[\
_ UScore
"/*NULL*/" NULL
opt_sttni_blank?
empty?
eql?
respond_to? Respond_to
@@ -2032,6 +2032,34 @@ opt_empty_p
}
}

/**
@c optimize
@e optimized opt_sttni_blank?
@j optimized recv.opt_sttni_blank?().
*/
DEFINE_INSN
opt_opt_sttni_blank_p
(CALL_INFO ci, CALL_CACHE cc)
(VALUE recv)
(VALUE val)
{
/* Fast path: recv is a heap object whose class is exactly rb_cString and
 * the BOP_OPT_STTNI_BLANK_P / STRING_REDEFINED_OP_FLAG check passes, so
 * rb_str_blank() can be called directly without a method dispatch. */
if (!SPECIAL_CONST_P(recv)) {
if (RBASIC_CLASS(recv) == rb_cString &&
BASIC_OP_UNREDEFINED_P(BOP_OPT_STTNI_BLANK_P, STRING_REDEFINED_OP_FLAG)) {
/* local prototype; rb_str_blank is defined in string.c */
VALUE rb_str_blank(VALUE);
val = rb_str_blank(recv);
}
else {
/* not a plain String, or the check above failed: use the generic call */
goto INSN_LABEL(normal_dispatch);
}
}
else {
/* Slow path, also entered through the goto above: push the receiver back
 * and perform an ordinary method call. */
INSN_LABEL(normal_dispatch):
PUSH(recv);
CALL_SIMPLE_METHOD(recv);
}
}

/**
@c optimize
@e optimized succ
119 string.c
@@ -1706,6 +1706,123 @@ rb_str_empty(VALUE str)
return Qfalse;
}

#include <x86intrin.h>
/*
 * Scalar whitespace scanner used by rb_str_blank() for the bytes the SIMD
 * loop cannot classify (multi-byte UTF-8 whitespace) and for the tail of
 * the string.
 *
 * pp       - in/out cursor; on a TRUE return it is advanced past everything
 *            that was consumed.  It is left untouched on a FALSE return.
 * e        - one past the last byte of the string.
 * epilogue - non-zero: consume whitespace all the way to e.
 *            zero: consume multi-byte whitespace, but hand control back to
 *            the caller right after the first ASCII whitespace byte.
 *
 * Returns TRUE if every byte consumed belonged to a whitespace character,
 * FALSE as soon as a non-whitespace byte or a truncated sequence is seen.
 *
 * Recognized characters: U+0009..U+000D, U+0020, U+0085, U+00A0, U+1680,
 * U+2000..U+200A, U+2028, U+2029, U+202F, U+205F and U+3000, matched as
 * UTF-8 byte sequences.
 */
static int
str_blank0(const unsigned char **pp, const unsigned char *e, int epilogue)
{
    const unsigned char *p = *pp;

    while (p < e) {
        /* p is advanced past the lead byte here; the continuation bytes of
         * a multi-byte sequence are therefore p[0], p[1], ... below. */
        switch (*p++) {
          case 9: case 10: case 11: case 12: case 13: case 32:
            if (!epilogue) {
                *pp = p;
                return TRUE;
            }
            break;
          case 0xC2:
            /* U+0085 (C2 85) or U+00A0 (C2 A0) */
            if (p >= e || (*p != 0x85 && *p != 0xA0)) return FALSE;
            p++;
            break;
          case 0xE1:
            /* U+1680 (E1 9A 80) */
            if (e - p < 2 || p[0] != 0x9A || p[1] != 0x80) return FALSE;
            p += 2;
            break;
          case 0xE2:
            /* both continuation bytes must be present */
            if (e - p < 2) return FALSE;
            switch (*p++) {
              case 0x80:
                /* U+2000..U+200A, U+2028, U+2029, U+202F */
                switch (*p++) {
                  default:
                    return FALSE;
                  case 0x80: case 0x81: case 0x82: case 0x83: case 0x84:
                  case 0x85: case 0x86: case 0x87: case 0x88: case 0x89:
                  case 0x8A: case 0xA8: case 0xA9: case 0xAF:
                    break;
                }
                break;
              case 0x81:
                /* U+205F (E2 81 9F) */
                if (*p++ != 0x9F) return FALSE;
                break;
              default:
                return FALSE;
            }
            break;
          case 0xE3:
            /* U+3000 (E3 80 80) */
            if (e - p < 2 || p[0] != 0x80 || p[1] != 0x80) return FALSE;
            p += 2;
            break;
          default:
            return FALSE;
        }
    }
    *pp = p;
    return TRUE;
}

/*
 *  call-seq:
 *     str.blank?   -> true or false
 *
 *  Returns <code>true</code> if <i>str</i> is empty or has only (Unicode)
 *  whitespace characters.
 *
 *     "hello".blank?   #=> false
 *     " ".blank?       #=> true
 *     "\u{0009 000a 000b 000c 000d 0020 0085 00a0 1680}".blank?   #=> true
 *     "\u{2000 2001 2002 2003 2004 2005 2006 2007}".blank?        #=> true
 *     "\u{2008 2009 200a 2028 2029 202f 205f 3000}".blank?        #=> true
 *
 *  See also http://api.rubyonrails.org/v4.2/classes/String.html#method-i-blank-3F
 *
 *  Implementation notes:
 *  - ASCII whitespace is scanned 16 bytes at a time with the SSE4.2 string
 *    instructions (pcmpestri family); anything else is delegated to
 *    str_blank0().
 *  - NOTE(review): the bytes are interpreted as UTF-8 without consulting the
 *    string's encoding.
 */

VALUE
rb_str_blank(VALUE str)
{
    const unsigned char *p = (const unsigned char *)RSTRING_PTR(str);
    const unsigned char *e = (const unsigned char *)RSTRING_END(str);
    intptr_t pe = (intptr_t)e;
    /* the six ASCII whitespace bytes, in the low lanes of the register */
    const __m128i mask = _mm_set_epi8(0,0,0,0,0,0,0,0,0,0,9,10,11,12,13,32);
    const int masksize = 6;
    /* negative polarity: a result bit is set for each valid byte that is
     * NOT in the mask set */
    const int mode = _SIDD_CMP_EQUAL_ANY|_SIDD_UBYTE_OPS|_SIDD_MASKED_NEGATIVE_POLARITY;

    if (RSTRING_LEN(str) == 0) return Qtrue;

    /* set the edge of a page before the end of string: when the end lies in
     * the last bytes of a (4 KiB) page, stop the 16-byte loads early enough
     * that none of them can run over the page boundary.  The parentheses
     * matter: `pe & 0xfff > 0xff1` would parse as `pe & (0xfff > 0xff1)`. */
    if ((pe & 0xfff) > 0xff1) {
        pe &= ~0xfff;
        pe |= 0xff1;
    }

    /* The guard is re-checked on every iteration, including after the
     * scalar scanner has moved p, so no load is issued at or past pe. */
    while ((intptr_t)p < pe) {
        const ptrdiff_t sz = e - p;
        /* fold sz into an int that is >= 16 whenever sz is */
        const int len = (int)((sz&INT_MAX) | (sz >> 27));
        const __m128i m = _mm_loadu_si128((__m128i const *)p);
        int idx;

        /* CF: 1 if there's non spaces
         * ZF: 1 if reached the end
         */
        if (_mm_cmpestra(mask, masksize, m, len, mode)) { /* CF=0 ZF=0 */
            /* 16 whitespace bytes and more input follows */
            p += sizeof(__m128i);
            continue;
        }
        /* _mm_cmpestrc() is deliberately not used to test CF here: GCC 6
         * was observed to emit the string instruction twice for it. */
        idx = _mm_cmpestri(mask, masksize, m, len, mode);
        /* idx == 16 with fewer than 16 bytes left: only whitespace remained */
        if (sz < idx) return Qtrue;
        p += idx;
        /* p is on a byte that is not ASCII whitespace; let the scalar
         * scanner decide whether it starts a multi-byte space */
        if (!str_blank0(&p, e, FALSE)) return Qfalse;
    }
    /* scalar scan of whatever is left between the guard and the end */
    if (!str_blank0(&p, e, TRUE)) return Qfalse;
    return Qtrue;
}

/*
* call-seq:
* str + other_str -> new_str
@@ -9675,6 +9792,8 @@ Init_String(void)
rb_define_method(rb_cString, "size", rb_str_length, 0);
rb_define_method(rb_cString, "bytesize", rb_str_bytesize, 0);
rb_define_method(rb_cString, "empty?", rb_str_empty, 0);
rb_define_method(rb_cString, "sttni_blank?", rb_str_blank, 0);
rb_define_method(rb_cString, "opt_sttni_blank?", rb_str_blank, 0);
rb_define_method(rb_cString, "=~", rb_str_match, 1);
rb_define_method(rb_cString, "match", rb_str_match_m, -1);
rb_define_method(rb_cString, "succ", rb_str_succ, 0);
1 vm.c
@@ -1477,6 +1477,7 @@ vm_init_redefined_flag(void)
OP(ASET, ASET), (C(Array), C(Hash));
OP(Length, LENGTH), (C(Array), C(String), C(Hash));
OP(Size, SIZE), (C(Array), C(String), C(Hash));
OP(Opt_sttni_blankP, OPT_STTNI_BLANK_P), (C(String));
OP(EmptyP, EMPTY_P), (C(Array), C(String), C(Hash));
OP(Succ, SUCC), (C(Integer), C(String), C(Time));
OP(EqTilde, MATCH), (C(Regexp), C(String));
@@ -445,6 +445,7 @@ enum ruby_basic_operators {
BOP_ASET,
BOP_LENGTH,
BOP_SIZE,
BOP_OPT_STTNI_BLANK_P,
BOP_EMPTY_P,
BOP_SUCC,
BOP_GT,

0 comments on commit e6bc209

Please sign in to comment.
You can’t perform that action at this time.