From 9ef7f406e6b2ed6cdc237c281bc5b3ad36c67e2a Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sun, 11 Apr 2021 19:34:07 +0200 Subject: [PATCH] Optimize the HStore parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test string from taken from the the test suite: `"\"a\\\\b\"=>\"b\\\\ar\", \"1\\\"foo\"=>\"2\""` Bench: ``` Warming up -------------------------------------- original 6.896k i/100ms patched 15.787k i/100ms Calculating ------------------------------------- original 68.176k (± 1.2%) i/s - 344.800k in 5.058270s patched 157.786k (± 1.0%) i/s - 789.350k in 5.003144s Comparison: patched: 157786.0 i/s original: 68176.1 i/s - 2.31x (± 0.00) slower ``` Memory before: allocated 4376 bytes (55 objects) Memory after: allocated: 1880 bytes (23 objects) --- .../postgresql/oid/hstore.rb | 21 ++++++---- .../cases/adapters/postgresql/hstore_test.rb | 42 +++++++------------ 2 files changed, 28 insertions(+), 35 deletions(-) diff --git a/activerecord/lib/active_record/connection_adapters/postgresql/oid/hstore.rb b/activerecord/lib/active_record/connection_adapters/postgresql/oid/hstore.rb index 8d4dacbd6435a..1fbf3ffe9db6a 100644 --- a/activerecord/lib/active_record/connection_adapters/postgresql/oid/hstore.rb +++ b/activerecord/lib/active_record/connection_adapters/postgresql/oid/hstore.rb @@ -13,11 +13,17 @@ def type def deserialize(value) if value.is_a?(::String) - ::Hash[value.scan(HstorePair).map { |k, v| - v = v.upcase == "NULL" ? nil : v.gsub(/\A"(.*)"\Z/m, '\1').gsub(/\\(.)/, '\1') - k = k.gsub(/\A"(.*)"\Z/m, '\1').gsub(/\\(.)/, '\1') - [k, v] - }] + hash = {} + value.scan(HstorePair) do |key, value| + key.gsub!('\"', '"') + key.gsub!('\\\\', '\\') + + value&.gsub!('\"', '"') + value&.gsub!('\\\\', '\\') + + hash[key] = value + end + hash else value end @@ -47,9 +53,8 @@ def changed_in_place?(raw_old_value, new_value) private HstorePair = begin - quoted_string = /"[^"\\]*(?:\\.[^"\\]*)*"/ - unquoted_string = /(?:\\.|[^\s,])[^\s=,\\]*(?:\\.[^\s=,\\]*|=[^,>])*/ - /(#{quoted_string}|#{unquoted_string})\s*=>\s*(#{quoted_string}|#{unquoted_string})/ + quoted_string = /"([^"\\]*(?:\\.[^"\\]*)*)"/ + /#{quoted_string}\s*=>\s*(?:(?=NULL)|#{quoted_string})/ end def escape_hstore(value) diff --git a/activerecord/test/cases/adapters/postgresql/hstore_test.rb b/activerecord/test/cases/adapters/postgresql/hstore_test.rb index 671d8211a78c5..30885918e1976 100644 --- a/activerecord/test/cases/adapters/postgresql/hstore_test.rb +++ b/activerecord/test/cases/adapters/postgresql/hstore_test.rb @@ -111,8 +111,8 @@ def test_cast_value_on_write def test_type_cast_hstore assert_equal({ "1" => "2" }, @type.deserialize("\"1\"=>\"2\"")) assert_equal({}, @type.deserialize("")) - assert_equal({ "key" => nil }, @type.deserialize("key => NULL")) - assert_equal({ "c" => "}", '"a"' => 'b "a b' }, @type.deserialize(%q(c=>"}", "\"a\""=>"b \"a b"))) + assert_cycle("key" => nil) + assert_cycle("c" => "}", '"a"' => 'b "a b') end def test_with_store_accessors @@ -198,48 +198,36 @@ def test_hstore_dirty_from_database_equal assert_not_predicate hstore, :changed? end - def test_gen1 - assert_equal('" "=>""', @type.serialize(" " => "")) + def test_spaces + assert_cycle(" " => " ") end - def test_gen2 - assert_equal('","=>""', @type.serialize("," => "")) + def test_commas + assert_cycle("," => "") end - def test_gen3 - assert_equal('"="=>""', @type.serialize("=" => "")) + def test_signs + assert_cycle("=" => ">") end - def test_gen4 - assert_equal('">"=>""', @type.serialize(">" => "")) + def test_various_null + assert_cycle({ "a" => nil, "b" => nil, "c" => "NuLl", "null" => "c" }) end - def test_parse1 - assert_equal({ "a" => nil, "b" => nil, "c" => "NuLl", "null" => "c" }, @type.deserialize('a=>null,b=>NuLl,c=>"NuLl",null=>c')) - end - - def test_parse2 - assert_equal({ " " => " " }, @type.deserialize("\\ =>\\ ")) - end - - def test_parse3 - assert_equal({ "=" => ">" }, @type.deserialize("==>>")) - end - - def test_parse4 - assert_equal({ "=a" => "q=w" }, @type.deserialize('\=a=>q=w')) + def test_equal_signs + assert_cycle("=a" => "q=w") end def test_parse5 - assert_equal({ "=a" => "q=w" }, @type.deserialize('"=a"=>q\=w')) + assert_cycle("=a" => "q=w") end def test_parse6 - assert_equal({ "\"a" => "q>w" }, @type.deserialize('"\"a"=>q>w')) + assert_cycle("\"a" => "q>w") end def test_parse7 - assert_equal({ "\"a" => "q\"w" }, @type.deserialize('\"a=>q"w')) + assert_cycle("\"a" => "q\"w") end def test_rewrite