From 443e1b481c1cc5d213265e0f5a629aaa04b96923 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 3 May 2024 17:36:57 +0200 Subject: [PATCH] PoC: Index Result rows rather than to convert them into hashes Using the samebenchmark as https://github.com/rails/rails/pull/51726 A significant part of the memory footprint comes from `Result#each`: ``` Total allocated: 4.61 MB (43077 objects) Total retained: 3.76 MB (27621 objects) allocated memory by file ----------------------------------- 391.52 kB activerecord/lib/active_record/result.rb retained memory by file ----------------------------------- 374.40 kB activerecord/lib/active_record/result.rb ``` Rows are initially stored as arrays, but `Result#each` convert them to hashes. Depending on how many elements they contain, hashes use between 2 and 5 times as much memory than arrays. | Length | Hash | Array | Diff | | --------- | --------- | --------- | --------- | | 1 | 160B | 40B | -120B | | 2 | 160B | 40B | -120B | | 3 | 160B | 40B | -120B | | 4 | 160B | 80B | -80B | | 5 | 160B | 80B | -80B | | 6 | 160B | 80B | -80B | | 7 | 160B | 80B | -80B | | 8 | 160B | 80B | -80B | | 9 | 464B | 160B | -304B | | 10 | 464B | 160B | -304B | | 11 | 464B | 160B | -304B | | 12 | 464B | 160B | -304B | | 13 | 464B | 160B | -304B | | 14 | 464B | 160B | -304B | | 15 | 464B | 160B | -304B | | 16 | 464B | 160B | -304B | | 17 | 912B | 160B | -752B | | 18 | 912B | 160B | -752B | | 19 | 912B | 320B | -592B | | 20 | 912B | 320B | -592B | | 21 | 912B | 320B | -592B | | 22 | 912B | 320B | -592B | | 23 | 912B | 320B | -592B | | 24 | 912B | 320B | -592B | | 25 | 912B | 320B | -592B | | 26 | 912B | 320B | -592B | | 27 | 912B | 320B | -592B | | 28 | 912B | 320B | -592B | | 29 | 912B | 320B | -592B | | 30 | 912B | 320B | -592B | | 31 | 912B | 320B | -592B | | 32 | 912B | 320B | -592B | | 33 | 1744B | 320B | -1424B | | 34 | 1744B | 320B | -1424B | | 35 | 1744B | 320B | -1424B | | 36 | 1744B | 320B | -1424B | | 37 | 1744B | 320B | -1424B | | 38 | 1744B | 320B | -1424B | | 39 | 1744B | 640B | -1104B | | 40 | 1744B | 640B | -1104B | | 41 | 1744B | 640B | -1104B | | 42 | 1744B | 640B | -1104B | | 43 | 1744B | 640B | -1104B | | 44 | 1744B | 640B | -1104B | | 45 | 1744B | 640B | -1104B | | 46 | 1744B | 640B | -1104B | | 47 | 1744B | 640B | -1104B | | 48 | 1744B | 640B | -1104B | | 49 | 1744B | 640B | -1104B | | 50 | 1744B | 640B | -1104B | Rather than to convert rows into hashes, we can loopkup the column index into a single hash common to all rows. To not complexify the code too much, rather than to pass the row array and the column index, we wrap both into an `IndexedRow` object, which uses an extra `40B` object, but that's still less memory even in the worst case. After: ``` Total allocated: 4.32 MB (43079 objects) Total retained: 3.65 MB (29725 objects) allocated memory by file ----------------------------------- 101.66 kB activerecord/lib/active_record/result.rb retained memory by file ----------------------------------- 84.70 kB activerecord/lib/active_record/result.rb ``` As for access speed, it's of course a bit slower, but not by much, it's between `1.5` and `2` times slower, but remains in the 10's of M iterations per second, so I think this overhead is negligible compared to all the work needed to access a model attribute. Also the `LazyAttributeSet` class only access this once, after the attribute is casted, the resulting value is still stored in a regular `Hash`. --- .../connection_adapters/sqlite3_adapter.rb | 2 + activerecord/lib/active_record/result.rb | 52 ++++++++++++++++++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/activerecord/lib/active_record/connection_adapters/sqlite3_adapter.rb b/activerecord/lib/active_record/connection_adapters/sqlite3_adapter.rb index fc172fee00d32..d02466ffa3ab1 100644 --- a/activerecord/lib/active_record/connection_adapters/sqlite3_adapter.rb +++ b/activerecord/lib/active_record/connection_adapters/sqlite3_adapter.rb @@ -691,6 +691,8 @@ def table_structure_with_collation(table_name, basic_structure) end basic_structure.map do |column| + column = column.to_hash + column_name = column["name"] if collation_hash.has_key? column_name diff --git a/activerecord/lib/active_record/result.rb b/activerecord/lib/active_record/result.rb index 617c53797ecd3..1d2ee445d2cc6 100644 --- a/activerecord/lib/active_record/result.rb +++ b/activerecord/lib/active_record/result.rb @@ -36,6 +36,53 @@ module ActiveRecord class Result include Enumerable + class IndexedRow + def initialize(column_indexes, row) + @column_indexes = column_indexes + @row = row + end + + def each_key(&block) + @column_indexes.each_key(&block) + end + + def keys + @column_indexes.keys + end + + def ==(other) + if other.is_a?(Hash) + to_hash == other + else + super + end + end + + def key?(column) + @column_indexes.key?(column) + end + + def fetch(column) + if index = @column_indexes[column] + @row[index] + elsif block_given? + yield + else + raise KeyError, "key not found: #{column.inspect}" + end + end + + def [](column) + if index = @column_indexes[column] + @row[index] + end + end + + def to_hash + @column_indexes.transform_values { |index| @row[index] } + end + end + attr_reader :columns, :rows, :column_types def self.empty(async: false) # :nodoc: @@ -170,8 +217,9 @@ def column_type(name, index, type_overrides) def hash_rows # We use transform_values to rows. # This is faster because we avoid any reallocs and avoid hashing entirely. - @hash_rows ||= @rows.map do |row| - column_indexes.transform_values { |index| row[index] } + @hash_rows ||= begin + columns = column_indexes + @rows.map { |row| IndexedRow.new(columns, row) } end end