From 443e1b481c1cc5d213265e0f5a629aaa04b96923 Mon Sep 17 00:00:00 2001
From: Jean Boussier <jean.boussier@gmail.com>
Date: Fri, 3 May 2024 17:36:57 +0200
Subject: [PATCH] PoC: Index Result rows rather than to convert them into
 hashes

Using the samebenchmark as https://github.com/rails/rails/pull/51726

A significant part of the memory footprint comes from `Result#each`:

```
Total allocated: 4.61 MB (43077 objects)
Total retained: 3.76 MB (27621 objects)

allocated memory by file
-----------------------------------
 391.52 kB  activerecord/lib/active_record/result.rb

retained memory by file
-----------------------------------
 374.40 kB  activerecord/lib/active_record/result.rb
```

Rows are initially stored as arrays, but `Result#each` convert them
to hashes. Depending on how many elements they contain, hashes use
between 2 and 5 times as much memory than arrays.

|    Length |      Hash |     Array |      Diff |
| --------- | --------- | --------- | --------- |
|         1 |       160B |        40B |      -120B |
|         2 |       160B |        40B |      -120B |
|         3 |       160B |        40B |      -120B |
|         4 |       160B |        80B |       -80B |
|         5 |       160B |        80B |       -80B |
|         6 |       160B |        80B |       -80B |
|         7 |       160B |        80B |       -80B |
|         8 |       160B |        80B |       -80B |
|         9 |       464B |       160B |      -304B |
|        10 |       464B |       160B |      -304B |
|        11 |       464B |       160B |      -304B |
|        12 |       464B |       160B |      -304B |
|        13 |       464B |       160B |      -304B |
|        14 |       464B |       160B |      -304B |
|        15 |       464B |       160B |      -304B |
|        16 |       464B |       160B |      -304B |
|        17 |       912B |       160B |      -752B |
|        18 |       912B |       160B |      -752B |
|        19 |       912B |       320B |      -592B |
|        20 |       912B |       320B |      -592B |
|        21 |       912B |       320B |      -592B |
|        22 |       912B |       320B |      -592B |
|        23 |       912B |       320B |      -592B |
|        24 |       912B |       320B |      -592B |
|        25 |       912B |       320B |      -592B |
|        26 |       912B |       320B |      -592B |
|        27 |       912B |       320B |      -592B |
|        28 |       912B |       320B |      -592B |
|        29 |       912B |       320B |      -592B |
|        30 |       912B |       320B |      -592B |
|        31 |       912B |       320B |      -592B |
|        32 |       912B |       320B |      -592B |
|        33 |      1744B |       320B |     -1424B |
|        34 |      1744B |       320B |     -1424B |
|        35 |      1744B |       320B |     -1424B |
|        36 |      1744B |       320B |     -1424B |
|        37 |      1744B |       320B |     -1424B |
|        38 |      1744B |       320B |     -1424B |
|        39 |      1744B |       640B |     -1104B |
|        40 |      1744B |       640B |     -1104B |
|        41 |      1744B |       640B |     -1104B |
|        42 |      1744B |       640B |     -1104B |
|        43 |      1744B |       640B |     -1104B |
|        44 |      1744B |       640B |     -1104B |
|        45 |      1744B |       640B |     -1104B |
|        46 |      1744B |       640B |     -1104B |
|        47 |      1744B |       640B |     -1104B |
|        48 |      1744B |       640B |     -1104B |
|        49 |      1744B |       640B |     -1104B |
|        50 |      1744B |       640B |     -1104B |

Rather than to convert rows into hashes, we can loopkup the column index
into a single hash common to all rows. To not complexify the code too much,
rather than to pass the row array and the column index, we wrap both into
an `IndexedRow` object, which uses an extra `40B` object, but that's still
less memory even in the worst case.

After:

```
Total allocated: 4.32 MB (43079 objects)
Total retained: 3.65 MB (29725 objects)

allocated memory by file
-----------------------------------
 101.66 kB  activerecord/lib/active_record/result.rb

retained memory by file
-----------------------------------
  84.70 kB  activerecord/lib/active_record/result.rb
```

As for access speed, it's of course a bit slower, but not by much,
it's between `1.5` and `2` times slower, but remains in the 10's of M
iterations per second, so I think this overhead is negligible compared
to all the work needed to access a model attribute.

Also the `LazyAttributeSet` class only access this once, after the
attribute is casted, the resulting value is still stored in a regular
`Hash`.
---
 .../connection_adapters/sqlite3_adapter.rb    |  2 +
 activerecord/lib/active_record/result.rb      | 52 ++++++++++++++++++-
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/activerecord/lib/active_record/connection_adapters/sqlite3_adapter.rb b/activerecord/lib/active_record/connection_adapters/sqlite3_adapter.rb
index fc172fee00d32..d02466ffa3ab1 100644
--- a/activerecord/lib/active_record/connection_adapters/sqlite3_adapter.rb
+++ b/activerecord/lib/active_record/connection_adapters/sqlite3_adapter.rb
@@ -691,6 +691,8 @@ def table_structure_with_collation(table_name, basic_structure)
             end
 
             basic_structure.map do |column|
+              column = column.to_hash
+
               column_name = column["name"]
 
               if collation_hash.has_key? column_name
diff --git a/activerecord/lib/active_record/result.rb b/activerecord/lib/active_record/result.rb
index 617c53797ecd3..1d2ee445d2cc6 100644
--- a/activerecord/lib/active_record/result.rb
+++ b/activerecord/lib/active_record/result.rb
@@ -36,6 +36,53 @@ module ActiveRecord
   class Result
     include Enumerable
 
+    class IndexedRow
+      def initialize(column_indexes, row)
+        @column_indexes = column_indexes
+        @row = row
+      end
+
+      def each_key(&block)
+        @column_indexes.each_key(&block)
+      end
+
+      def keys
+        @column_indexes.keys
+      end
+
+      def ==(other)
+        if other.is_a?(Hash)
+          to_hash == other
+        else
+          super
+        end
+      end
+
+      def key?(column)
+        @column_indexes.key?(column)
+      end
+
+      def fetch(column)
+        if index = @column_indexes[column]
+          @row[index]
+        elsif block_given?
+          yield
+        else
+          raise KeyError, "key not found: #{column.inspect}"
+        end
+      end
+
+      def [](column)
+        if index = @column_indexes[column]
+          @row[index]
+        end
+      end
+
+      def to_hash
+        @column_indexes.transform_values { |index| @row[index] }
+      end
+    end
+
     attr_reader :columns, :rows, :column_types
 
     def self.empty(async: false) # :nodoc:
@@ -170,8 +217,9 @@ def column_type(name, index, type_overrides)
       def hash_rows
         # We use transform_values to rows.
         # This is faster because we avoid any reallocs and avoid hashing entirely.
-        @hash_rows ||= @rows.map do |row|
-          column_indexes.transform_values { |index| row[index] }
+        @hash_rows ||= begin
+          columns = column_indexes
+          @rows.map { |row| IndexedRow.new(columns, row) }
         end
       end