Skip to content

Commit

Permalink
Add key_hash for smaller index
Browse files Browse the repository at this point in the history
This adds a key_hash column to the solid_cache_entries table. This
column is a 64-bit integer that is a truncated SHA256 hash of the key.
This allows us to use a smaller index than the 1024 byte key column.

The byte_size column is also added to the table. This is the size of
the value in bytes. This is for a separate feature but is included in
this commit to piggyback on the migrations.

The migrations themselves are split into three steps. This allows
existing tables to be migrated without downtime.

There is a new configuration field `key_hash_stage` that can be set to
`:ignored`, `:unindexed` or `:indexed`. This controls how the key_hash
column is used and should be set as the migrations are completed.

The default is :indexed, which is what is required for fresh
installations of solid_cache. In this case you can apply all three
migrations at the same time.

In the case of an existing setup however, the steps are:

1. Install the new gem, install the migrations and set
   `config.solid_cache.key_hash_stage = :ignored`
2. Run the first migration, which adds the new columns
3. Update `config.solid_cache.key_hash_stage = :unindexed`
4. Backfill or truncate the table
5. Run the second migration, which adds the indexes and null constraints
6. Update `config.solid_cache.key_hash_stage = :indexed`
7. Run the third migration, which removes the old key index

For later versions of the gem we'll assume that the key_hash column
exists and remove the `config.solid_cache.key_hash_stage` setting. We'll
also squash the migrations into a single file.
  • Loading branch information
djmb committed Jan 24, 2024
1 parent 2bfd513 commit 2a2a3a8
Show file tree
Hide file tree
Showing 13 changed files with 267 additions and 35 deletions.
100 changes: 85 additions & 15 deletions app/models/solid_cache/entry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ module SolidCache
class Entry < Record
include Expiration

# Byte sizes of the fixed-width parts of a row. Their sum is the constant
# overhead that byte_size_for adds to the key and value byte lengths.
ID_BYTE_SIZE = 8
CREATED_AT_BYTE_SIZE = 8
KEY_HASH_BYTE_SIZE = 8
# NOTE(review): presumably the 4-byte byte_size column or a length prefix for
# the value - confirm which one this is meant to account for.
VALUE_BYTE_SIZE = 4
FIXED_SIZE_COLUMNS_BYTE_SIZE = ID_BYTE_SIZE + CREATED_AT_BYTE_SIZE + KEY_HASH_BYTE_SIZE + VALUE_BYTE_SIZE

# While the stage is :ignored the model hides the new columns from Active Record.
self.ignored_columns += [ :key_hash, :byte_size] if SolidCache.key_hash_stage == :ignored

class << self
def write(key, value)
upsert_all_no_query_cache([ { key: key, value: value } ])
Expand All @@ -14,21 +22,23 @@ def write_multi(payloads)
end

def read(key)
select_all_no_query_cache(get_sql, to_binary(key)).first
result = select_all_no_query_cache(get_sql, lookup_value(key)).first
result[1] if result&.first == key
end

def read_multi(keys)
serialized_keys = keys.map { |key| to_binary(key) }
select_all_no_query_cache(get_all_sql(serialized_keys), serialized_keys).to_h
key_hashes = keys.map { |key| lookup_value(key) }
results = select_all_no_query_cache(get_all_sql(key_hashes), key_hashes).to_h
results.except!(results.keys - keys)
end

def delete_by_key(key)
delete_no_query_cache(:key, to_binary(key))
delete_no_query_cache(lookup_column, lookup_value(key))
end

def delete_multi(keys)
serialized_keys = keys.map { |key| to_binary(key) }
delete_no_query_cache(:key, serialized_keys)
serialized_keys = keys.map { |key| lookup_value(key) }
delete_no_query_cache(lookup_column, serialized_keys)
end

def clear_truncate
Expand All @@ -42,7 +52,8 @@ def clear_delete
def increment(key, amount)
transaction do
uncached do
amount += lock.where(key: key).pick(:value).to_i
result = lock.where(lookup_column => lookup_value(key)).pick(:key, :value)
amount += result[1].to_i if result&.first == key
write(key, amount)
amount
end
Expand All @@ -54,35 +65,85 @@ def decrement(key, amount)
end

private
def upsert_all_no_query_cache(attributes)
insert_all = ActiveRecord::InsertAll.new(self, attributes, unique_by: upsert_unique_by, on_duplicate: :update, update_only: [ :value ])
def upsert_all_no_query_cache(payloads)
insert_all = ActiveRecord::InsertAll.new(
self,
add_key_hash_and_byte_size(payloads),
unique_by: upsert_unique_by,
on_duplicate: :update,
update_only: upsert_update_only
)
sql = connection.build_insert_sql(ActiveRecord::InsertAll::Builder.new(insert_all))

message = +"#{self} "
message << "Bulk " if attributes.many?
message << "Bulk " if payloads.many?
message << "Upsert"
# exec_query_method does not clear the query cache, exec_insert_all does
connection.send exec_query_method, sql, message
end

# Returns a copy of each payload hash, augmented with :key_hash and
# :byte_size when the key_hash column is in use. The hashes passed in are
# never mutated.
def add_key_hash_and_byte_size(payloads)
  payloads.map do |original|
    entry = original.dup
    if key_hash?
      entry[:key_hash] = key_hash_for(entry[:key])
      entry[:byte_size] = byte_size_for(entry)
    end
    entry
  end
end

# Whether writes should populate the key_hash and byte_size columns.
#
# True when the configured stage is :unindexed or :indexed and the table
# really has a key_hash column. The answer is memoized with `defined?` rather
# than `||=`, because `||=` never caches a false result and would re-run the
# column check on every call. A false answer therefore sticks until the
# process restarts.
def key_hash?
  return @key_hash if defined?(@key_hash)

  @key_hash = [ :indexed, :unindexed ].include?(SolidCache.key_hash_stage) &&
    connection.column_exists?(table_name, :key_hash)
end

# True when the configured stage is :indexed, i.e. lookups and upserts
# should go through the key_hash column instead of key.
def key_hash_indexed?
  stage = SolidCache.key_hash_stage
  stage == :indexed
end

# Column used to find rows: :key_hash once the stage is :indexed, otherwise
# the original :key column.
def lookup_column
  return :key_hash if key_hash_indexed?

  :key
end

# Value to compare against lookup_column for +key+: the integer key hash
# when the stage is :indexed, otherwise the binary-serialized key.
def lookup_value(key)
  if key_hash_indexed?
    key_hash_for(key)
  else
    to_binary(key)
  end
end

# Dummy value of the right type for lookup_column, used when building the
# SQL templates: an integer for key_hash, a string for key.
def lookup_placeholder
  return 1 if key_hash_indexed?

  "placeholder"
end

# Name of the connection method used to run raw SQL: :internal_exec_query
# when the connection responds to it, otherwise :exec_query.
def exec_query_method
  if connection.respond_to?(:internal_exec_query)
    :internal_exec_query
  else
    :exec_query
  end
end

def upsert_unique_by
connection.supports_insert_conflict_target? ? :key : nil
connection.supports_insert_conflict_target? ? lookup_column : nil
end

# Columns to overwrite when an upsert hits an existing row.
#
# * stage :indexed - :key is rewritten along with :value and :byte_size.
# * key_hash columns in use but not indexed - :key_hash is rewritten too.
# * otherwise - only :value.
def upsert_update_only
  return [ :key, :value, :byte_size ] if key_hash_indexed?
  return [ :value, :key_hash, :byte_size ] if key_hash?

  [ :value ]
end

def get_sql
@get_sql ||= build_sql(where(key: "placeholder").select(:value))
@get_sql ||= {}
@get_sql[lookup_column] ||= build_sql(where(lookup_column => lookup_placeholder).select(:key, :value))
end

def get_all_sql(keys)
def get_all_sql(key_hashes)
if connection.prepared_statements?
@get_all_sql_binds ||= {}
@get_all_sql_binds[keys.count] ||= build_sql(where(key: keys).select(:key, :value))
@get_all_sql_binds[[key_hashes.count, lookup_column]] ||= build_sql(where(lookup_column => key_hashes).select(:key, :value))
else
@get_all_sql_no_binds ||= build_sql(where(key: [ "placeholder1", "placeholder2" ]).select(:key, :value)).gsub("?, ?", "?")
@get_all_sql_no_binds ||= {}
@get_all_sql_no_binds[lookup_column] ||= build_sql(where(lookup_column => [ lookup_placeholder, lookup_placeholder ]).select(:key, :value)).gsub("?, ?", "?")
end
end

Expand Down Expand Up @@ -124,6 +185,15 @@ def delete_no_query_cache(attribute, values)
# Serializes +key+ through ActiveModel's binary type so it can be bound
# against the binary key column.
def to_binary(key)
  binary_type = ActiveModel::Type::Binary.new
  binary_type.serialize(key)
end

# Maps a cache key to the integer stored in key_hash: the first 8 bytes of
# the SHA256 digest of key.to_s, read as a big-endian 64-bit integer.
def key_hash_for(key)
  digest = Digest::SHA256.digest(key.to_s)
  # Must be decoded as a *signed* integer - Postgresql and SQLite don't support unsigned integers
  digest.unpack1("q>")
end

# Byte size recorded for an entry: byte length of the key plus byte length
# of the value plus the constant overhead of the fixed-width columns.
def byte_size_for(payload)
  key_bytes = payload[:key].to_s.bytesize
  value_bytes = payload[:value].to_s.bytesize
  key_bytes + value_bytes + FIXED_SIZE_COLUMNS_BYTE_SIZE
end
end
end
end
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Step one of the staged key_hash rollout: adds the two new columns as
# nullable, so existing rows remain valid until they are backfilled or the
# table is truncated.
class AddKeyHashAndByteSizeToSolidCacheEntries < ActiveRecord::Migration[7.1]
def change
change_table :solid_cache_entries do |t|
# 8-byte integer holding the truncated SHA256 hash of the key.
t.column :key_hash, :integer, null: true, limit: 8
# 4-byte integer holding the entry's byte size.
t.column :byte_size, :integer, null: true, limit: 4
end
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Step two of the staged key_hash rollout: makes key_hash and byte_size
# NOT NULL and adds their indexes. Every row must already have both columns
# populated (backfilled, or the table truncated) before this runs.
class AddKeyHashAndByteSizeIndexesAndNullConstraintsToSolidCacheEntries < ActiveRecord::Migration[7.1]
def change
# bulk: true requests that the alterations be combined into a single ALTER
# where the adapter supports it.
change_table :solid_cache_entries, bulk: true do |t|
t.change_null :key_hash, false
t.change_null :byte_size, false
# Unique index on key_hash, the column used for lookups and upserts once the stage is :indexed.
t.index :key_hash, unique: true
# NOTE(review): the commit message says byte_size belongs to a separate
# feature; presumably the next two indexes serve it - confirm.
t.index [:key_hash, :byte_size]
t.index :byte_size
end
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Step three of the staged key_hash rollout: drops the index on the key
# column once lookups go through key_hash.
class RemoveKeyIndexFromSolidCacheEntries < ActiveRecord::Migration[7.1]
  def change
    change_table :solid_cache_entries do |t|
      # unique: true plays no part in dropping the index, but it is recorded for
      # the inverse operation. Without it a rollback would recreate the key
      # index as non-unique, which the key-based upsert depends on.
      t.remove_index :key, unique: true
    end
  end
end
1 change: 1 addition & 0 deletions lib/solid_cache.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

module SolidCache
mattr_accessor :executor, :connects_to
mattr_accessor :key_hash_stage, default: :indexed

def self.all_shard_keys
all_shards_config&.keys || []
Expand Down
6 changes: 6 additions & 0 deletions lib/solid_cache/engine.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ class Engine < ::Rails::Engine

SolidCache.executor = config.solid_cache.executor
SolidCache.connects_to = config.solid_cache.connects_to
# Apply the optional key_hash_stage setting, rejecting unknown values at boot.
# When the setting is absent the SolidCache default is left untouched.
if config.solid_cache.key_hash_stage
  unless [:ignored, :unindexed, :indexed].include?(config.solid_cache.key_hash_stage)
    # Raise the ArgumentError class itself; a bare string here would raise a
    # RuntimeError whose message merely begins with "ArgumentError".
    raise ArgumentError, ":key_hash_stage must be :ignored, :unindexed or :indexed"
  end
  SolidCache.key_hash_stage = config.solid_cache.key_hash_stage
end
end

config.after_initialize do
Expand Down
12 changes: 8 additions & 4 deletions test/dummy/db/primary_shard_one_schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema[7.0].define(version: 2023_07_24_121448) do
create_table "solid_cache_entries", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
ActiveRecord::Schema[7.0].define(version: 2024_01_10_111702) do
create_table "solid_cache_entries", force: :cascade do |t|
t.binary "key", limit: 1024, null: false
t.binary "value", size: :long, null: false
t.binary "value", limit: 536870912, null: false
t.datetime "created_at", null: false
t.index ["key"], name: "index_solid_cache_entries_on_key", unique: true
t.integer "key_hash", limit: 8, null: false
t.integer "byte_size", limit: 4, null: false
t.index ["byte_size"], name: "index_solid_cache_entries_on_byte_size"
t.index ["key_hash", "byte_size"], name: "index_solid_cache_entries_on_key_hash_and_byte_size"
t.index ["key_hash"], name: "index_solid_cache_entries_on_key_hash", unique: true
end

end
12 changes: 8 additions & 4 deletions test/dummy/db/primary_shard_two_schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema[7.0].define(version: 2023_07_24_121448) do
create_table "solid_cache_entries", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
ActiveRecord::Schema[7.0].define(version: 2024_01_10_111702) do
create_table "solid_cache_entries", force: :cascade do |t|
t.binary "key", limit: 1024, null: false
t.binary "value", size: :long, null: false
t.binary "value", limit: 536870912, null: false
t.datetime "created_at", null: false
t.index ["key"], name: "index_solid_cache_entries_on_key", unique: true
t.integer "key_hash", limit: 8, null: false
t.integer "byte_size", limit: 4, null: false
t.index ["byte_size"], name: "index_solid_cache_entries_on_byte_size"
t.index ["key_hash", "byte_size"], name: "index_solid_cache_entries_on_key_hash_and_byte_size"
t.index ["key_hash"], name: "index_solid_cache_entries_on_key_hash", unique: true
end

end
12 changes: 8 additions & 4 deletions test/dummy/db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema[7.0].define(version: 2023_07_24_121448) do
create_table "solid_cache_entries", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
ActiveRecord::Schema[7.0].define(version: 2024_01_10_111702) do
create_table "solid_cache_entries", force: :cascade do |t|
t.binary "key", limit: 1024, null: false
t.binary "value", size: :long, null: false
t.binary "value", limit: 536870912, null: false
t.datetime "created_at", null: false
t.index ["key"], name: "index_solid_cache_entries_on_key", unique: true
t.integer "key_hash", limit: 8, null: false
t.integer "byte_size", limit: 4, null: false
t.index ["byte_size"], name: "index_solid_cache_entries_on_byte_size"
t.index ["key_hash", "byte_size"], name: "index_solid_cache_entries_on_key_hash_and_byte_size"
t.index ["key_hash"], name: "index_solid_cache_entries_on_key_hash", unique: true
end

end
12 changes: 8 additions & 4 deletions test/dummy/db/secondary_shard_one_schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema[7.0].define(version: 2023_07_24_121448) do
create_table "solid_cache_entries", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
ActiveRecord::Schema[7.0].define(version: 2024_01_10_111702) do
create_table "solid_cache_entries", force: :cascade do |t|
t.binary "key", limit: 1024, null: false
t.binary "value", size: :long, null: false
t.binary "value", limit: 536870912, null: false
t.datetime "created_at", null: false
t.index ["key"], name: "index_solid_cache_entries_on_key", unique: true
t.integer "key_hash", limit: 8, null: false
t.integer "byte_size", limit: 4, null: false
t.index ["byte_size"], name: "index_solid_cache_entries_on_byte_size"
t.index ["key_hash", "byte_size"], name: "index_solid_cache_entries_on_key_hash_and_byte_size"
t.index ["key_hash"], name: "index_solid_cache_entries_on_key_hash", unique: true
end

end
12 changes: 8 additions & 4 deletions test/dummy/db/secondary_shard_two_schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema[7.0].define(version: 2023_07_24_121448) do
create_table "solid_cache_entries", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
ActiveRecord::Schema[7.0].define(version: 2024_01_10_111702) do
create_table "solid_cache_entries", force: :cascade do |t|
t.binary "key", limit: 1024, null: false
t.binary "value", size: :long, null: false
t.binary "value", limit: 536870912, null: false
t.datetime "created_at", null: false
t.index ["key"], name: "index_solid_cache_entries_on_key", unique: true
t.integer "key_hash", limit: 8, null: false
t.integer "byte_size", limit: 4, null: false
t.index ["byte_size"], name: "index_solid_cache_entries_on_byte_size"
t.index ["key_hash", "byte_size"], name: "index_solid_cache_entries_on_key_hash_and_byte_size"
t.index ["key_hash"], name: "index_solid_cache_entries_on_key_hash", unique: true
end

end
24 changes: 24 additions & 0 deletions test/models/solid_cache/entry_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,30 @@ class EntryTest < ActiveSupport::TestCase
assert_equal 0, uncached_entry_count
end

# Two different keys are forced onto the same key_hash to check that a
# colliding row is never returned for the wrong key.
test "handles key_hash collisions" do
Entry.stubs(:key_hash_for).with("hello".b).returns(1)
Entry.stubs(:key_hash_for).with("hi".b).returns(1)

# Only "hello" is stored: "hi" shares its hash but must read as a miss.
Entry.write "hello".b, "there"
assert_equal "there", Entry.read("hello".b)
assert_nil Entry.read("hi".b)
assert_equal({ "hello" => "there" }, Entry.read_multi([ "hello", "hi" ]))

# Writing "hi" replaces the row that held "hello", so "hello" now reads as a miss.
Entry.write "hi".b, "now"
assert_nil Entry.read("hello".b)
assert_equal "now", Entry.read("hi".b)
assert_equal({ "hi" => "now" }, Entry.read_multi([ "hello", "hi" ]))
end

# Expected sizes are key bytes + value bytes + 28, where 28 is
# Entry::FIXED_SIZE_COLUMNS_BYTE_SIZE (8 + 8 + 8 + 4).
test "byte_size" do
# 5 ("hello") + 4 ("test") + 28
Entry.write "hello".b, "test"
assert_equal 37, Entry.uncached { Entry.last.byte_size }
# Overwriting the same key: 5 + 5 ("12345") + 28
Entry.write "hello".b, "12345"
assert_equal 38, Entry.uncached { Entry.last.byte_size }
# 2 ("hi") + 5 + 28
Entry.write "hi".b, "12345"
assert_equal 35, Entry.uncached { Entry.last.byte_size }
end

private
def write_entries(count = 20)
Entry.write_multi(count.times.map { |i| { key: "key#{i}", value: "value#{i}" } })
Expand Down
Loading

0 comments on commit 2a2a3a8

Please sign in to comment.