Skip to content

Commit

Permalink
Add MessagePackMessageSerializer for binary data
Browse files Browse the repository at this point in the history
Serialize data to the MessagePack format, for efficient storage in
binary columns.

The binary encoding requires around 30% less space than the base64
encoding used by the default serializer.

To prevent it being used with text columns, validate that we only try
to store binary data in binary columns.
  • Loading branch information
djmb committed Feb 16, 2024
1 parent 9e01d93 commit 10e5e7a
Show file tree
Hide file tree
Showing 6 changed files with 180 additions and 1 deletion.
9 changes: 9 additions & 0 deletions activerecord/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
* Add `ActiveRecord::Encryption::MessagePackMessageSerializer`

Serialize data to the MessagePack format, for efficient storage in binary columns.

The binary encoding requires around 30% less space than the base64 encoding
used by the default serializer.

*Donal McBreen*

* Add support for encrypting binary columns

Ensure encryption and decryption pass `Type::Binary::Data` around for binary data.
Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,11 @@ def serialize_with_current(value)

# Encrypts +value+ inside the block passed to +with_context+, forwarding
# +encryption_options+ to the encryptor.
#
# The encrypted result is checked before it is handed back: when it is
# BINARY-encoded and +cast_type+ does not report itself as binary,
# Errors::Encoding is raised instead of returning data the column cannot hold.
#
# The encryptor is invoked exactly once; a second, discarded call would only
# repeat the encryption work.
def encrypt_as_text(value)
  with_context do
    encryptor.encrypt(value, **encryption_options).tap do |encrypted|
      if !cast_type.binary? && encrypted.encoding == Encoding::BINARY
        raise Errors::Encoding, "Binary encoded data can only be stored in binary columns"
      end
    end
  end
end

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# frozen_string_literal: true

require "active_support/message_pack"

module ActiveRecord
  module Encryption
    # Converts +Message+ objects to and from the MessagePack binary format.
    #
    # Before packing, a message is flattened into a hash shaped like this:
    #
    #   {
    #     "p" => <payload>,
    #     "h" => {
    #       header1 => value1,
    #       header2 => value2,
    #       ...
    #     }
    #   }
    #
    # A header value that is itself a +Message+ is flattened the same way.
    # When loading, only one level of such nesting is accepted.
    class MessagePackMessageSerializer
      # Packs +message+ with MessagePack.
      #
      # Raises Errors::ForbiddenClass when +message+ is not a +Message+.
      def dump(message)
        raise Errors::ForbiddenClass unless message.is_a?(Message)

        packable = message_to_hash(message)
        ActiveSupport::MessagePack.dump(packable)
      end

      # Unpacks +serialized_content+ and rebuilds the +Message+ it describes.
      #
      # Any RuntimeError raised while unpacking or rebuilding is reported as
      # Errors::Decryption; other exception classes are not rescued here.
      def load(serialized_content)
        begin
          unpacked = ActiveSupport::MessagePack.load(serialized_content)
          hash_to_message(unpacked, 1)
        rescue RuntimeError
          raise Errors::Decryption
        end
      end

      private
        # Flattens a message into the "p"/"h" hash described on the class.
        def message_to_hash(message)
          { "p" => message.payload, "h" => headers_to_hash(message.headers) }
        end

        # Copies +headers+, flattening any value that is a +Message+.
        def headers_to_hash(headers)
          headers.transform_values do |header_value|
            if header_value.is_a?(Message)
              message_to_hash(header_value)
            else
              header_value
            end
          end
        end

        # Rebuilds a +Message+ from +data+ after validating its shape.
        # +level+ is 1 for the outermost message and grows with nesting.
        def hash_to_message(data, level)
          validate_message_data_format(data, level)

          properties = parse_properties(data["h"], level)
          Message.new(payload: data["p"], headers: properties)
        end

        # Raises Errors::Decryption when nesting is too deep or when +data+
        # is not a hash carrying a "p" key. The depth check runs first.
        def validate_message_data_format(data, level)
          if level > 2
            raise Errors::Decryption, "More than one level of hash nesting in headers is not supported"
          end

          return if data.is_a?(Hash) && data.has_key?("p")

          raise Errors::Decryption, "Invalid data format: hash without payload"
        end

        # Builds a +Properties+ from +headers+ (which may be nil), turning
        # hash values back into nested messages one level deeper.
        def parse_properties(headers, level)
          properties = Properties.new
          headers&.each do |name, header_value|
            properties[name] =
              if header_value.is_a?(Hash)
                hash_to_message(header_value, level + 1)
              else
                header_value
              end
          end
          properties
        end
    end
  end
end
24 changes: 24 additions & 0 deletions activerecord/test/cases/encryption/encryptable_record_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
require "models/book_encrypted"
require "models/post_encrypted"
require "models/traffic_light_encrypted"
require "active_record/encryption/message_pack_message_serializer"

class ActiveRecord::Encryption::EncryptableRecordTest < ActiveRecord::EncryptionTestCase
fixtures :encrypted_books, :posts
Expand Down Expand Up @@ -413,6 +414,29 @@ def name
assert_equal json_bytes, EncryptedBookWithSerializedBinary.create!(logo: json_bytes).logo
end

test "binary data can be serialized with message pack" do
  # One string containing every possible byte value, 0 through 255.
  every_byte = (0..255).map(&:chr).join

  book = EncryptedBookWithBinaryMessagePackSerialized.create!(logo: every_byte)

  assert_equal every_byte, book.logo
end

test "binary data can be encrypted uncompressed and serialized with message pack" do
  # Round-trip the lower and the upper half of the byte range separately.
  [0..127, 128..255].each do |byte_range|
    bytes = byte_range.map(&:chr).join
    stored_logo = EncryptedBookWithBinaryMessagePackSerialized.create!(logo: bytes).logo

    assert_equal bytes, stored_logo
  end
end

test "text columns cannot be serialized with message pack" do
  # Both declaring the encrypted attribute and creating the record stay
  # inside the assertion block, so an error from either step is captured.
  assert_raises(ActiveRecord::Encryption::Errors::Encoding) do
    serializer = ActiveRecord::Encryption::MessagePackMessageSerializer.new

    text_backed_model = Class.new(ActiveRecord::Base) do
      self.table_name = "encrypted_books"

      encrypts :name, message_serializer: serializer
    end

    text_backed_model.create(name: "Dune")
  end
end

private
def build_derived_key_provider_with(hash_digest_class)
ActiveRecord::Encryption.with_encryption_context(key_generator: ActiveRecord::Encryption::KeyGenerator.new(hash_digest_class: hash_digest_class)) do
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# frozen_string_literal: true

require "cases/encryption/helper"
require "base64"
require "active_record/encryption/message_pack_message_serializer"

# Exercises ActiveRecord::Encryption::MessagePackMessageSerializer: round
# trips of plain and nested messages, and the errors raised for malformed
# input, unsupported classes and overly deep nesting.
class ActiveRecord::Encryption::MessagePackMessageSerializerTest < ActiveRecord::EncryptionTestCase
  setup do
    @serializer = ActiveRecord::Encryption::MessagePackMessageSerializer.new
  end

  test "serializes messages" do
    message = build_message
    deserialized_message = serialize_and_deserialize(message)
    assert_equal message, deserialized_message
  end

  test "serializes messages with nested messages in their headers" do
    message = build_message
    message.headers[:other_message] = ActiveRecord::Encryption::Message.new(payload: "some other secret payload", headers: { some_header: "some other value" })

    deserialized_message = serialize_and_deserialize(message)
    assert_equal message, deserialized_message
  end

  test "detects random data and raises a decryption error" do
    assert_raises ActiveRecord::Encryption::Errors::Decryption do
      @serializer.load "hey there"
    end
  end

  test "detects random JSON hashes and raises a decryption error" do
    assert_raises ActiveRecord::Encryption::Errors::Decryption do
      @serializer.load JSON.dump({ some: "other data" })
    end
  end

  test "raises a TypeError when trying to deserialize other data types" do
    assert_raises TypeError do
      @serializer.load(:it_can_only_deserialize_strings)
    end
  end

  test "raises ForbiddenClass when trying to serialize other data types" do
    assert_raises ActiveRecord::Encryption::Errors::ForbiddenClass do
      @serializer.dump("it can only serialize messages!")
    end
  end

  test "raises Decryption when trying to parse message with more than one nested message" do
    # Builds message -> other_message -> yet_another_message, i.e. two
    # levels of nesting, which the serializer is expected to reject on load.
    message = build_message
    message.headers[:other_message] = ActiveRecord::Encryption::Message.new(payload: "some other secret payload", headers: { some_header: "some other value" })
    message.headers[:other_message].headers[:yet_another_message] = ActiveRecord::Encryption::Message.new(payload: "yet some other secret payload", headers: { some_header: "yet some other value" })

    assert_raises ActiveRecord::Encryption::Errors::Decryption do
      serialize_and_deserialize(message)
    end
  end

  private
    # Returns a minimal message with a string payload and a single header.
    def build_message
      payload = "some payload"
      headers = { key_1: "1" }
      ActiveRecord::Encryption::Message.new(payload: payload, headers: headers)
    end

    # Dumps +message+ and loads the result back, using the serializer given
    # in +with+ (the one created in +setup+ by default).
    def serialize_and_deserialize(message, with: @serializer)
      with.load with.dump(message)
    end
end

0 comments on commit 10e5e7a

Please sign in to comment.