Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

encoding and stuff #66

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions bench/encode.exs
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,21 @@ types = [:u32, :string, :datetime, {:array, :string}]

Benchee.run(
%{
"csv stream" => fn ->
rows |> NimbleCSV.RFC4180.dump_to_stream() |> Stream.run()
end,
"csv" => fn ->
NimbleCSV.RFC4180.dump_to_iodata(rows)
end,
"encode_rows + csv stream" => fn ->
rows |> Stream.map(&CSV.encode_row/1) |> NimbleCSV.RFC4180.dump_to_stream() |> Stream.run()
end,
"encode_rows + csv" => fn ->
rows |> CSV.encode_rows() |> NimbleCSV.RFC4180.dump_to_iodata()
end,
"row_binary stream" => fn ->
rows |> Stream.map(&RowBinary.encode_row(&1, types)) |> Stream.run()
end,
# "csv stream" => fn ->
# rows |> NimbleCSV.RFC4180.dump_to_stream() |> Stream.run()
# end,
# "csv" => fn ->
# NimbleCSV.RFC4180.dump_to_iodata(rows)
# end,
# "encode_rows + csv stream" => fn ->
# rows |> Stream.map(&CSV.encode_row/1) |> NimbleCSV.RFC4180.dump_to_stream() |> Stream.run()
# end,
# "encode_rows + csv" => fn ->
# rows |> CSV.encode_rows() |> NimbleCSV.RFC4180.dump_to_iodata()
# end,
# "row_binary stream" => fn ->
# rows |> Stream.map(&RowBinary.encode_row(&1, types)) |> Stream.run()
# end,
"row_binary" => fn ->
RowBinary.encode_rows(rows, types)
end
Expand Down
101 changes: 60 additions & 41 deletions lib/ch/row_binary.ex
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,29 @@ defmodule Ch.RowBinary do
def encode_row([] = done, []), do: done

def encode_rows([row | rows], types), do: encode_rows(row, types, rows, types)
def encode_rows([] = done, _types), do: done
def encode_rows([] = empty, _types), do: empty

defp encode_rows([el | els], [t | ts], rows, types) do
[encode(t, el) | encode_rows(els, ts, rows, types)]
end

defp encode_rows([], [], rows, types), do: encode_rows(rows, types)
defp encode_rows([], [], [row | rows], types) do
encode_rows(row, types, rows, types)
end

def encode(:varint, num) when is_integer(num) and num < 128, do: <<num>>
defp encode_rows([], [], [] = done, _types), do: done

def encode(:varint, num) when is_integer(num) do
[<<1::1, num::7>> | encode(:varint, num >>> 7)]
end
@doc false
def encode_varint(num) when num <= 0x7F, do: num
def encode_varint(num), do: [num &&& 0x7F | encode_varint_cont(num >>> 7)]
defp encode_varint_cont(num) when num <= 0x7F, do: [num]
defp encode_varint_cont(num), do: [num &&& 0x7F | encode_varint_cont(num >>> 7)]

def encode(type, str) when type in [:string, :binary] do
case str do
_ when is_binary(str) -> [encode(:varint, byte_size(str)) | str]
_ when is_list(str) -> [encode(:varint, IO.iodata_length(str)) | str]
nil -> <<0>>
_ when is_binary(str) -> [encode_varint(byte_size(str)) | str]
_ when is_list(str) -> [encode_varint(IO.iodata_length(str)) | str]
nil -> 0
end
end

Expand All @@ -51,7 +55,7 @@ defmodule Ch.RowBinary do

def encode(string(size: size), nil), do: <<0::size(size * 8)>>

for size <- [8, 16, 32, 64, 128, 256] do
for size <- [32, 64, 8, 16, 128, 256] do
def encode(unquote(:"u#{size}"), i) when is_integer(i) do
<<i::unquote(size)-little>>
end
Expand All @@ -72,34 +76,27 @@ defmodule Ch.RowBinary do
def encode(unquote(:"f#{size}"), nil), do: <<0::unquote(size)>>
end

def encode(decimal(size: size, scale: scale), %Decimal{sign: sign, coef: coef, exp: exp})
when scale == -exp do
i = sign * coef
<<i::size(size)-little>>
end

def encode(decimal(size: size, scale: scale), %Decimal{sign: sign, coef: coef, exp: exp})
when exp >= 0 do
i = sign * coef * round(:math.pow(10, exp + scale))
<<i::size(size)-little>>
def encode(:boolean, bool) do
case bool do
true -> 1
false -> 0
nil -> 0
end
end

def encode(decimal(scale: scale) = t, %Decimal{} = d) do
encode(t, Decimal.round(d, scale))
def encode({:array, type}, l) do
case l do
[_ | _] -> [encode_varint(length(l)) | encode_many(type, l)]
[] -> 0
nil -> 0
end
end

def encode(decimal(size: size), nil), do: <<0::size(size)>>

def encode(:boolean, true), do: <<1>>
def encode(:boolean, false), do: <<0>>
def encode(:boolean, nil), do: <<0>>

def encode({:array, type}, [_ | _] = l) do
[encode(:varint, length(l)) | encode_many(l, type)]
def encode(:date, %Date{} = date) do
<<Date.diff(date, @epoch_date)::16-little>>
end

def encode({:array, _type}, []), do: <<0>>
def encode({:array, _type}, nil), do: <<0>>
def encode(:date, nil), do: <<0::16>>

def encode(:datetime, %NaiveDateTime{} = datetime) do
<<NaiveDateTime.diff(datetime, @epoch_naive_datetime)::32-little>>
Expand All @@ -117,6 +114,24 @@ defmodule Ch.RowBinary do
encode(t, v)
end

def encode(decimal(size: size, scale: scale), %Decimal{sign: sign, coef: coef, exp: exp})
when scale == -exp do
i = sign * coef
<<i::size(size)-little>>
end

def encode(decimal(size: size, scale: scale), %Decimal{sign: sign, coef: coef, exp: exp})
when exp >= 0 do
i = sign * coef * round(:math.pow(10, exp + scale))
<<i::size(size)-little>>
end

def encode(decimal(scale: scale) = t, %Decimal{} = d) do
encode(t, Decimal.round(d, scale))
end

def encode(decimal(size: size), nil), do: <<0::size(size)>>

def encode(datetime64(unit: unit), %NaiveDateTime{} = datetime) do
<<NaiveDateTime.diff(datetime, @epoch_naive_datetime, unit)::64-little-signed>>
end
Expand All @@ -127,12 +142,6 @@ defmodule Ch.RowBinary do

def encode(datetime64(), nil), do: <<0::64>>

def encode(:date, %Date{} = date) do
<<Date.diff(date, @epoch_date)::16-little>>
end

def encode(:date, nil), do: <<0::16>>

def encode(:date32, %Date{} = date) do
<<Date.diff(date, @epoch_date)::32-little-signed>>
end
Expand All @@ -158,10 +167,19 @@ defmodule Ch.RowBinary do
def encode(:uuid, nil), do: <<0::128>>

def encode({:nullable, _type}, nil), do: 1
def encode({:nullable, type}, value), do: [0 | encode(type, value)]
def encode({:nullable, type}, value), do: [0 | ensure_iodata(encode(type, value))]

defp encode_many(type, values) do
case values do
[el | rest] -> [encode(type, el) | encode_many(type, rest)]
[] = done -> done
end
end

defp encode_many([el | rest], type), do: [encode(type, el) | encode_many(rest, type)]
defp encode_many([] = done, _type), do: done
@compile inline: [ensure_iodata: 1]
defp ensure_iodata(b) when is_binary(b), do: b
defp ensure_iodata(l) when is_list(l), do: l
defp ensure_iodata(i) when is_integer(i), do: [i]

@compile {:inline, d: 1}

Expand Down Expand Up @@ -220,6 +238,7 @@ defmodule Ch.RowBinary do

def encode_type({:nullable, type}), do: ["Nullable(", encode_type(type), ?)]
def encode_type({:array, type}), do: ["Array(", encode_type(type), ?)]
def encode_type({:low_cardinality, type}), do: ["LowCardinality(", encode_type(type), ?)]
def encode_type(:datetime), do: "DateTime"

def encode_type({:datetime, timezone}) when is_binary(timezone) do
Expand Down
12 changes: 9 additions & 3 deletions test/ch/row_binary_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ defmodule Ch.RowBinaryTest do
{{:nullable, :string}, "string"}
]

for {type, expected} <- spec do
encoded = IO.iodata_to_binary(encode(type, expected)) |> IO.inspect()
assert decode_rows(encoded, [type]) == [[expected]]
end

num_cols = length(spec)
{types, row} = Enum.unzip(spec)

Expand Down Expand Up @@ -123,7 +128,8 @@ defmodule Ch.RowBinaryTest do

test "nil" do
assert encode({:nullable, :string}, nil) == 1
assert encode(:string, nil) == <<0>>
assert encode({:nullable, :string}, "") == [0, 0 | ""]
assert encode(:string, nil) == 0
assert encode(string(size: 2), nil) == <<0, 0>>
assert encode(:u8, nil) == <<0>>
assert encode(:u16, nil) == <<0, 0>>
Expand All @@ -135,8 +141,8 @@ defmodule Ch.RowBinaryTest do
assert encode(:i64, nil) == <<0, 0, 0, 0, 0, 0, 0, 0>>
assert encode(:f32, nil) == <<0, 0, 0, 0>>
assert encode(:f64, nil) == <<0, 0, 0, 0, 0, 0, 0, 0>>
assert encode(:boolean, nil) == <<0>>
assert encode({:array, :string}, nil) == <<0>>
assert encode(:boolean, nil) == 0
assert encode({:array, :string}, nil) == 0
assert encode(:date, nil) == <<0, 0>>
assert encode(:date32, nil) == <<0, 0, 0, 0>>
assert encode(:datetime, nil) == <<0, 0, 0, 0>>
Expand Down