From 72b77c3c81c55de4796acc5b8ca5e28343dfe61e Mon Sep 17 00:00:00 2001 From: Cliff Moon Date: Sun, 25 Jan 2009 10:48:45 -0800 Subject: [PATCH] variable header size with freelists --- elibs/dmtree.erl | 72 +++++++++++++++++++++++------------------- etest/dmerkle_test.erl | 2 +- etest/dmtree_test.erl | 8 ++--- include/dmerkle.hrl | 6 +++- 4 files changed, 50 insertions(+), 38 deletions(-) diff --git a/elibs/dmtree.erl b/elibs/dmtree.erl index 14608c0..16d5e63 100644 --- a/elibs/dmtree.erl +++ b/elibs/dmtree.erl @@ -24,7 +24,7 @@ -include("common.hrl"). -include_lib("kernel/include/file.hrl"). --record(dmtree, {file, size=0, virtsize=0, d, blocksize, filename, ops=[], opdict=dict:new(), freepointer=0, rootpointer=0}). +-record(dmtree, {file, size=0, virtsize=0, d, blocksize, headersize=0, filename, ops=[], opdict=dict:new(), freepointer=0, rootpointer=0, kfpointers=[]}). -ifdef(TEST). -include("etest/dmtree_test.erl"). @@ -107,12 +107,13 @@ init([FileName, BlockSize]) -> {ok, Header} -> {ok, create_or_read_root(Header#dmtree{filename=FileName,file=File,size=FileSize})}; {error, Msg} -> {stop, Msg}; eof -> - D = d_from_blocksize(BlockSize), - AlignedBlockSize = blocksize_from_d(D), - T = create_or_read_root(#dmtree{file=File,d=D,blocksize=AlignedBlockSize,filename=FileName,size=?HEADER_SIZE}), - % ?infoFmt("created T ~p~n", [T]), + D = ?d_from_blocksize(BlockSize), + HeaderSize = ?headersize_from_blocksize(BlockSize), + Pointers = lists:map(fun(_) -> 0 end, lists:seq(1,?pointers_from_blocksize(BlockSize))), + % we want to retain passed in blocksize, internal fragmentation don't matter + T = create_or_read_root(#dmtree{file=File,d=D,blocksize=BlockSize,filename=FileName,headersize=HeaderSize,size=HeaderSize,kfpointers=Pointers}), flush(File, T#dmtree.ops), - {ok, T#dmtree{ops=[],size=?HEADER_SIZE + AlignedBlockSize}} + {ok, T#dmtree{ops=[],size=HeaderSize + BlockSize}} end. %%-------------------------------------------------------------------- @@ -224,18 +225,24 @@ write_header(Tree = #dmtree{file=File}) -> Tree2. read_header(File) -> - case file:pread(File, 0, ?HEADER_SIZE) of - {ok, Bin} -> deserialize_header(Bin); + %gotta get the blocksize first + case file:pread(File, 1, 4) of + {ok, <>} -> + case file:pread(File, 0, ?headersize_from_blocksize(BlockSize)) of + {ok, Bin} -> deserialize_header(Bin); + eof -> eof; + {error, Msg} -> {error, Msg} + end; eof -> eof; {error, Msg} -> {error, Msg} end. int_read(0, Tree) -> - throw("tried to read a node from the null pointer"); + {error, "tried to read a node from the null pointer"}; -int_read(Offset, #dmtree{file=File,blocksize=BlockSize}) -> +int_read(Offset, #dmtree{file=File,d=D,blocksize=BlockSize}) -> case file:pread(File, Offset, BlockSize) of - {ok, Bin} -> deserialize(Bin, Offset); + {ok, Bin} -> deserialize(Bin, D, Offset); eof -> error_logger:info_msg("hit an eof for offset ~p", [Offset]), undefined; @@ -313,8 +320,9 @@ true_size(Bin) when is_binary(Bin) -> true_size(_) -> 1. -create_or_read_root(Tree = #dmtree{file=File,blocksize=BlockSize,rootpointer=0}) -> - {Root, Tree2} = int_write(#leaf{offset=?HEADER_SIZE}, Tree), +create_or_read_root(Tree = #dmtree{file=File,blocksize=BlockSize,headersize=HeaderSize,rootpointer=0}) -> + {Root, Tree2} = int_write(#leaf{offset=HeaderSize}, Tree), + % ?infoFmt("wrote root ~p~n", [Root]), write_header(Tree2#dmtree{rootpointer=offset(Root)}); create_or_read_root(Tree = #dmtree{file=File,blocksize=BlockSize,rootpointer=Ptr}) -> @@ -328,13 +336,21 @@ take_free_offset(eof, Tree = #dmtree{file=File,blocksize=BlockSize,freepointer=F take_free_offset(Offset, Tree) -> {Offset, Tree}. -serialize_header(#dmtree{blocksize=BlockSize, freepointer=FreePtr, rootpointer=RootPtr}) -> - FreeSpace = 64*8, - <>. +serialize_header(#dmtree{blocksize=BlockSize, freepointer=FreePtr, rootpointer=RootPtr, kfpointers=Pointers}) -> + Preamble = <>, + FreeSpace = (?STATIC_HEADER - byte_size(Preamble))*8, + PtrBin = << <> || Ptr <- Pointers >>, + % ?infoFmt("pointers: ~p~nptrbin~p~nfreespace~p~n", [Pointers, PtrBin, FreeSpace]), + <>. %this will try and match the current version, if it doesn't then we gotta punch out -deserialize_header(<>) -> - {ok, #dmtree{blocksize=BlockSize,d=d_from_blocksize(BlockSize),freepointer=FreePtr,rootpointer=RootPtr}}; +deserialize_header(<>) -> + PointerSize = ?pointers_from_blocksize(BlockSize), + <> = Rest, + Pointers = [Ptr || <> <= PBin], + HeaderSize = ?headersize_from_blocksize(BlockSize), + D = ?d_from_blocksize(BlockSize), + {ok, #dmtree{blocksize=BlockSize,d=D,headersize=HeaderSize,freepointer=FreePtr,rootpointer=RootPtr,kfpointers=Pointers}}; %hit the canopy deserialize_header(BinHeader) -> @@ -344,11 +360,10 @@ deserialize_header(BinHeader) -> end. %node is denoted by a 0 -deserialize(<<0:8, Binary/binary>>, Offset) -> - D = d_from_blocksize(byte_size(Binary) + 1), +deserialize(<<0:8, Binary/binary>>, D, Offset) -> KeyBinSize = D*4, ChildBinSize = (D+1)*12, - <> = Binary, + <> = Binary, if M > D -> error_logger:info_msg("M is larger than D M ~p D ~p offset~p~n", [M, D, Offset]); true -> ok @@ -357,14 +372,13 @@ deserialize(<<0:8, Binary/binary>>, Offset) -> Children = unpack_children(M+1, ChildBin), #node{m=M,children=Children,keys=Keys,offset=Offset}; -deserialize(<<1:8, Bin/binary>>, Offset) -> - D = d_from_blocksize(byte_size(Bin) + 1), +deserialize(<<1:8, Bin/binary>>, D, Offset) -> ValuesBinSize = D*16, <> = Bin, Values = unpack_values(M, ValuesBin), #leaf{m=M,values=Values,offset=Offset}; -deserialize(<<3:8, Pointer:64, _/binary>>, Offset) -> +deserialize(<<3:8, Pointer:64, _/binary>>, D, Offset) -> #free{offset=Offset,pointer=Pointer}. serialize(Free = #free{pointer=Pointer}, BlockSize) -> @@ -372,7 +386,7 @@ serialize(Free = #free{pointer=Pointer}, BlockSize) -> <<3:8,Pointer:64,0:LeftOverBits>>; serialize(Node = #node{keys=Keys,children=Children,m=M}, BlockSize) -> - D = d_from_blocksize(BlockSize), + D = ?d_from_blocksize(BlockSize), if M > D -> error_logger:info_msg("M is larger than D M ~p D ~p~n", [M, D]); length(Keys) == length(Children) -> error_logger:info_msg("There are as many children as keys for ~p~n", [Node]); @@ -390,7 +404,7 @@ serialize(Node = #node{keys=Keys,children=Children,m=M}, BlockSize) -> OutBin; serialize(#leaf{values=Values,m=M}, BlockSize) -> - D = d_from_blocksize(BlockSize), + D = ?d_from_blocksize(BlockSize), if M > D -> error_logger:info_msg("M is larger than D M ~p D ~p~n", [M, D]); true -> ok @@ -439,12 +453,6 @@ deserialize(Bin) when byte_size(Bin) < 8 -> deserialize(<>) -> {byte_size(Rest) + 8, NextPtr}. -d_from_blocksize(BlockSize) -> - trunc((BlockSize - 17)/16). - -blocksize_from_d(D) -> - trunc(16*D + 17). - offset(#leaf{offset=Offset}) -> Offset; offset(#free{offset=Offset}) -> Offset; offset(#node{offset=Offset}) -> Offset. diff --git a/etest/dmerkle_test.erl b/etest/dmerkle_test.erl index 4c32602..27659a7 100644 --- a/etest/dmerkle_test.erl +++ b/etest/dmerkle_test.erl @@ -11,7 +11,7 @@ open_and_close_test() -> Merkle = get_state(Pid), Root = Merkle#dmerkle.root, error_logger:info_msg("root ~p~n", [Root]), - ?assertEqual(?HEADER_SIZE, Root#leaf.offset), + ?assertEqual(?headersize_from_blocksize(256), Root#leaf.offset), ?assertEqual(0, Root#leaf.m), close(Pid). diff --git a/etest/dmtree_test.erl b/etest/dmtree_test.erl index 50cf323..7ea25bf 100644 --- a/etest/dmtree_test.erl +++ b/etest/dmtree_test.erl @@ -4,7 +4,7 @@ deserialize_node_test() -> NodeBin = <<0:8, 2:32, 1:32, 2:32, 0:32, 0:32, 3:32, 4:64, 5:32, 6:64, 7:32, 8:64, 0:32, 0:64, 0:32, 0:64>>, - Node = deserialize(NodeBin, 20), + Node = deserialize(NodeBin, 4, 20), #node{m=2,children=Children,keys=Keys,offset=20} = Node, [{3, 4}, {5, 6}, {7, 8}] = Children, [1, 2] = Keys. @@ -14,7 +14,7 @@ deserialize_leaf_test() -> 1:32, 2:64, 3:32, 4:32, 5:64, 6:32, 0:352>>, - Leaf = deserialize(LeafBin, 20), + Leaf = deserialize(LeafBin, 4, 20), #leaf{m=2,values=Values,offset=20} = Leaf, [{1, 2, 3}, {4, 5, 6}] = Values. @@ -48,7 +48,7 @@ node_round_trip_test() -> children=[{4, 5}, {6, 7}, {8, 9}], offset = 0 }, - Node = deserialize(serialize(Node, 81), 0). + Node = deserialize(serialize(Node, 81), 4, 0). leaf_round_trip_test() -> Leaf = #leaf{ @@ -56,4 +56,4 @@ leaf_round_trip_test() -> values=[{1, 2, 3}, {4, 5, 6}], offset=0 }, - Leaf = deserialize(serialize(Leaf, 81), 0). + Leaf = deserialize(serialize(Leaf, 81), 4, 0). diff --git a/include/dmerkle.hrl b/include/dmerkle.hrl index 1b19f67..6adb91e 100644 --- a/include/dmerkle.hrl +++ b/include/dmerkle.hrl @@ -1,5 +1,9 @@ -define(VERSION, 1). --define(HEADER_SIZE, 85). +-define(STATIC_HEADER, 85). + +-define(d_from_blocksize(BlockSize), trunc((BlockSize - 17)/16)). +-define(pointers_from_blocksize(BlockSize), (trunc(math:sqrt(BlockSize)) - 4)). +-define(headersize_from_blocksize(BlockSize), (?STATIC_HEADER + ?pointers_from_blocksize(BlockSize) * 8)). -record(node, {m=0, keys=[], children=[], offset=eof}). -record(leaf, {m=0, values=[], offset=eof}).