forked from jaked/deriving
/
deriving_Dump.ml
261 lines (234 loc) · 7.78 KB
/
deriving_Dump.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
(** Dump **)
(* Copyright Jeremy Yallop 2007.
This file is free software, distributed under the MIT license.
See the file COPYING for details.
*)
(* TODO: we could have an additional debugging deserialisation method. *)
module type Dump = sig
type a
val to_buffer : Buffer.t -> a -> unit
val to_string : a -> string
val to_channel : out_channel -> a -> unit
val from_stream : char Stream.t -> a
val from_string : string -> a
val from_channel : in_channel -> a
end
module type SimpleDump = sig
type a
val to_buffer : Buffer.t -> a -> unit
val from_stream : char Stream.t -> a
end
exception Dump_error of string
let bad_tag tag stream typename =
raise (Dump_error
(Printf.sprintf
"Dump: failure during %s deserialisation at character %d; unexpected tag %d"
typename (Stream.count stream) tag))
module Defaults (P : sig
type a
val to_buffer : Buffer.t -> a -> unit
val from_stream : char Stream.t -> a
end) : Dump with type a = P.a =
struct
include P
(* is there a reasonable value to use here? *)
let buffer_size = 128
let to_string obj =
let buffer = Buffer.create buffer_size in
P.to_buffer buffer obj;
Buffer.contents buffer
(* should we explicitly deallocate the buffer? *)
and from_string string = P.from_stream (Stream.of_string string)
and from_channel in_channel =
from_stream (Stream.of_channel in_channel)
and to_channel out_channel obj =
let buffer = Buffer.create buffer_size in
P.to_buffer buffer obj;
Buffer.output_buffer out_channel buffer
end
(* Generic int dumper. This should work for any (fixed-size) integer
type with suitable operations. *)
module Dump_intN (P : sig
type t
val zero : t
val logand : t -> t -> t
val logor : t -> t -> t
val lognot : t -> t
val shift_right_logical : t -> int -> t
val shift_left : t -> int -> t
val of_int : int -> t
val to_int : t -> int
end) = Defaults (
struct
type a = P.t
(* Format an integer using the following scheme:
The lower 7 bits of each byte are used to store successive 7-bit
chunks of the integer.
The highest bit of each byte is used as a flag to indicate
whether the next byte is present.
*)
open Buffer
open Char
open P
let to_buffer buffer =
let rec aux int =
(* are there more than 7 bits? *)
if logand int (lognot (of_int 0x7f)) <> zero
(* if there are, write the lowest 7 bite plus a high bit (to
indicate that there's more). Then recurse, shifting the value
7 bits right *)
then begin
add_char buffer (chr (to_int (logor (of_int 0x80) (logand int (of_int 0x7f)))));
aux (shift_right_logical int 7)
end
(* otherwise, write the bottom 7 bits only *)
else add_char buffer (chr (to_int int))
in aux
and from_stream stream =
let rec aux (int : t) shift =
let c = of_int (code (Stream.next stream)) in
let int = logor int (shift_left (logand c (of_int 0x7f)) shift) in
if logand c (of_int 0x80) <> zero then aux int (shift + 7)
else int
in aux zero 0
end
)
module Dump_int32 = Dump_intN (Int32)
module Dump_int64 = Dump_intN (Int64)
module Dump_nativeint = Dump_intN (Nativeint)
module Dump_int = Defaults (
struct
type a = int
let to_buffer buffer int = Dump_nativeint.to_buffer buffer (Nativeint.of_int int)
and from_stream stream = Nativeint.to_int (Dump_nativeint.from_stream stream)
end
)
module Dump_char = Defaults (
struct
type a = char
let to_buffer = Buffer.add_char
and from_stream = Stream.next
end
)
(* This is questionable; it doesn't preserve sharing *)
module Dump_string = Defaults (
struct
type a = string
let to_buffer buffer string =
begin
Dump_int.to_buffer buffer (String.length string);
Buffer.add_string buffer string
end
and from_stream stream =
let len = Dump_int.from_stream stream in
let s = String.create len in
for i = 0 to len - 1 do
String.set s i (Stream.next stream) (* could use String.unsafe_set here *)
done;
s
end
)
module Dump_float = Defaults (
struct
type a = float
let to_buffer buffer f = Dump_int64.to_buffer buffer (Int64.bits_of_float f)
and from_stream stream = Int64.float_of_bits (Dump_int64.from_stream stream)
end
)
(* This should end up a bit more compact than the derived version *)
module Dump_list (P : SimpleDump) = Defaults (
(* This could perhaps be more efficient by serialising the list in
reverse: this would result in only one traversal being needed
during serialisation, and no "reverse" being needed during
deserialisation. (However, dumping would no longer be
tail-recursive) *)
struct
type a = P.a list
let to_buffer buffer items =
begin
Dump_int.to_buffer buffer (List.length items);
List.iter (P.to_buffer buffer) items
end
and from_stream stream =
let rec aux items = function
| 0 -> items
| n -> aux (P.from_stream stream :: items) (n-1)
in List.rev (aux [] (Dump_int.from_stream stream))
end
)
(* Dump_ref and Dump_array cannot preserve sharing, so we don't
provide implementations *)
module Dump_option (P : SimpleDump) = Defaults (
struct
type a = P.a option
let to_buffer buffer = function
| None -> Dump_int.to_buffer buffer 0
| Some s ->
begin
Dump_int.to_buffer buffer 1;
P.to_buffer buffer s
end
and from_stream stream =
match Dump_int.from_stream stream with
| 0 -> None
| 1 -> Some (P.from_stream stream)
| i -> bad_tag i stream "option"
end
)
module Dump_bool = Defaults (
struct
type a = bool
let to_buffer buffer = function
| false -> Buffer.add_char buffer '\000'
| true -> Buffer.add_char buffer '\001'
and from_stream stream =
match Stream.next stream with
| '\000' -> false
| '\001' -> true
| c -> bad_tag (Char.code c) stream "bool"
end
)
module Dump_unit = Defaults (
struct
type a = unit
let to_buffer _ () = ()
and from_stream _ = ()
end
)
module Dump_num = Defaults (
struct
(* TODO: a less wasteful dumper for nums. A good start would be
using half a byte per decimal-coded digit, instead of a whole
byte. *)
type a = Num.num
let to_buffer buffer n = Dump_string.to_buffer buffer (Num.string_of_num n)
and from_stream stream = Num.num_of_string (Dump_string.from_stream stream)
end
)
module Dump_undumpable (P : sig type a val tname : string end) = Defaults (
struct
type a = P.a
let to_buffer _ _ = failwith ("Dump: attempt to serialise a value of unserialisable type : " ^ P.tname)
let from_stream _ = failwith ("Dump: attempt to deserialise a value of unserialisable type : " ^ P.tname)
end
)
(* Uses Marshal to serialise the values that the parse-the-declarations
technique can't reach. *)
module Dump_via_marshal (P : sig type a end) = Defaults (
(* Rather inefficient. *)
struct
include P
let to_buffer buffer obj = Buffer.add_string buffer (Marshal.to_string obj [Marshal.Closures])
let from_stream stream =
let readn n =
let s = String.create n in
for i = 0 to n - 1 do
String.set s i (Stream.next stream)
done;
s
in
let header = readn Marshal.header_size in
let datasize = Marshal.data_size header 0 in
let datapart = readn datasize in
Marshal.from_string (header ^ datapart) 0
end)