Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 517 lines (433 sloc) 15.895 kB
8184302 @sorear Start draft of serialization/deserialization code
authored
1 using System;
d832c02 @sorear Second draft of serialization code
authored
2 using System.IO;
3 using System.Security.Cryptography;
b25256f @sorear Add Serialize.cs to build, fix build errors
authored
4 using System.Collections.Generic;
d832c02 @sorear Second draft of serialization code
authored
5 using System.Text;
8184302 @sorear Start draft of serialization/deserialization code
authored
6
cdde7e0 @sorear Notes on four kinds of module scope
authored
7 // Here in Niecza we have four different kinds of unit scopes:
8 //
9 // * COMPILING::UNIT, aka RuntimeUnit: one of these exists for every
10 // call into the compiler, whether eval or module. The REPL will
11 // be considered as if it were eval.
12 //
13 // * Serialization scopes, which are created when compiling modules
14 // or when pre-compiling a main program. During execution there is
15 // no current serialization scope; evals inherit the serialization
16 // scope or lack thereof that was in effect.
17 //
18 // * Assembly scopes, which are an artifact of the CLR and almost align
19 // with precompilation scopes, except that they have to exist always
20 // because methods cannot be created free-floating in CLR 2.x.
21 //
22 // An assembly scope is created for all serialization scopes, and
23 // non-saved anonymous assemblies are created for eval-and-run of
24 // a file and non-BEGIN-time evals.
25 //
26 // * GLOBAL scope is very much like serialization scope except that there
27 // is a "true globals" scope that is used when not serializing.
28
8184302 @sorear Start draft of serialization/deserialization code
authored
29 // This implements "bounded serialization" for Niecza. Unfortunately
30 // the CLR's builtin serialization can't efficiently be made bounded,
31 // and anyway it would be nice if the serialization format could be
32 // transported across backends.
33
34 // TODO: implement a more Storable-like interface.
d832c02 @sorear Second draft of serialization code
authored
35
36 // Note, the serialization subsystem is *NOT* thread safe !
8184302 @sorear Start draft of serialization/deserialization code
authored
37 namespace Niecza.Serialization {
d832c02 @sorear Second draft of serialization code
authored
// Information kept on a serialization unit after loading or storing,
// but not before storing.
//
// bynum and nobj together form a growable table: slots [0, nobj) hold
// the objects registered in this unit, and an object's slot number is
// the id handed out for it by ObjectRegistry.CheckWriteObject.
class SerUnit {
    internal string name; // eg "File.Copy"
    internal byte[] hash; // hash of entire file, filled at load or write time
    // objects in unit; allocated up front so that the very first
    // registration does not have to cope with a missing table
    internal object[] bynum = new object[8];
    internal object root; // the RuntimeUnit object
    internal int nobj; // number of slots of bynum in use
}
8184302 @sorear Start draft of serialization/deserialization code
authored
47
// The central feature of *bounded* serialization is that object
// registries are kept distinct from the (de)serializer, and can
// be shared between serialization runs.
//
// A registry remembers, by name, every unit that is being or has been
// loaded or saved through it, and for every registered object which unit
// owns it and under which id.  Like the rest of the serialization
// subsystem it is not thread safe.
class ObjectRegistry {
    // TODO: investigate a more specialized representation,
    // ideally not having to hash as many objects
    struct ObjRef {
        public SerUnit unit;
        public int id;
    }

    // Smallest size ever given to a unit's object table.
    const int MinObjects = 8;

    // owning unit and id of every registered object
    Dictionary<object,ObjRef> byref = new Dictionary<object,ObjRef>();

    // units known to this registry, keyed by unit name
    Dictionary<string,SerUnit> units =
        new Dictionary<string,SerUnit>();

    static readonly HashAlgorithm hash = SHA256.Create();
    static readonly string signature = "Niecza-Serialized-Module";
    static readonly int version = 1;

    // Where the serialized form of the named unit lives on disk.
    static string UnitFile(string name) {
        return Path.Combine(AppDomain.CurrentDomain.BaseDirectory,
                name + ".ser");
    }

    // Routines for use by serialization code

    // Looks up o in the registry.
    //
    // If o is already registered, returns true and reports the owning
    // unit in lui and the object's id within that unit in id.
    //
    // Otherwise o is registered as the next object of 'into', lui and id
    // describe that new registration, and false is returned; the caller
    // is then responsible for writing the object itself.
    public bool CheckWriteObject(SerUnit into, object o,
            out SerUnit lui, out int id) {
        ObjRef known;
        if (byref.TryGetValue(o, out known)) {
            lui = known.unit;
            id = known.id;
            return true;
        }

        // Grow the table when it is missing or full.  Array.Resize
        // allocates a fresh array when handed null, and the Math.Max
        // keeps an empty table from "doubling" to zero slots.
        if (into.bynum == null || into.nobj == into.bynum.Length)
            Array.Resize(ref into.bynum,
                    Math.Max(MinObjects, into.nobj * 2));

        ObjRef fresh;
        fresh.unit = lui = into;
        id = fresh.id = into.nobj++;
        into.bynum[id] = o;

        byref[o] = fresh;

        return false;
    }

    // Routines for use by compilation manager

    // Loads a single unit from the compiled-data directory.
    // Will throw a ThawException if a stale reference is encountered
    // or other data format error.
    //
    // A unit that is already known to the registry is returned as is,
    // without touching the file again.
    public SerUnit LoadUnit(string name) {
        SerUnit su;

        // is the unit already loaded?
        if (units.TryGetValue(name, out su))
            return su;

        string file = UnitFile(name);
        byte[] bytes = File.ReadAllBytes(file);

        su = new SerUnit();
        su.name = name;
        su.hash = hash.ComputeHash(bytes);

        // Register before thawing so that later requests for this unit
        // find it, and so that the cleanup below has something to undo.
        units[name] = su;

        ThawBuffer tb = new ThawBuffer(this, su, bytes);

        bool success = false;
        try {
            string rsig = tb.String();
            if (rsig != signature)
                throw new ThawException("signature mismatch loading " + file);
            int rver = tb.Int();
            if (rver != version)
                throw new ThawException("version mismatch loading " + file);

            su.root = tb.ObjRef();
            success = true;
        } finally {
            // don't leave half-read units in the map
            if (!success)
                UnloadUnit(name);
        }

        return su;
    }

    // removes a stale unit so a new version can be saved over it.
    // Forgets the unit and every object registered in it; asking to
    // unload a unit the registry does not know is a no-op.
    public void UnloadUnit(string name) {
        SerUnit su;
        if (!units.TryGetValue(name, out su))
            return;
        units.Remove(name);

        if (su.bynum == null)
            return;

        for (int i = 0; i < su.nobj && i < su.bynum.Length; i++) {
            // Dictionary.Remove rejects null keys
            if (su.bynum[i] != null)
                byref.Remove(su.bynum[i]);
        }
    }

    // Serializes root, and everything reachable from it that is not
    // already owned by another unit, into the file for the named unit.
    // Throws InvalidOperationException if a unit of that name is already
    // known; unload it first.  If freezing or writing fails the
    // registry is returned to the state it had before the call.
    public SerUnit SaveUnit(string name, IFreeze root) {
        if (units.ContainsKey(name))
            throw new InvalidOperationException("unit " + name + " exists");

        SerUnit su = new SerUnit();
        su.name = name;
        su.root = root;

        // Register up front so a failure can be rolled back by
        // UnloadUnit, which also drops the objects registered so far.
        units[name] = su;

        string file = UnitFile(name);
        FreezeBuffer fb = new FreezeBuffer(this, su);

        bool success = false;
        try {
            fb.String(signature);
            fb.Int(version);
            fb.ObjRef(root);

            byte[] data = fb.GetData();
            su.hash = hash.ComputeHash(data);
            File.WriteAllBytes(file, data);
            success = true;
        } finally {
            if (!success)
                UnloadUnit(name);
        }

        return su;
    }
}
171
// Every object reference in the stream starts with one of these codes.
// The numeric values are part of the file format, so the order of the
// members below matters.
enum SerializationCode : byte {
    // special
    Null,

    // existing objects
    ForeignRef,
    SelfRef,
    NewUnitRef,

    // types of new object
    RuntimeUnit,
    SubInfo,
    STable,
    StashEnt,
    Rat,
    FatRat,
    Complex,
    BigInteger,
    VarDeque,
    VarHash,

    // types of P6any-reified object
    P6opaque, // eventually let's specialize this
    Frame,
    Cursor,

    // miscellany
    Variant, // allow 5, see FallbackFreeze

    // variables
    SimpleVariable = Variant + 5, // allow 4 for flags
    SubstrLValue = SimpleVariable + 4,
    TiedVariable,

    // vivification hooks
    SubViviHook,
    ArrayViviHook,
    NewArrayViviHook,
    HashViviHook,
    NewHashViviHook,

    // Longest-token automaton descriptors
    LADNone, // no-args
    LADNull,
    LADDot,
    LADDispatcher,
    LADImp,
    LADStr, // string
    LADStrNoCase,
    LADMethod,
    LADParam,
    LADOpt, // LAD
    LADPlus,
    LADStar,
    LADSequence, // LAD[]
    LADAny,
    LADCC, // CC
}
231
// An instance of this class is used to serialize serialization units
//
// Values are appended to a growing byte array.  Multi-byte integers are
// written most significant byte first; strings and arrays are written as
// an Int length (-1 standing for null) followed by their elements.
public class FreezeBuffer {
    byte[] data;
    int wpointer; // number of bytes of data in use

    // index assigned to each foreign unit referenced so far
    Dictionary<SerUnit,int> unit_to_offset;
    int usedunits;

    ObjectRegistry reg;
    SerUnit unit; // the unit being written

    internal FreezeBuffer(ObjectRegistry reg, SerUnit unit) {
        this.reg = reg;
        this.unit = unit;
        unit_to_offset = new Dictionary<SerUnit,int>();
        data = new byte[256];
    }

    // Returns a copy of everything written so far.
    internal byte[] GetData() {
        byte[] ret = new byte[wpointer];
        Array.Copy(data, ret, ret.Length);
        return ret;
    }

    // Makes sure ct more bytes can be written, doubling the array
    // as often as needed.
    void Ensure(int ct) {
        while (ct + wpointer > data.Length)
            Array.Resize(ref data, data.Length * 2);
    }

    public void Byte(byte x) {
        Ensure(1);
        data[wpointer++] = x;
    }

    public void Short(short x) {
        Ensure(2);
        data[wpointer++] = (byte)(x >> 8);
        data[wpointer++] = (byte)(x     );
    }

    public void Int(int x) {
        Ensure(4);
        data[wpointer++] = (byte)(x >> 24);
        data[wpointer++] = (byte)(x >> 16);
        data[wpointer++] = (byte)(x >>  8);
        data[wpointer++] = (byte)(x      );
    }

    public void Long(long x) {
        Ensure(8);
        data[wpointer++] = (byte)(x >> 56);
        data[wpointer++] = (byte)(x >> 48);
        data[wpointer++] = (byte)(x >> 40);
        data[wpointer++] = (byte)(x >> 32);
        data[wpointer++] = (byte)(x >> 24);
        data[wpointer++] = (byte)(x >> 16);
        data[wpointer++] = (byte)(x >>  8);
        data[wpointer++] = (byte)(x      );
    }

    // Length in chars, then each char as a Short; -1 for null.
    public void String(string s) {
        if (s == null) {
            Int(-1);
            return;
        }

        Int(s.Length);
        foreach (char ch in s)
            Short((short)ch);
    }

    // Element count, then each element as an Int; -1 for null.
    public void Ints(int[] s) {
        if (s == null) {
            Int(-1);
            return;
        }

        Int(s.Length);
        foreach (int v in s)
            Int(v);
    }

    // Element count, then each element as an object reference;
    // -1 for null.
    public void Refs<T> (T[] x) where T: IFreeze {
        if (x == null) {
            Int(-1);
            return;
        }

        Int(x.Length);
        foreach (T y in x)
            ObjRef(y);
    }

    // This is the main routine you should call from your Freeze
    // callbacks to freeze an object
    //
    // An object the registry has seen before is written as a reference
    // (SelfRef, ForeignRef or NewUnitRef); an object seen for the first
    // time is written in full, by its own Freeze method if it has one
    // and by FallbackFreeze otherwise.
    public void ObjRef(object o) {
        if (o == null) { // null pointers are special
            Byte((byte)SerializationCode.Null);
            return;
        }

        int id;
        SerUnit altunit;
        if (!reg.CheckWriteObject(unit, o, out altunit, out id)) {
            // new object: whoever writes it must take responsibility
            // for saving the tag
            IFreeze f = o as IFreeze;
            if (f != null)
                f.Freeze(this);
            else
                FallbackFreeze(o);
            return;
        }

        if (altunit == unit) {
            Byte((byte)SerializationCode.SelfRef);
        } else {
            int altcode;
            if (unit_to_offset.TryGetValue(altunit, out altcode)) {
                // unit already introduced: its index is enough
                Byte((byte)SerializationCode.ForeignRef);
                Int(altcode);
            } else {
                // first reference to this unit: introduce it by name
                Byte((byte)SerializationCode.NewUnitRef);
                String(altunit.name);
                // save the hash too so stale refs can be caught
                foreach (byte b in altunit.hash) Byte(b);

                unit_to_offset[altunit] = usedunits++;
            }
        }
        Int(id);
    }

    // Call this to freeze a variant-typed value. (Avoid)
    // The position of a type in this table is added to
    // SerializationCode.Variant to form the tag.
    static readonly Type[] anyTypes = new Type[] {
        typeof(string), typeof(P6any[]), typeof(Variable[]),
        typeof(int), typeof(double),
    };

    // Writes an object that does not implement IFreeze.  Only the exact
    // types listed in anyTypes are supported; anything else raises
    // NotImplementedException before a single byte is written.
    void FallbackFreeze(object o) {
        Type t = o.GetType();

        int ix = 0;
        while (ix < anyTypes.Length && anyTypes[ix] != t) ix++;
        if (ix == anyTypes.Length)
            throw new NotImplementedException(t.FullName);

        Byte((byte)(((int)SerializationCode.Variant) + ix));

        switch(ix) {
            case 0:
                String((string)o);
                break;
            case 1:
                Refs((P6any[])o);
                break;
            case 2:
                Refs((Variable[])o);
                break;
            case 3:
                Int((int)o);
                break;
            case 4:
                Long(BitConverter.DoubleToInt64Bits((double)o));
                break;
            default:
                // a type was added to anyTypes without a case here
                throw new NotImplementedException(t.FullName);
        }
    }
}
394
// Implemented by objects that write themselves into a FreezeBuffer.
// Only freezing goes through this interface; thawing is done using
// a switch statement instead.
public interface IFreeze {
    void Freeze(FreezeBuffer fb);
}
400
// Reads values back out of the bytes of one serialized unit.  The
// encoding is the one FreezeBuffer writes: multi-byte integers are
// stored most significant byte first, and a string is an Int length
// (negative for null) followed by one Short per char.
//
// Running off the end of the data, or following a reference to a unit
// or object that does not exist, is reported as a ThawException.
class ThawBuffer {
    byte[] data;
    int rpointer; // index of the next unread byte
    ObjectRegistry reg;

    // foreign units referenced so far, in order of first reference
    SerUnit[] unit_map = new SerUnit[8];
    int refed_units;
    SerUnit unit; // the unit being read

    internal ThawBuffer(ObjectRegistry reg, SerUnit unit, byte[] data) {
        this.data = data;
        this.reg = reg;
        this.unit = unit;
    }

    // Throws unless at least ct more bytes are waiting to be read.
    void Need(int ct) {
        if (ct < 0 || ct > data.Length - rpointer)
            throw new ThawException("unexpected end of serialized data");
    }

    public byte Byte() {
        Need(1);
        return data[rpointer++];
    }

    public short Short() {
        int hi = Byte();
        int lo = Byte();
        return (short)((hi << 8) | lo);
    }

    public int Int() {
        int b3 = Byte();
        int b2 = Byte();
        int b1 = Byte();
        int b0 = Byte();
        return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
    }

    public long Long() {
        // try to do as much as possible in 32-bit precision,
        // but suppress sign extension
        long hi = Int();
        long lo = (uint)Int();
        return (hi << 32) | lo;
    }

    public string String() {
        int l = Int();

        if (l < 0) return null;

        // two bytes per char; check before allocating so that a
        // corrupt length cannot demand a huge array
        if (l > (data.Length - rpointer) / 2)
            throw new ThawException("unexpected end of serialized data");

        char[] cb = new char[l];

        for (int i = 0; i < l; i++) {
            int hi = Byte();
            int lo = Byte();
            cb[i] = (char)((hi << 8) | lo);
        }

        return new string(cb);
    }

    public byte[] Bytes(int k) {
        Need(k);
        byte[] buf = new byte[k];

        Array.Copy(data, rpointer, buf, 0, k);
        rpointer += k;

        return buf;
    }

    // Reads one object reference.  Only null and references to objects
    // that already exist are understood here.
    public object ObjRef() {
        var tag = (SerializationCode)Byte();
        switch(tag) {
            case SerializationCode.Null:
                return null;
            case SerializationCode.SelfRef:
                return Lookup(unit, Int());
            case SerializationCode.ForeignRef: {
                int ui = Int();
                int oi = Int();
                if (ui < 0 || ui >= refed_units)
                    throw new ThawException("reference to unknown unit #" + ui);
                return Lookup(unit_map[ui], oi);
            }
            case SerializationCode.NewUnitRef:
                return LoadNewUnit();
            default:
                throw new ThawException("unexpected object tag " + (byte)tag);
        }
    }

    // Fetches object number ix of su, complaining if there is no
    // such slot.
    static object Lookup(SerUnit su, int ix) {
        if (su.bynum == null || ix < 0 || ix >= su.bynum.Length)
            throw new ThawException("object #" + ix +
                    " does not exist in unit " + su.name);
        return su.bynum[ix];
    }

    // Handles the first reference to another unit: loads it through the
    // registry, gives it the next index in unit_map, checks that its
    // hash is the one recorded when the reference was written, and then
    // resolves the referenced object.
    object LoadNewUnit() {
        string name = String();
        if (name == null)
            throw new ThawException("unit reference without a unit name");

        if (refed_units == unit_map.Length)
            Array.Resize(ref unit_map, refed_units * 2);

        SerUnit su = reg.LoadUnit(name);
        unit_map[refed_units++] = su;

        byte[] hash = Bytes(su.hash.Length);

        bool same = true;
        for (int i = 0; same && i < hash.Length; i++)
            same = hash[i] == su.hash[i];

        if (same)
            return Lookup(su, Int());

        StringBuilder sb = new StringBuilder();
        sb.AppendFormat("Hash mismatch for unit {0} referenced from {1}",
                su.name, unit.name);

        sb.Append(", wanted ");
        foreach (byte b in hash)
            sb.AppendFormat("{0:X2}", b);

        sb.Append(", got ");
        foreach (byte b in su.hash)
            sb.AppendFormat("{0:X2}", b);

        throw new ThawException(sb.ToString());
    }
}
507
// Raised when the serialized stream cannot be made sense of.  That is
// not always a bug: a stale file produces it too, whether because the
// data format has changed since it was written or because a file it
// depends on has been recreated.
class ThawException : Exception {
    public ThawException() { }
    public ThawException(string s) : base(s) { }
}
516 }
Something went wrong with that request. Please try again.