Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Imported code.

  • Loading branch information...
commit 03310794fa529e23b5a1edd31b97247793cc60f0 1 parent fb58015
mfp authored
4 .gitignore
View
@@ -0,0 +1,4 @@
+/.omakedb
+/.omakedb.lock
+*.swp
+*.omc
27 OMakefile
View
@@ -0,0 +1,27 @@
+
+NATIVE_ENABLED = true
+OCAMLOPTFLAGS = -S -inline 100
+USE_OCAMLFIND = true
+
+OCAMLPACKS[] =
+ unix
+ extlib
+
+OBJECTS[] =
+ size
+ fasthashtbl
+ hashtbl_mod
+ hashtbl_hval
+ ternary
+ trie_map
+ trie_map_mod
+ benchmark
+
+OCamlProgram(benchmark, $(OBJECTS))
+
+.DEFAULT: benchmark
+
+.PHONY: clean
+
+clean:
+ rm -f $(filter-proper-targets $(ls R, .)) *.s
45 OMakeroot
View
@@ -0,0 +1,45 @@
+########################################################################
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this file, to deal in the File without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the File, and to permit persons to whom the
+# File is furnished to do so, subject to the following condition:
+#
+# THE FILE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE FILE OR
+# THE USE OR OTHER DEALINGS IN THE FILE.
+
+########################################################################
+# The standard OMakeroot file.
+# You will not normally need to modify this file.
+# By default, your changes should be placed in the
+# OMakefile in this directory.
+#
+# If you decide to modify this file, note that it uses exactly
+# the same syntax as the OMakefile.
+#
+
+#
+# Include the standard installed configuration files.
+# Any of these can be deleted if you are not using them,
+# but you probably want to keep the Common file.
+#
+open build/C
+open build/OCaml
+open build/LaTeX
+
+#
+# The command-line variables are defined *after* the
+# standard configuration has been loaded.
+#
+DefineCommandVars()
+
+#
+# Include the OMakefile in this directory.
+#
+.SUBDIRS: .
362 benchmark.ml
View
@@ -0,0 +1,362 @@
+open Printf
+open ExtArray
+
+let time f x =
+ let t0 = Unix.gettimeofday () in
+ let y = f x in
+ let dt = Unix.gettimeofday () -. t0 in
+ (y, dt)
+
+let default_iters = ref 20
+
+let time ?(overhead = 0.) ?(iters = !default_iters) f x =
+ let rec loop best = function
+ n when n <= 0 -> let y, dt = time f x in (y, min (dt -. overhead) best)
+ | n -> let y, dt = time f x in loop (min (dt -. overhead) best) (n - 1)
+ in loop max_float iters
+
+let shuffle a =
+ let a = Array.copy a in
+ let n = ref (Array.length a) in
+ while !n > 1 do
+ decr n;
+ let k = Random.int (!n + 1) in
+ let tmp = a.(k) in
+ a.(k) <- a.(!n);
+ a.(!n) <- tmp
+ done;
+ a
+
+let sort a =
+ let a = Array.copy a in
+ Array.sort String.compare a;
+ a
+
+let read_lines file =
+ let ic = open_in file in
+ let rec loop acc =
+ let line = try Some (input_line ic) with End_of_file -> None in
+ match line with
+ Some "" -> loop acc
+ | Some line -> loop (line :: acc)
+ | None -> close_in ic; Array.of_list (List.rev acc)
+ in loop []
+
+let lines = shuffle (read_lines "/usr/share/dict/american-english")
+let strings = shuffle (read_lines "/usr/share/dict/american-english-large")
+let strings' = shuffle (read_lines "/usr/share/dict/spanish")
+
+let hitrate = 100. *. float (Array.length lines) /. float (Array.length strings)
+
+let hitrate' =
+ let len = Array.length lines in
+ let h = Fasthashtbl.create len in
+ let hits = ref 0 in
+ Array.iter (fun x -> Fasthashtbl.add h x x) lines;
+ Array.iter (fun x -> if Fasthashtbl.mem h x then incr hits) strings';
+ 100. *. float !hits /. float len
+
+let ints =
+ let len = Array.length strings in
+ Array.init len (fun i -> Random.int (100 * len))
+
+let () = ints.(0) <- 42
+
+let timed ?overhead msg f iters =
+ let y, dt = time ?overhead f iters in
+ printf " %-36s %8.5fs (%.0f / sec)\n%!" msg dt (float iters /. dt);
+ y
+
+let timed_array ?overhead msg f a =
+ let iters = Array.length a in
+ let y, dt = time ?overhead f a in
+ printf " %-36s %8.5fs (%.0f / sec)\n%!" msg dt (float iters /. dt);
+ y
+
+let timed_array' overhead msg f a = timed_array ~overhead msg f a
+
+module type STRING_OPS =
+sig
+ type 'v t
+ val id : string
+ val build : string array -> string t
+ val find_arr_hit : 'v t -> string array -> unit
+ val find_arr_miss : 'v t -> string array -> unit
+ val find_constant : 'v t -> int -> unit
+ val measure_overhead : 'v t -> int -> unit
+ val load_factor : 'v t -> float option
+end
+
+module type IMP_HASH =
+sig
+ type ('k, 'v) t
+ val id : string
+ val create : int -> ('k, 'v) t
+ val add : ('k, 'v) t -> 'k -> 'v -> unit
+ val find : ('k, 'v) t -> 'k -> 'v
+ val nop : ('k, 'v) t -> int -> unit
+ val load_factor : ('k, 'v) t -> float option
+end
+
+module Ops_imp (H : IMP_HASH) =
+struct
+ let id = H.id
+
+ let find_arr_hit h a = Array.iter (fun l -> ignore (H.find h l)) a
+
+ let find_arr_miss h a =
+ Array.iter (fun l -> try ignore (H.find h l) with Not_found -> ()) a
+
+ let build a =
+ let h = H.create 15 in
+ Array.iter (fun l -> H.add h l l) a;
+ h
+
+ let find_constant h x n = for i = 1 to n do ignore (H.find h x) done
+
+ let measure_overhead h n = for i = 1 to n do H.nop h n done
+end
+
+module Sops_imp(H : IMP_HASH) : STRING_OPS =
+struct
+ type 'v t = (string, 'v) H.t
+ include Ops_imp(H)
+ let find_constant h n = for i = 1 to n do ignore (H.find h "test") done
+ let load_factor h = H.load_factor h
+end
+
+module Sops_imp'
+ (MH : functor (H : Hashtbl.HashedType with type t = string) -> sig
+ type 'a t
+ val id : string
+ val create : int -> 'a t
+ val add : 'v t -> string -> 'v -> unit
+ val find : 'v t -> string -> 'v
+ val nop : 'v t -> int -> unit
+ val load_factor : 'v t -> float option
+ end) : STRING_OPS =
+struct
+ module H = MH(struct
+ type t = string
+ let equal a b =
+ String.length a = String.length b && String.compare a b = 0
+ let hash s =
+ let n = ref 0 in
+ for i = 0 to String.length s - 1 do
+ n := !n * 33 + Char.code (String.unsafe_get s i)
+ done;
+ !n
+ end)
+ type 'v t = 'v H.t
+ let id = "Functorized " ^ H.id
+
+ let find_arr_hit h a = Array.iter (fun l -> ignore (H.find h l)) a
+
+ let find_arr_miss h a =
+ Array.iter (fun l -> try ignore (H.find h l) with Not_found -> ()) a
+
+ let build a =
+ let h = H.create 15 in
+ Array.iter (fun l -> H.add h l l) a;
+ h
+
+ let find_constant h n = for i = 1 to n do ignore (H.find h "test") done
+ let measure_overhead h n = for i = 1 to n do H.nop h n done
+
+ let load_factor = H.load_factor
+end
+
+module Sops_func(H : sig
+ type 'v t
+ val id : string
+ val empty : 'v t
+ val add : string -> 'v -> 'v t -> 'v t
+ val find : string -> 'v t -> 'v
+ val nop : 'v t -> int -> unit
+ end) : STRING_OPS with type 'v t = 'v H.t =
+struct
+ type 'v t = 'v H.t
+ let id = H.id
+ let find_arr_hit h a = Array.iter (fun l -> ignore (H.find l h)) a
+ let find_arr_miss h a =
+ Array.iter (fun l -> try ignore (H.find l h) with Not_found -> ()) a
+ let find_constant h n = for i = 1 to n do ignore (H.find "test" h) done
+ let build = Array.fold_left (fun m x -> H.add x x m) H.empty
+ let measure_overhead h n = for i = 1 to n do H.nop h n done
+ let load_factor h = None
+end
+
+let print_size_and_load_factor load_factor_f h =
+ let lfactor_s = match load_factor_f h with
+ None -> ""
+ | Some f -> sprintf "(load factor %5.3f)" f
+ in printf " struct size: %d %s\n" (Size.size_b h) lfactor_s
+
+module BM_strings(Ops : STRING_OPS) =
+struct
+ open Ops
+ let run show_size =
+ printf "%s:\n" id;
+ print_endline " strings";
+ let sorted = sort lines in
+ let rev_sorted = Array.rev sorted in
+ let sorted_strings' = sort strings' in
+ let shuffled = shuffle lines in
+ let h = timed_array "add" build lines in
+ let h' = timed_array "add (sorted)" build sorted in
+ let (), o = time ~iters:10 (measure_overhead h) 20_000_000 in
+ let o = o /. 2e7 in (* time per iteration *)
+ let overhead a = o *. float (Array.length a) in
+ let o1 = overhead lines in
+ let o2 = overhead strings in
+ let o3 = overhead strings' in
+ if show_size then print_size_and_load_factor Ops.load_factor h;
+ timed "find (hit, constant + overhead)" (find_constant h) 1000000;
+ timed ~overhead:(o *. 1e6) "find (hit, constant, no overhead)"
+ (find_constant h) 1_000_000;
+ printf
+ " overhead (%8.5fms / million items) removed from next measurements:\n"
+ (o *. 1e9);
+ timed_array' o1 "find (hit, randomized)" (find_arr_hit h) shuffled;
+ timed_array' o1 "find (hit)" (find_arr_hit h) lines;
+ timed_array' o1 "find (hit, sorted)" (find_arr_hit h) sorted;
+ timed_array' o1 "find (hit, rev sorted)" (find_arr_hit h) rev_sorted;
+ timed_array' o1 "find (hit, sorted vs. sorted)" (find_arr_hit h') sorted;
+ timed_array' o1 "find (hit, rev sorted vs. sorted)" (find_arr_hit h')
+ rev_sorted;
+ timed_array' o2 (sprintf "find (%4.1f%% hit, rand)" hitrate)
+ (find_arr_miss h) strings;
+ timed_array' o3 (sprintf "find (%4.1f%% hit, rand)" hitrate')
+ (find_arr_miss h) strings';
+ timed_array' o3 (sprintf "find (%4.1f%% hit, sorted)" hitrate')
+ (find_arr_miss h) sorted_strings';
+ timed_array' o3 (sprintf "find (%4.1f%% hit, sorted vs. sorted)" hitrate')
+ (find_arr_miss h') sorted_strings';
+ print_newline ()
+end
+
+module BM_int_imp(H : IMP_HASH) =
+struct
+ include Ops_imp(H)
+
+ let run show_size =
+ printf "%s:\n" H.id;
+ print_endline " ints";
+ let h =
+ timed_array "add"
+ (fun a -> let h = H.create 13 in Array.iter (fun l -> H.add h l l) a; h)
+ ints in
+ let (), o = time ~iters:10 (measure_overhead h) 20_000_000 in
+ let o = o /. 2e7 in (* time per iteration *)
+ let o1 = o *. float (Array.length ints) in
+ if show_size then print_size_and_load_factor H.load_factor h;
+ timed "find (constant w/ overhead)" (find_constant h 42) 1000000;
+ timed ~overhead:(o *. 1e6) "find (constant, no overhead)"
+ (find_constant h 42) 1000000;
+ printf
+ " overhead (%8.5fms / million items) removed from next measurements:\n"
+ (o *. 1e9);
+ timed_array' o1 "find (hit, randomized)" (find_arr_hit h) (shuffle ints);
+ timed_array' o1 "find (hit)" (find_arr_hit h) (ints);
+ timed_array' o1 "find (1% hit, rand.)"
+ (find_arr_miss h)
+ (shuffle (Array.init (Array.length ints) (fun i -> i)));
+ print_newline ()
+end
+
+let nop h n = ()
+
+module FH = struct
+ include Fasthashtbl
+ let id = "Fasthashtbl"
+ let nop = nop
+ let load_factor h = Some (load_factor h)
+end
+
+module H = struct
+ include Hashtbl
+ let id = "Hashtbl"
+ let nop = nop
+ let load_factor h = None
+end
+
+module H' = struct
+ include Hashtbl_mod
+ let id = "Hashtbl_mod"
+ let nop = nop
+ let load_factor h = Some (load_factor h)
+end
+
+module H'' = struct
+ include Hashtbl_hval
+ let id = "Hashtbl_hval"
+ let nop = nop
+ let load_factor h = Some (load_factor h)
+end
+
+module T = struct include Ternary let id = "Ternary" let nop = nop end
+
+module TM = struct include Trie_map let id = "Trie_map" let nop = nop end
+
+module TM' = struct include Trie_map_mod let id = "Trie_map'" let nop = nop end
+
+module M = struct include Map.Make(String) let id = "Map" let nop = nop end
+
+
+module type HT = Hashtbl.HashedType
+
+module MH(H : HT) = struct
+ include Hashtbl.Make(H)
+ let id = "Hashtbl"
+ let nop = nop
+ let load_factor h = None
+end
+
+module MH'(H : HT) = struct
+ include Hashtbl_mod.Make(H)
+ let id = "Hashtbl_mod"
+ let nop = nop
+ let load_factor h = None
+end
+
+module MH''(H : HT) = struct
+ include Hashtbl_hval.Make(H)
+ let id = "Hashtbl_hval"
+ let nop = nop
+ let load_factor h = None
+end
+
+let show_sizes = ref false
+
+let args = [
+ "-n", Arg.Set_int default_iters, "N Number of iterations (default: 10)";
+ "-s", Arg.Set show_sizes, " Show structure sizes.";
+]
+
+let () =
+ Arg.parse (Arg.align args) ignore "";
+ printf "String set size: %d\n" (Array.length lines);
+ printf "Target array 1: %d\n" (Array.length strings);
+ printf "Target array 2: %d\n" (Array.length strings');
+ print_newline ();
+ Gc.compact ();
+ for i = 0 to 0 do
+ let module B = BM_int_imp(FH) in B.run !show_sizes;
+ let module B = BM_int_imp(H) in B.run !show_sizes;
+ let module B = BM_int_imp(H') in B.run !show_sizes;
+ let module B = BM_int_imp(H'') in B.run !show_sizes;
+ let module B = BM_strings(Sops_imp(FH)) in B.run !show_sizes;
+ let module B = BM_strings(Sops_imp(H)) in B.run !show_sizes;
+ let module B = BM_strings(Sops_imp(H')) in B.run !show_sizes;
+ let module B = BM_strings(Sops_imp(H'')) in B.run !show_sizes;
+
+ (* let module B = BM_strings(Sops_imp'(MH)) in B.run !show_sizes; *)
+ (* let module B = BM_strings(Sops_imp'(MH')) in B.run !show_sizes; *)
+ (* let module B = BM_strings(Sops_imp'(MH'')) in B.run !show_sizes; *)
+
+ let module B = BM_strings(Sops_func(T)) in B.run !show_sizes;
+ let module B = BM_strings(Sops_func(TM)) in B.run false;
+ let module B = BM_strings(Sops_func(TM')) in B.run !show_sizes;
+ let module B = BM_strings(Sops_func(M)) in B.run !show_sizes;
+ ()
+ done
138 fasthashtbl.ml
View
@@ -0,0 +1,138 @@
+
+type ('k, 'v) t =
+ { mutable size : int;
+ mutable deletions : int;
+ mutable data : ('k, 'v) entry array }
+
+and ('k, 'v) entry = Empty | Removed | Data of 'k * 'v * int
+
+(* max_array_length is odd, want an even factor *)
+let max_len = (Sys.max_array_length + 1) / 2
+
+let hash = Hashtbl.hash
+
+let pow_of_two_size n =
+ let rec loop m =
+ if m > max_len then max_len
+ else if m >= n then m
+ else loop (2 * m)
+ in loop 8
+
+let mask h = Array.length h.data - 1
+
+let length h = h.size
+
+let stride hval mask = (((hval lsr 16) lor (hval lsl 16)) land mask) lor 1
+
+let max_deletions h = Array.length h.data lsr 1
+
+let create initial_size =
+ let s = pow_of_two_size initial_size in
+ { size = 0; deletions = 0; data = Array.make s Empty }
+
+let resize h nsize =
+ let odata = h.data in
+ let nsize = pow_of_two_size nsize in
+ let osize = Array.length odata in
+ if nsize <> osize then begin
+ let ndata = Array.create nsize Empty in
+ let nmask = nsize - 1 in
+ for i = 0 to osize - 1 do
+ match odata.(i) with
+ Empty -> ()
+ | Removed -> ()
+ | Data (_, _, hash) as data ->
+ let pos = hash land nmask in
+ match ndata.(pos) with
+ Empty -> ndata.(pos) <- data
+ | Removed -> assert false
+ | Data _ ->
+ let stride = stride hash nmask in
+ let rec attempt pos =
+ match ndata.(pos) with
+ Empty -> ndata.(pos) <- data
+ | Removed -> assert false (* no removed at first *)
+ | Data _ -> attempt ((pos + stride) land nmask)
+ in attempt ((pos + stride) land nmask)
+ done;
+ h.data <- ndata;
+ h.deletions <- 0
+ end
+
+let add h k v =
+ let () =
+ let osize = Array.length h.data in
+ if h.size + 1 > osize lsr 1 then resize h (osize * 2) in
+ let mask = mask h in
+ let hval = hash k in
+ let i = hval land mask in
+ h.size <- h.size + 1;
+ match h.data.(i) with
+ Empty -> h.data.(i) <- Data (k, v, hval)
+ | Removed -> h.deletions <- h.deletions - 1;
+ h.data.(i) <- Data (k, v, hval)
+ | Data (k', _, hval') when hval = hval' && k = k' ->
+ h.data.(i) <- Data (k, v, hval)
+ | Data (k', v', hval') ->
+ let m = stride hval mask in
+ let rec walk_and_add n =
+ let n = (n + m) land mask in
+ match h.data.(n) with
+ Empty -> h.data.(n) <- Data (k, v, hval)
+ | Removed -> h.deletions <- h.deletions - 1;
+ h.data.(n) <- Data (k, v, hval)
+ | Data (k', _, hval') when hval = hval' && k = k' ->
+ h.data.(n) <- Data (k, v, hval)
+ | _ -> walk_and_add n
+ in walk_and_add i
+
+let resize_after_remove h =
+ if h.deletions > max_deletions h then
+ resize h (pow_of_two_size (2 * h.size))
+
+let remove h k =
+ let mask = mask h in
+ let hval = hash k in
+ let i = hval land mask in
+ match h.data.(i) with
+ Empty -> ()
+ | Data (k', _, hval') when hval = hval' && k = k' ->
+ h.data.(i) <- Removed;
+ h.deletions <- h.deletions + 1;
+ h.size <- h.size - 1;
+ resize_after_remove h
+ | _ ->
+ let stride = stride hval mask in
+ let rec walk_and_remove n =
+ let n = (n + stride) land mask in
+ match h.data.(n) with
+ Empty -> ()
+ | Data (k', _, hval') when hval = hval' && k = k' ->
+ h.data.(n) <- Removed;
+ h.deletions <- h.deletions + 1;
+ h.size <- h.size - 1
+ | _ -> walk_and_remove n
+ in walk_and_remove i;
+ resize_after_remove h
+
+let find h k =
+ let mask = mask h in
+ let hval = hash k in
+ let i = hval land mask in
+ match h.data.(i) with
+ Empty -> raise Not_found
+ | Data (k', v, hval') when hval = hval' && k = k' -> v
+ | _ ->
+ let rec walk data n stride mask =
+ let n = (n + stride) land mask in
+ match data.(n) with
+ Empty -> raise Not_found
+ | Data (k', v, hval') when hval = hval' && k = k' -> v
+ | _ -> walk data n stride mask
+ in walk h.data i (stride hval mask) mask
+
+let mem h k = try ignore (find h k); true with Not_found -> false
+
+let load_factor h = float h.size /. float (Array.length h.data)
+
+let del_factor h = float h.deletions /. float (Array.length h.data)
293 hashtbl_hval.ml
View
@@ -0,0 +1,293 @@
+(***********************************************************************)
+(* *)
+(* Objective Caml *)
+(* *)
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* *)
+(* Copyright 1996 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU Library General Public License, with *)
+(* the special exception on linking described in file ../LICENSE. *)
+(* *)
+(***********************************************************************)
+
+(* $Id: hashtbl.ml,v 1.27 2005-10-25 18:34:07 doligez Exp $ *)
+
+(* Hash tables *)
+
+external hash_param : int -> int -> 'a -> int = "caml_hash_univ_param" "noalloc"
+
+let hash x = hash_param 10 100 x
+
+(* We do dynamic hashing, and resize the table and rehash the elements
+ when buckets become too long. *)
+
+type ('a, 'b) t =
+ { mutable size: int; (* number of elements *)
+ mutable data: ('a, 'b) bucketlist array } (* the buckets *)
+
+and ('a, 'b) bucketlist =
+ Empty
+ | Cons of int * 'a * 'b * ('a, 'b) bucketlist
+
+let create initial_size =
+ let s = min (max 1 initial_size) Sys.max_array_length in
+ { size = 0; data = Array.make s Empty }
+
+let clear h =
+ for i = 0 to Array.length h.data - 1 do
+ h.data.(i) <- Empty
+ done;
+ h.size <- 0
+
+let copy h =
+ { size = h.size;
+ data = Array.copy h.data }
+
+let length h = h.size
+
+let load_factor h = float h.size /. float (Array.length h.data)
+
+let resize hashfun tbl =
+ let odata = tbl.data in
+ let osize = Array.length odata in
+ let nsize = min (2 * osize + 1) Sys.max_array_length in
+ if nsize <> osize then begin
+ let ndata = Array.create nsize Empty in
+ let rec insert_bucket = function
+ Empty -> ()
+ | Cons(hval, key, data, rest) ->
+ insert_bucket rest; (* preserve original order of elements *)
+ let nidx = hval mod nsize in
+ ndata.(nidx) <- Cons(hval, key, data, ndata.(nidx)) in
+ for i = 0 to osize - 1 do
+ insert_bucket odata.(i)
+ done;
+ tbl.data <- ndata;
+ end
+
+let add h key info =
+ let hval = hash key in
+ let i = hval mod (Array.length h.data) in
+ let bucket = Cons(hval, key, info, h.data.(i)) in
+ h.data.(i) <- bucket;
+ h.size <- succ h.size;
+ if h.size > Array.length h.data lsr 1 then resize hash h
+
+let remove h key =
+ let rec remove_bucket hval = function
+ Empty ->
+ Empty
+ | Cons(hval', k, i, next) ->
+ if hval = hval' && compare k key = 0
+ then begin h.size <- pred h.size; next end
+ else Cons(hval', k, i, remove_bucket hval next) in
+ let hval = hash key in
+ let i = hval mod (Array.length h.data) in
+ h.data.(i) <- remove_bucket hval h.data.(i)
+
+let rec find_rec hval key = function
+ Empty ->
+ raise Not_found
+ | Cons(hval', k, d, rest) ->
+ if hval = hval' && compare key k = 0 then d else find_rec hval key rest
+
+let find h key =
+ let hval = hash key in
+ match h.data.(hval mod (Array.length h.data)) with
+ Empty -> raise Not_found
+ | Cons(hval', k1, d1, rest1) ->
+ if hval = hval' && compare key k1 = 0 then d1 else
+ match rest1 with
+ Empty -> raise Not_found
+ | Cons(hval', k2, d2, rest2) ->
+ if hval = hval' && compare key k2 = 0 then d2 else
+ match rest2 with
+ Empty -> raise Not_found
+ | Cons(hval', k3, d3, rest3) ->
+ if hval = hval' && compare key k3 = 0 then d3 else find_rec hval key rest3
+
+let find_all h key =
+ let rec find_in_bucket hval = function
+ Empty ->
+ []
+ | Cons(hval', k, d, rest) ->
+ if hval = hval' && compare k key = 0
+ then d :: find_in_bucket hval rest
+ else find_in_bucket hval rest in
+ let hval = hash key in
+ find_in_bucket hval h.data.(hval mod (Array.length h.data))
+
+let replace h key info =
+ let rec replace_bucket hval = function
+ Empty ->
+ raise Not_found
+ | Cons(hval', k, i, next) ->
+ if hval = hval' && compare k key = 0
+ then Cons(hval', k, info, next)
+ else Cons(hval', k, i, replace_bucket hval next) in
+ let hval = hash key in
+ let i = hval mod (Array.length h.data) in
+ let l = h.data.(i) in
+ try
+ h.data.(i) <- replace_bucket hval l
+ with Not_found ->
+ h.data.(i) <- Cons(hval, key, info, l);
+ h.size <- succ h.size;
+ if h.size > Array.length h.data lsl 1 then resize hash h
+
+let mem h key =
+ let rec mem_in_bucket hval = function
+ | Empty ->
+ false
+ | Cons(hval', k, d, rest) ->
+ hval = hval' && compare k key = 0 || mem_in_bucket hval rest in
+ let hval = hash key in
+ mem_in_bucket hval h.data.(hval mod (Array.length h.data))
+
+let iter f h =
+ let rec do_bucket = function
+ Empty ->
+ ()
+ | Cons(_, k, d, rest) ->
+ f k d; do_bucket rest in
+ let d = h.data in
+ for i = 0 to Array.length d - 1 do
+ do_bucket d.(i)
+ done
+
+let fold f h init =
+ let rec do_bucket b accu =
+ match b with
+ Empty ->
+ accu
+ | Cons(_, k, d, rest) ->
+ do_bucket rest (f k d accu) in
+ let d = h.data in
+ let accu = ref init in
+ for i = 0 to Array.length d - 1 do
+ accu := do_bucket d.(i) !accu
+ done;
+ !accu
+
+(* Functorial interface *)
+
+module type HashedType =
+ sig
+ type t
+ val equal: t -> t -> bool
+ val hash: t -> int
+ end
+
+module type S =
+ sig
+ type key
+ type 'a t
+ val create: int -> 'a t
+ val clear: 'a t -> unit
+ val copy: 'a t -> 'a t
+ val add: 'a t -> key -> 'a -> unit
+ val remove: 'a t -> key -> unit
+ val find: 'a t -> key -> 'a
+ val find_all: 'a t -> key -> 'a list
+ val replace : 'a t -> key -> 'a -> unit
+ val mem : 'a t -> key -> bool
+ val iter: (key -> 'a -> unit) -> 'a t -> unit
+ val fold: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
+ val length: 'a t -> int
+ end
+
+module Make(H: HashedType): (S with type key = H.t) =
+ struct
+ type key = H.t
+ type 'a hashtbl = (key, 'a) t
+ type 'a t = 'a hashtbl
+ let create = create
+ let clear = clear
+ let copy = copy
+
+ let safehash key = (H.hash key) land max_int
+
+ let add h key info =
+ let hval = safehash key in
+ let i = hval mod (Array.length h.data) in
+ let bucket = Cons(hval, key, info, h.data.(i)) in
+ h.data.(i) <- bucket;
+ h.size <- succ h.size;
+ if h.size > Array.length h.data lsr 1 then resize safehash h
+
+ let remove h key =
+ let rec remove_bucket hval = function
+ Empty ->
+ Empty
+ | Cons(hval', k, i, next) ->
+ if hval = hval' && H.equal k key
+ then begin h.size <- pred h.size; next end
+ else Cons(hval', k, i, remove_bucket hval next) in
+ let hval = safehash key in
+ let i = hval mod (Array.length h.data) in
+ h.data.(i) <- remove_bucket hval h.data.(i)
+
+ let rec find_rec hval key = function
+ Empty ->
+ raise Not_found
+ | Cons(hval', k, d, rest) ->
+ if hval = hval' && H.equal key k then d else find_rec hval key rest
+
+ let find h key =
+ let hval = safehash key in
+ match h.data.(hval mod (Array.length h.data)) with
+ Empty -> raise Not_found
+ | Cons(hval', k1, d1, rest1) ->
+ if hval' = hval && H.equal key k1 then d1 else
+ match rest1 with
+ Empty -> raise Not_found
+ | Cons(hval', k2, d2, rest2) ->
+ if hval' = hval && H.equal key k2 then d2 else
+ match rest2 with
+ Empty -> raise Not_found
+ | Cons(hval', k3, d3, rest3) ->
+ if hval' = hval && H.equal key k3 then d3 else find_rec hval key rest3
+
+ let find_all h key =
+ let rec find_in_bucket hval = function
+ Empty ->
+ []
+ | Cons(hval', k, d, rest) ->
+ if hval = hval' && H.equal k key
+ then d :: find_in_bucket hval rest
+ else find_in_bucket hval rest in
+ let hval = safehash key in
+ find_in_bucket hval h.data.(hval mod (Array.length h.data))
+
+ let replace h key info =
+ let rec replace_bucket hval = function
+ Empty ->
+ raise Not_found
+ | Cons(hval', k, i, next) ->
+ if hval = hval' && H.equal k key
+ then Cons(hval', k, info, next)
+ else Cons(hval', k, i, replace_bucket hval next) in
+ let hval = safehash key in
+ let i = hval mod (Array.length h.data) in
+ let l = h.data.(i) in
+ try
+ h.data.(i) <- replace_bucket hval l
+ with Not_found ->
+ h.data.(i) <- Cons(hval, key, info, l);
+ h.size <- succ h.size;
+ if h.size > Array.length h.data lsl 1 then resize safehash h
+
+ let mem h key =
+ let rec mem_in_bucket hval = function
+ | Empty ->
+ false
+ | Cons(hval', k, d, rest) ->
+ hval = hval' && H.equal k key || mem_in_bucket hval rest in
+ let hval = safehash key in
+ mem_in_bucket hval h.data.(hval mod (Array.length h.data))
+
+ let iter = iter
+ let fold = fold
+ let length = length
+ end
179 hashtbl_hval.mli
View
@@ -0,0 +1,179 @@
+(***********************************************************************)
+(* *)
+(* Objective Caml *)
+(* *)
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* *)
+(* Copyright 1996 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU Library General Public License, with *)
+(* the special exception on linking described in file ../LICENSE. *)
+(* *)
+(***********************************************************************)
+
+(* $Id: hashtbl.mli,v 1.39 2005-05-04 13:36:47 doligez Exp $ *)
+
+(** Hash tables and hash functions.
+
+ Hash tables are hashed association tables, with in-place modification.
+*)
+
+
+(** {6 Generic interface} *)
+
+
+type ('a, 'b) t
+(** The type of hash tables from type ['a] to type ['b]. *)
+
+val create : int -> ('a, 'b) t
+(** [Hashtbl.create n] creates a new, empty hash table, with
+ initial size [n]. For best results, [n] should be on the
+ order of the expected number of elements that will be in
+ the table. The table grows as needed, so [n] is just an
+ initial guess. *)
+
+val clear : ('a, 'b) t -> unit
+(** Empty a hash table. *)
+
+
+val add : ('a, 'b) t -> 'a -> 'b -> unit
+(** [Hashtbl.add tbl x y] adds a binding of [x] to [y] in table [tbl].
+ Previous bindings for [x] are not removed, but simply
+ hidden. That is, after performing {!Hashtbl.remove}[ tbl x],
+ the previous binding for [x], if any, is restored.
+ (Same behavior as with association lists.) *)
+
+val copy : ('a, 'b) t -> ('a, 'b) t
+(** Return a copy of the given hashtable. *)
+
+val find : ('a, 'b) t -> 'a -> 'b
+(** [Hashtbl.find tbl x] returns the current binding of [x] in [tbl],
+ or raises [Not_found] if no such binding exists. *)
+
+val find_all : ('a, 'b) t -> 'a -> 'b list
+(** [Hashtbl.find_all tbl x] returns the list of all data
+ associated with [x] in [tbl].
+ The current binding is returned first, then the previous
+ bindings, in reverse order of introduction in the table. *)
+
+val mem : ('a, 'b) t -> 'a -> bool
+(** [Hashtbl.mem tbl x] checks if [x] is bound in [tbl]. *)
+
+val remove : ('a, 'b) t -> 'a -> unit
+(** [Hashtbl.remove tbl x] removes the current binding of [x] in [tbl],
+ restoring the previous binding if it exists.
+ It does nothing if [x] is not bound in [tbl]. *)
+
+val replace : ('a, 'b) t -> 'a -> 'b -> unit
+(** [Hashtbl.replace tbl x y] replaces the current binding of [x]
+ in [tbl] by a binding of [x] to [y]. If [x] is unbound in [tbl],
+ a binding of [x] to [y] is added to [tbl].
+ This is functionally equivalent to {!Hashtbl.remove}[ tbl x]
+ followed by {!Hashtbl.add}[ tbl x y]. *)
+
+val iter : ('a -> 'b -> unit) -> ('a, 'b) t -> unit
+(** [Hashtbl.iter f tbl] applies [f] to all bindings in table [tbl].
+ [f] receives the key as first argument, and the associated value
+ as second argument. Each binding is presented exactly once to [f].
+ The order in which the bindings are passed to [f] is unspecified.
+ However, if the table contains several bindings for the same key,
+ they are passed to [f] in reverse order of introduction, that is,
+ the most recent binding is passed first. *)
+
+val fold : ('a -> 'b -> 'c -> 'c) -> ('a, 'b) t -> 'c -> 'c
+(** [Hashtbl.fold f tbl init] computes
+ [(f kN dN ... (f k1 d1 init)...)],
+ where [k1 ... kN] are the keys of all bindings in [tbl],
+ and [d1 ... dN] are the associated values.
+ Each binding is presented exactly once to [f].
+ The order in which the bindings are passed to [f] is unspecified.
+ However, if the table contains several bindings for the same key,
+ they are passed to [f] in reverse order of introduction, that is,
+ the most recent binding is passed first. *)
+
+
+val length : ('a, 'b) t -> int
+(** [Hashtbl.length tbl] returns the number of bindings in [tbl].
+ Multiple bindings are counted multiply, so [Hashtbl.length]
+ gives the number of times [Hashtbl.iter] calls its first argument. *)
+
+
+val load_factor : ('a, 'b) t -> float
+
+(** {6 Functorial interface} *)
+
+
+module type HashedType =
+ sig
+ type t
+ (** The type of the hashtable keys. *)
+ val equal : t -> t -> bool
+ (** The equality predicate used to compare keys. *)
+ val hash : t -> int
+ (** A hashing function on keys. It must be such that if two keys are
+ equal according to [equal], then they have identical hash values
+ as computed by [hash].
+ Examples: suitable ([equal], [hash]) pairs for arbitrary key
+ types include
+ ([(=)], {!Hashtbl.hash}) for comparing objects by structure,
+ ([(fun x y -> compare x y = 0)], {!Hashtbl.hash})
+ for comparing objects by structure and handling {!Pervasives.nan}
+ correctly, and
+ ([(==)], {!Hashtbl.hash}) for comparing objects by addresses
+ (e.g. for cyclic keys). *)
+ end
+(** The input signature of the functor {!Hashtbl.Make}. *)
+
+module type S =
+ sig
+ type key
+ type 'a t
+ val create : int -> 'a t
+ val clear : 'a t -> unit
+ val copy : 'a t -> 'a t
+ val add : 'a t -> key -> 'a -> unit
+ val remove : 'a t -> key -> unit
+ val find : 'a t -> key -> 'a
+ val find_all : 'a t -> key -> 'a list
+ val replace : 'a t -> key -> 'a -> unit
+ val mem : 'a t -> key -> bool
+ val iter : (key -> 'a -> unit) -> 'a t -> unit
+ val fold : (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
+ val length : 'a t -> int
+ end
+(** The output signature of the functor {!Hashtbl.Make}. *)
+
+module Make (H : HashedType) : S with type key = H.t
+(** Functor building an implementation of the hashtable structure.
+ The functor [Hashtbl.Make] returns a structure containing
+ a type [key] of keys and a type ['a t] of hash tables
+ associating data of type ['a] to keys of type [key].
+ The operations perform similarly to those of the generic
+ interface, but use the hashing and equality functions
+ specified in the functor argument [H] instead of generic
+ equality and hashing. *)
+
+
+(** {6 The polymorphic hash primitive} *)
+
+
+val hash : 'a -> int
+(** [Hashtbl.hash x] associates a positive integer to any value of
+ any type. It is guaranteed that
+ if [x = y] or [Pervasives.compare x y = 0], then [hash x = hash y].
+ Moreover, [hash] always terminates, even on cyclic
+ structures. *)
+
+external hash_param : int -> int -> 'a -> int = "caml_hash_univ_param" "noalloc"
+(** [Hashtbl.hash_param n m x] computes a hash value for [x], with the
+ same properties as for [hash]. The two extra parameters [n] and
+ [m] give more precise control over hashing. Hashing performs a
+ depth-first, right-to-left traversal of the structure [x], stopping
+ after [n] meaningful nodes were encountered, or [m] nodes,
+ meaningful or not, were encountered. Meaningful nodes are: integers;
+ floating-point numbers; strings; characters; booleans; and constant
+ constructors. Larger values of [m] and [n] means that more
+ nodes are taken into account to compute the final hash
+ value, and therefore collisions are less likely to happen.
+ However, hashing takes longer. The parameters [m] and [n]
+ govern the tradeoff between accuracy and speed. *)
281 hashtbl_mod.ml
View
@@ -0,0 +1,281 @@
+(***********************************************************************)
+(* *)
+(* Objective Caml *)
+(* *)
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* *)
+(* Copyright 1996 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU Library General Public License, with *)
+(* the special exception on linking described in file ../LICENSE. *)
+(* *)
+(***********************************************************************)
+
+(* $Id: hashtbl.ml,v 1.27 2005-10-25 18:34:07 doligez Exp $ *)
+
+(* Hash tables *)
+
+external hash_param : int -> int -> 'a -> int = "caml_hash_univ_param" "noalloc"
+
+let hash x = hash_param 10 100 x
+
+(* We do dynamic hashing, and resize the table and rehash the elements
+ when buckets become too long. *)
+
+type ('a, 'b) t =
+ { mutable size: int; (* number of elements *)
+ mutable data: ('a, 'b) bucketlist array } (* the buckets *)
+
+and ('a, 'b) bucketlist =
+ Empty
+ | Cons of 'a * 'b * ('a, 'b) bucketlist
+
+let create initial_size =
+ let s = min (max 1 initial_size) Sys.max_array_length in
+ { size = 0; data = Array.make s Empty }
+
+let clear h =
+ for i = 0 to Array.length h.data - 1 do
+ h.data.(i) <- Empty
+ done;
+ h.size <- 0
+
+let copy h =
+ { size = h.size;
+ data = Array.copy h.data }
+
+let length h = h.size
+
+let load_factor h = float h.size /. float (Array.length h.data)
+
+let resize hashfun tbl =
+ let odata = tbl.data in
+ let osize = Array.length odata in
+ let nsize = min (2 * osize + 1) Sys.max_array_length in
+ if nsize <> osize then begin
+ let ndata = Array.create nsize Empty in
+ let rec insert_bucket = function
+ Empty -> ()
+ | Cons(key, data, rest) ->
+ insert_bucket rest; (* preserve original order of elements *)
+ let nidx = (hashfun key) mod nsize in
+ ndata.(nidx) <- Cons(key, data, ndata.(nidx)) in
+ for i = 0 to osize - 1 do
+ insert_bucket odata.(i)
+ done;
+ tbl.data <- ndata;
+ end
+
+let add h key info =
+ let i = (hash key) mod (Array.length h.data) in
+ let bucket = Cons(key, info, h.data.(i)) in
+ h.data.(i) <- bucket;
+ h.size <- succ h.size;
+ if h.size > Array.length h.data lsr 1 then resize hash h
+
+let remove h key =
+ let rec remove_bucket = function
+ Empty ->
+ Empty
+ | Cons(k, i, next) ->
+ if compare k key = 0
+ then begin h.size <- pred h.size; next end
+ else Cons(k, i, remove_bucket next) in
+ let i = (hash key) mod (Array.length h.data) in
+ h.data.(i) <- remove_bucket h.data.(i)
+
+let rec find_rec key = function
+ Empty ->
+ raise Not_found
+ | Cons(k, d, rest) ->
+ if compare key k = 0 then d else find_rec key rest
+
+let find h key =
+ match h.data.((hash key) mod (Array.length h.data)) with
+ Empty -> raise Not_found
+ | Cons(k1, d1, rest1) ->
+ if compare key k1 = 0 then d1 else
+ match rest1 with
+ Empty -> raise Not_found
+ | Cons(k2, d2, rest2) ->
+ if compare key k2 = 0 then d2 else
+ match rest2 with
+ Empty -> raise Not_found
+ | Cons(k3, d3, rest3) ->
+ if compare key k3 = 0 then d3 else find_rec key rest3
+
+let find_all h key =
+ let rec find_in_bucket = function
+ Empty ->
+ []
+ | Cons(k, d, rest) ->
+ if compare k key = 0
+ then d :: find_in_bucket rest
+ else find_in_bucket rest in
+ find_in_bucket h.data.((hash key) mod (Array.length h.data))
+
+let replace h key info =
+ let rec replace_bucket = function
+ Empty ->
+ raise Not_found
+ | Cons(k, i, next) ->
+ if compare k key = 0
+ then Cons(k, info, next)
+ else Cons(k, i, replace_bucket next) in
+ let i = (hash key) mod (Array.length h.data) in
+ let l = h.data.(i) in
+ try
+ h.data.(i) <- replace_bucket l
+ with Not_found ->
+ h.data.(i) <- Cons(key, info, l);
+ h.size <- succ h.size;
+ if h.size > Array.length h.data lsl 1 then resize hash h
+
+let mem h key =
+ let rec mem_in_bucket = function
+ | Empty ->
+ false
+ | Cons(k, d, rest) ->
+ compare k key = 0 || mem_in_bucket rest in
+ mem_in_bucket h.data.((hash key) mod (Array.length h.data))
+
+let iter f h =
+ let rec do_bucket = function
+ Empty ->
+ ()
+ | Cons(k, d, rest) ->
+ f k d; do_bucket rest in
+ let d = h.data in
+ for i = 0 to Array.length d - 1 do
+ do_bucket d.(i)
+ done
+
+let fold f h init =
+ let rec do_bucket b accu =
+ match b with
+ Empty ->
+ accu
+ | Cons(k, d, rest) ->
+ do_bucket rest (f k d accu) in
+ let d = h.data in
+ let accu = ref init in
+ for i = 0 to Array.length d - 1 do
+ accu := do_bucket d.(i) !accu
+ done;
+ !accu
+
+(* Functorial interface *)
+
+module type HashedType =
+ sig
+ type t
+ val equal: t -> t -> bool
+ val hash: t -> int
+ end
+
+module type S =
+ sig
+ type key
+ type 'a t
+ val create: int -> 'a t
+ val clear: 'a t -> unit
+ val copy: 'a t -> 'a t
+ val add: 'a t -> key -> 'a -> unit
+ val remove: 'a t -> key -> unit
+ val find: 'a t -> key -> 'a
+ val find_all: 'a t -> key -> 'a list
+ val replace : 'a t -> key -> 'a -> unit
+ val mem : 'a t -> key -> bool
+ val iter: (key -> 'a -> unit) -> 'a t -> unit
+ val fold: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
+ val length: 'a t -> int
+ end
+
+module Make(H: HashedType): (S with type key = H.t) =
+ struct
+ type key = H.t
+ type 'a hashtbl = (key, 'a) t
+ type 'a t = 'a hashtbl
+ let create = create
+ let clear = clear
+ let copy = copy
+
+ let safehash key = (H.hash key) land max_int
+
+ let add h key info =
+ let i = (safehash key) mod (Array.length h.data) in
+ let bucket = Cons(key, info, h.data.(i)) in
+ h.data.(i) <- bucket;
+ h.size <- succ h.size;
+ if h.size > Array.length h.data lsr 1 then resize safehash h
+
+ let remove h key =
+ let rec remove_bucket = function
+ Empty ->
+ Empty
+ | Cons(k, i, next) ->
+ if H.equal k key
+ then begin h.size <- pred h.size; next end
+ else Cons(k, i, remove_bucket next) in
+ let i = (safehash key) mod (Array.length h.data) in
+ h.data.(i) <- remove_bucket h.data.(i)
+
+ let rec find_rec key = function
+ Empty ->
+ raise Not_found
+ | Cons(k, d, rest) ->
+ if H.equal key k then d else find_rec key rest
+
+ let find h key =
+ match h.data.((safehash key) mod (Array.length h.data)) with
+ Empty -> raise Not_found
+ | Cons(k1, d1, rest1) ->
+ if H.equal key k1 then d1 else
+ match rest1 with
+ Empty -> raise Not_found
+ | Cons(k2, d2, rest2) ->
+ if H.equal key k2 then d2 else
+ match rest2 with
+ Empty -> raise Not_found
+ | Cons(k3, d3, rest3) ->
+ if H.equal key k3 then d3 else find_rec key rest3
+
+ let find_all h key =
+ let rec find_in_bucket = function
+ Empty ->
+ []
+ | Cons(k, d, rest) ->
+ if H.equal k key
+ then d :: find_in_bucket rest
+ else find_in_bucket rest in
+ find_in_bucket h.data.((safehash key) mod (Array.length h.data))
+
+ let replace h key info =
+ let rec replace_bucket = function
+ Empty ->
+ raise Not_found
+ | Cons(k, i, next) ->
+ if H.equal k key
+ then Cons(k, info, next)
+ else Cons(k, i, replace_bucket next) in
+ let i = (safehash key) mod (Array.length h.data) in
+ let l = h.data.(i) in
+ try
+ h.data.(i) <- replace_bucket l
+ with Not_found ->
+ h.data.(i) <- Cons(key, info, l);
+ h.size <- succ h.size;
+ if h.size > Array.length h.data lsl 1 then resize safehash h
+
+ let mem h key =
+ let rec mem_in_bucket = function
+ | Empty ->
+ false
+ | Cons(k, d, rest) ->
+ H.equal k key || mem_in_bucket rest in
+ mem_in_bucket h.data.((safehash key) mod (Array.length h.data))
+
+ let iter = iter
+ let fold = fold
+ let length = length
+ end
178 hashtbl_mod.mli
View
@@ -0,0 +1,178 @@
+(***********************************************************************)
+(* *)
+(* Objective Caml *)
+(* *)
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* *)
+(* Copyright 1996 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU Library General Public License, with *)
+(* the special exception on linking described in file ../LICENSE. *)
+(* *)
+(***********************************************************************)
+
+(* $Id: hashtbl.mli,v 1.39 2005-05-04 13:36:47 doligez Exp $ *)
+
+(** Hash tables and hash functions.
+
+ Hash tables are hashed association tables, with in-place modification.
+*)
+
+
+(** {6 Generic interface} *)
+
+
+type ('a, 'b) t
+(** The type of hash tables from type ['a] to type ['b]. *)
+
+val create : int -> ('a, 'b) t
+(** [Hashtbl.create n] creates a new, empty hash table, with
+ initial size [n]. For best results, [n] should be on the
+ order of the expected number of elements that will be in
+ the table. The table grows as needed, so [n] is just an
+ initial guess. *)
+
+val clear : ('a, 'b) t -> unit
+(** Empty a hash table. *)
+
+
+val add : ('a, 'b) t -> 'a -> 'b -> unit
+(** [Hashtbl.add tbl x y] adds a binding of [x] to [y] in table [tbl].
+ Previous bindings for [x] are not removed, but simply
+ hidden. That is, after performing {!Hashtbl.remove}[ tbl x],
+ the previous binding for [x], if any, is restored.
+ (Same behavior as with association lists.) *)
+
+val copy : ('a, 'b) t -> ('a, 'b) t
+(** Return a copy of the given hashtable. *)
+
+val find : ('a, 'b) t -> 'a -> 'b
+(** [Hashtbl.find tbl x] returns the current binding of [x] in [tbl],
+ or raises [Not_found] if no such binding exists. *)
+
+val find_all : ('a, 'b) t -> 'a -> 'b list
+(** [Hashtbl.find_all tbl x] returns the list of all data
+ associated with [x] in [tbl].
+ The current binding is returned first, then the previous
+ bindings, in reverse order of introduction in the table. *)
+
+val mem : ('a, 'b) t -> 'a -> bool
+(** [Hashtbl.mem tbl x] checks if [x] is bound in [tbl]. *)
+
+val remove : ('a, 'b) t -> 'a -> unit
+(** [Hashtbl.remove tbl x] removes the current binding of [x] in [tbl],
+ restoring the previous binding if it exists.
+ It does nothing if [x] is not bound in [tbl]. *)
+
+val replace : ('a, 'b) t -> 'a -> 'b -> unit
+(** [Hashtbl.replace tbl x y] replaces the current binding of [x]
+ in [tbl] by a binding of [x] to [y]. If [x] is unbound in [tbl],
+ a binding of [x] to [y] is added to [tbl].
+ This is functionally equivalent to {!Hashtbl.remove}[ tbl x]
+ followed by {!Hashtbl.add}[ tbl x y]. *)
+
+val iter : ('a -> 'b -> unit) -> ('a, 'b) t -> unit
+(** [Hashtbl.iter f tbl] applies [f] to all bindings in table [tbl].
+ [f] receives the key as first argument, and the associated value
+ as second argument. Each binding is presented exactly once to [f].
+ The order in which the bindings are passed to [f] is unspecified.
+ However, if the table contains several bindings for the same key,
+ they are passed to [f] in reverse order of introduction, that is,
+ the most recent binding is passed first. *)
+
+val fold : ('a -> 'b -> 'c -> 'c) -> ('a, 'b) t -> 'c -> 'c
+(** [Hashtbl.fold f tbl init] computes
+ [(f kN dN ... (f k1 d1 init)...)],
+ where [k1 ... kN] are the keys of all bindings in [tbl],
+ and [d1 ... dN] are the associated values.
+ Each binding is presented exactly once to [f].
+ The order in which the bindings are passed to [f] is unspecified.
+ However, if the table contains several bindings for the same key,
+ they are passed to [f] in reverse order of introduction, that is,
+ the most recent binding is passed first. *)
+
+
+val length : ('a, 'b) t -> int
+(** [Hashtbl.length tbl] returns the number of bindings in [tbl].
+ Multiple bindings are counted multiply, so [Hashtbl.length]
+ gives the number of times [Hashtbl.iter] calls its first argument. *)
+
+val load_factor : ('a, 'b) t -> float
+
+(** {6 Functorial interface} *)
+
+
+module type HashedType =
+ sig
+ type t
+ (** The type of the hashtable keys. *)
+ val equal : t -> t -> bool
+ (** The equality predicate used to compare keys. *)
+ val hash : t -> int
+ (** A hashing function on keys. It must be such that if two keys are
+ equal according to [equal], then they have identical hash values
+ as computed by [hash].
+ Examples: suitable ([equal], [hash]) pairs for arbitrary key
+ types include
+ ([(=)], {!Hashtbl.hash}) for comparing objects by structure,
+ ([(fun x y -> compare x y = 0)], {!Hashtbl.hash})
+ for comparing objects by structure and handling {!Pervasives.nan}
+ correctly, and
+ ([(==)], {!Hashtbl.hash}) for comparing objects by addresses
+ (e.g. for cyclic keys). *)
+ end
+(** The input signature of the functor {!Hashtbl.Make}. *)
+
+module type S =
+ sig
+ type key
+ type 'a t
+ val create : int -> 'a t
+ val clear : 'a t -> unit
+ val copy : 'a t -> 'a t
+ val add : 'a t -> key -> 'a -> unit
+ val remove : 'a t -> key -> unit
+ val find : 'a t -> key -> 'a
+ val find_all : 'a t -> key -> 'a list
+ val replace : 'a t -> key -> 'a -> unit
+ val mem : 'a t -> key -> bool
+ val iter : (key -> 'a -> unit) -> 'a t -> unit
+ val fold : (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
+ val length : 'a t -> int
+ end
+(** The output signature of the functor {!Hashtbl.Make}. *)
+
+module Make (H : HashedType) : S with type key = H.t
+(** Functor building an implementation of the hashtable structure.
+ The functor [Hashtbl.Make] returns a structure containing
+ a type [key] of keys and a type ['a t] of hash tables
+ associating data of type ['a] to keys of type [key].
+ The operations perform similarly to those of the generic
+ interface, but use the hashing and equality functions
+ specified in the functor argument [H] instead of generic
+ equality and hashing. *)
+
+
+(** {6 The polymorphic hash primitive} *)
+
+
+val hash : 'a -> int
+(** [Hashtbl.hash x] associates a positive integer to any value of
+ any type. It is guaranteed that
+ if [x = y] or [Pervasives.compare x y = 0], then [hash x = hash y].
+ Moreover, [hash] always terminates, even on cyclic
+ structures. *)
+
+external hash_param : int -> int -> 'a -> int = "caml_hash_univ_param" "noalloc"
+(** [Hashtbl.hash_param n m x] computes a hash value for [x], with the
+ same properties as for [hash]. The two extra parameters [n] and
+ [m] give more precise control over hashing. Hashing performs a
+ depth-first, right-to-left traversal of the structure [x], stopping
+ after [n] meaningful nodes were encountered, or [m] nodes,
+ meaningful or not, were encountered. Meaningful nodes are: integers;
+ floating-point numbers; strings; characters; booleans; and constant
+ constructors. Larger values of [m] and [n] means that more
+ nodes are taken into account to compute the final hash
+ value, and therefore collisions are less likely to happen.
+ However, hashing takes longer. The parameters [m] and [n]
+ govern the tradeoff between accuracy and speed. *)
105 size.ml
View
@@ -0,0 +1,105 @@
+(**************************************************************************)
+(* *)
+(* Copyright (C) Jean-Christophe Filliatre *)
+(* *)
+(* This software is free software; you can redistribute it and/or *)
+(* modify it under the terms of the GNU Library General Public *)
+(* License version 2.1, with the special exception on linking *)
+(* described in file LICENSE. *)
+(* *)
+(* This software is distributed in the hope that it will be useful, *)
+(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
+(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *)
+(* *)
+(**************************************************************************)
+
+(*i $Id: size.ml,v 1.7 2008-07-21 14:53:06 filliatr Exp $ i*)
+
+(*i*)
+open Obj
+(*i*)
+
+(*s Pointers already visited are stored in a hash-table, where
+ comparisons are done using physical equality. *)
+
+module H = Hashtbl.Make(
+ struct
+ type t = Obj.t
+ let equal = (==)
+ let hash o = Hashtbl.hash (magic o : int)
+ end)
+
+let node_table = (H.create 257 : unit H.t)
+
+let in_table o = try H.find node_table o; true with Not_found -> false
+
+let add_in_table o = H.add node_table o ()
+
+let reset_table () = H.clear node_table
+
+
+(*
+module H = Hashtbl
+
+let node_table = H.create 257
+
+let in_table o =
+ try
+ List.exists ((==) o) (H.find_all node_table o)
+ with Not_found -> false
+
+let add_in_table o = H.add node_table o o
+
+let reset_table () = H.clear node_table
+ *)
+
+(*s Objects are traversed recursively, as soon as their tags are less than
+ [no_scan_tag]. [count] records the numbers of words already visited. *)
+
+let size_of_double = size (repr 1.0)
+
+let count = ref 0
+
+let rec traverse t =
+ if not (in_table t) then begin
+ add_in_table t;
+ if is_block t then begin
+ let n = size t in
+ let tag = tag t in
+ if tag < no_scan_tag then begin
+ count := !count + 1 + n;
+ for i = 0 to n - 1 do
+ let f = field t i in
+ if is_block f then traverse f
+ done
+ end else if tag = string_tag then
+ count := !count + 1 + n
+ else if tag = double_tag then
+ count := !count + size_of_double
+ else if tag = double_array_tag then
+ count := !count + 1 + size_of_double * n
+ else
+ incr count
+ end
+ end
+
+(*s Sizes of objects in words and in bytes. The size in bytes is computed
+ system-independently according to [Sys.word_size]. *)
+
+let size_w o =
+ reset_table ();
+ let gc_params = Gc.get () in
+ (* prevent compaction, so value in major heap won't move *)
+ Gc.set { gc_params with Gc.max_overhead = 10_000_000 };
+ (* move values to major heap, so no value is changed by the GC anymore *)
+ Gc.minor ();
+ count := 0;
+ traverse (repr o);
+ Gc.set gc_params;
+ !count
+
+let size_b o = (size_w o) * (Sys.word_size / 8)
+
+let size_kb o = (size_w o) / (8192 / Sys.word_size)
+
+
26 size.mli
View
@@ -0,0 +1,26 @@
+(**************************************************************************)
+(* *)
+(* Copyright (C) Jean-Christophe Filliatre *)
+(* *)
+(* This software is free software; you can redistribute it and/or *)
+(* modify it under the terms of the GNU Library General Public *)
+(* License version 2.1, with the special exception on linking *)
+(* described in file LICENSE. *)
+(* *)
+(* This software is distributed in the hope that it will be useful, *)
+(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
+(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *)
+(* *)
+(**************************************************************************)
+
+(*i $Id: size.mli,v 1.5 2008-07-21 14:53:06 filliatr Exp $ i*)
+
+(* Sizes of ocaml values (in their memory representation).
+ Sizes are given in words ([size_w]), bytes ([size_b]) or kilobytes
+ ([size_kb]), in a system-independent way. *)
+
+val size_w : 'a -> int
+
+val size_b : 'a -> int
+
+val size_kb : 'a -> int
169 ternary.ml
View
@@ -0,0 +1,169 @@
+
+type 'a t =
+ E (* empty *)
+ | NV of char * 'a t * 'a t * 'a t (* no value *)
+ | V of char * 'a t * 'a t * 'a t * 'a (* value *)
+
+type key = string
+
+let empty = E
+
+let rec length = function
+ E -> 0
+ | NV (_, l, m, r) | V (_, l, m, r, _) -> length l + length m + length r
+
+let rec is_empty = function
+ E -> true
+ | V _ -> false
+ | NV (_, l, m, r) -> is_empty l && is_empty m && is_empty r
+
+let find k t =
+ let rec loop s n maxn = function
+ E -> raise Not_found
+ | NV (c, l, m, r) | V (c, l, m, r, _) as node when n <= maxn ->
+ let c' = s.[n] in
+ if c' < c then loop s n maxn l
+ else if c' > c then loop s n maxn r
+ else begin match node with
+ NV _ -> loop s (n + 1) maxn m
+ | V (_, _, _, _, v) -> if n = maxn then v else loop s (n + 1) maxn m
+ | _ -> assert false
+ end
+ | _ -> raise Not_found in
+ let len = String.length k in
+ if len <> 0 then loop k 0 (len - 1) t
+ else match t with
+ E | NV _ -> raise Not_found
+ | V (_, _, _, _, v) -> v
+
+let mem k t = try ignore (find k t); true with Not_found -> false
+
+let add k v t =
+ let rec add k v off maxn = function
+ E ->
+ if off = maxn then V(k.[off], E, E, E, v)
+ (* maxn = -1 when k = "" *)
+ else if maxn = -1 then V (Char.chr 127, E, E, E, v)
+ else NV (k.[off], E, add k v (off + 1) maxn E, E)
+ | NV (c, l, m, r) ->
+ let cmp = Char.compare k.[off] c in
+ if cmp < 0 then NV (c, add k v off maxn l, m, r)
+ else if cmp > 0 then NV (c, l, m, add k v off maxn r)
+ else if off = maxn then V (c, l, m, r, v)
+ else NV (c, l, add k v (off + 1) maxn m, r)
+ | V (c, l, m, r, v') ->
+ let cmp = Char.compare k.[off] c in
+ if cmp < 0 then V (c, add k v off maxn l, m, r, v')
+ else if cmp > 0 then V (c, l, m, add k v off maxn r, v')
+ else if off = maxn then V (c, l, m, r, v)
+ else V (c, l, add k v (off + 1) maxn m, r, v')
+ in add k v 0 (String.length k - 1) t
+
+let prune = function
+ NV (_, E, E, E) -> E
+ | e -> e
+
+let remove k t =
+ let rec del k off maxn = function
+ E -> E
+ | NV (c, l, m, r) as t ->
+ let cmp = Char.compare k.[off] c in
+ let t =
+ if cmp < 0 then NV (c, del k off maxn l, m, r)
+ else if cmp > 0 then NV (c, l, m, del k off maxn r)
+ else if off = maxn then t
+ else NV (c, l, del k (off + 1) maxn m, r)
+ in prune t
+ | V (c, l, m, r, v) ->
+ let cmp = Char.compare k.[off] c in
+ if cmp < 0 then V (c, del k off maxn l, m, r, v)
+ else if cmp > 0 then V (c, l, m, del k off maxn r, v)
+ else if off = maxn then prune (NV (c, l, m, r))
+ else V (c, l, del k (off + 1) maxn m, r, v) in
+ let len = String.length k in
+ if len <> 0 then del k 0 (len - 1) t
+ else match t with
+ E | NV _ as t -> t (* nothing associated to "" *)
+ | V (c, l, m, r, _) -> NV (c, l, m, r)
+
+let (++) s c = s ^ String.make 1 c
+
+let iter f t =
+ let rec loop k = function
+ E -> ()
+ | NV (c, l, m, r) -> loop k l; loop (k ++ c) m; loop k r
+ | V (c, l, m, r, v) ->
+ let k' = k ++ c in
+ loop k l;
+ f k' v;
+ loop k' m;
+ loop k r
+ in loop "" t
+
+let fold f t acc =
+ let rec loop k t acc = match t with
+ E -> acc
+ | NV (c, l, m, r) ->
+ loop k r (loop (k ++ c) m (loop k l acc))
+ | V (c, l, m, r, v) ->
+ let k' = k ++ c in
+ loop k r (loop k' m (f k' v (loop k l acc)))
+ in loop "" t acc
+
+let mapi f t =
+ let rec domap k = function
+ E -> E
+ | NV (c, l, m, r) ->
+ (* make the order explicit *)
+ let l = domap k l in
+ let m = domap (k ++ c) m in
+ let r = domap k r in
+ NV (c, l, m, r)
+ | V (c, l, m, r, v) ->
+ let l = domap k l in
+ let k' = k ++ c in
+ let v = f k v in
+ let m = domap k' m in
+ let r = domap k r in
+ V(c, l, m, r, v)
+ in domap "" t
+
+let map f t = mapi (fun _ v -> f v) t
+
+let min_elt t =
+ let rec loop k = function
+ E -> None
+ | NV (c, l, m, r) ->
+ begin match loop k l with
+ Some _ as k -> k
+ | None -> match loop (k ++ c) m with
+ Some _ as k -> k
+ | None -> loop k r
+ end
+ | V (c, l, m, r, v) ->
+ match loop k l with
+ Some _ as k -> k
+ | None -> Some (k ++ c, v)
+ in loop "" t
+
+let rec compare cmp a b =
+ match min_elt a with
+ None -> begin
+ match min_elt b with
+ None -> 0
+ | Some _ -> -1
+ end
+ | Some (ka, va) ->
+ match min_elt b with
+ None -> 1
+ | Some (kb, vb) ->
+ match String.compare ka kb with
+ 0 ->
+ begin match cmp va vb with
+ 0 -> compare cmp (remove ka a) (remove kb b)
+ | n -> n
+ end
+ | n -> n
+
+let equal eq a b =
+ compare (fun a b -> if eq a b then 0 else -1) a b = 0
2  ternary.mli
View
@@ -0,0 +1,2 @@
+
+include Map.S with type key = string
84 trie_map.ml
View
@@ -0,0 +1,84 @@
+type key = string
+type 'a t = 'a node option
+and 'a node =
+{ value : 'a option;
+ split : char;
+ lokid : 'a t;
+ eqkid : 'a t;
+ hikid : 'a t; }
+
+let empty = None
+
+let is_empty = function None -> true | Some _ -> false
+
+
+let lookup k t =
+ let n = String.length k in
+ let rec go i = function
+ | None -> None
+ | Some t ->
+ let cmp = Char.compare k.[i] t.split in
+ if cmp < 0 then go i t.lokid else
+ if cmp > 0 then go i t.hikid else
+ if i+1 < n then go (i+1) t.eqkid else
+ t.value
+ in go 0 t
+
+let find k t = match lookup k t with
+| Some x -> x
+| None -> raise Not_found
+
+let mem k t = match lookup k t with
+| Some _ -> true
+| None -> false
+
+let add k v t =
+ let n = String.length k in
+ let rec go i = function
+ | None when i+1 < n ->
+ Some { value = None ; split = k.[i]; lokid = None; eqkid = go (i+1) None; hikid = None }
+ | None ->
+ Some { value = Some v; split = k.[i]; lokid = None; eqkid = None; hikid = None }
+ | Some t ->
+ let cmp = Char.compare k.[i] t.split in
+ if cmp < 0 then Some { t with lokid = go i t.lokid } else
+ if cmp > 0 then Some { t with hikid = go i t.hikid } else
+ if i+1 < n then Some { t with eqkid = go (i+1) t.eqkid } else
+ Some { t with value = Some v }
+ in go 0 t
+
+let remove k t =
+ let prune = function
+ | { value = None; lokid = None; eqkid = None; hikid = None } -> None
+ | t -> Some t
+ in
+ let n = String.length k in
+ let rec go i = function
+ | None -> None
+ | Some t ->
+ let cmp = Char.compare k.[i] t.split in
+ if cmp < 0 then prune { t with lokid = go i t.lokid } else
+ if cmp > 0 then prune { t with hikid = go i t.hikid } else
+ if i+1 < n then prune { t with eqkid = go (i+1) t.eqkid } else
+ prune { t with value = None }
+ in go 0 t
+
+let fold f t e =
+ let rec go prefix e = function
+ | None -> e
+ | Some t ->
+ let e = go prefix e t.lokid in
+ let key = prefix ^ String.make 1 t.split in
+ let e = match t.value with None -> e | Some v -> f key v e in
+ let e = go key e t.eqkid in
+ go prefix e t.hikid
+ in go "" e t
+
+let iter f t = fold (fun k v () -> f k v ) t ()
+and map f t = fold (fun k v -> add k (f v )) t empty
+and mapi f t = fold (fun k v -> add k (f k v)) t empty
+
+let compare _ _ _ = failwith "Not comparable"
+
+let equal eq t t' =
+ 0 == compare (fun v w -> if eq v w then 0 else 1) t t'
3  trie_map.mli
View
@@ -0,0 +1,3 @@
+
+include Map.S with type key = string
+
81 trie_map_mod.ml
View
@@ -0,0 +1,81 @@
+type 'a t = E
+ | L of 'a
+ | B of char * 'a t * 'a t * 'a t
+ | K of 'a * char * 'a t * 'a t * 'a t
+
+let empty = E
+
+let lookup k t =
+ let n = String.length k in
+ let rec go i = function
+ | E -> None
+ | L v when i == n -> Some v
+ | L _ -> None
+ | B ( _, _, _, _) when i == n -> None
+ | B ( c, l, q, h) ->
+ let c' = k.[i] in
+ if c' < c then go i l else
+ if c' > c then go i h else
+ go (i+1) q
+ | K (v, _, _, _, _) when i == n -> Some v
+ | K (_, c, l, q, h) ->
+ let c' = k.[i] in
+ if c' < c then go i l else
+ if c' > c then go i h else
+ go (i+1) q
+ in go 0 t
+
+let add k v t =
+ let n = String.length k in
+ let rec go i = function
+ | E when i == n -> L v
+ | E -> B ( k.[i], E, go (i+1) E, E)
+ | L _ when i == n -> L v
+ | L v -> K (v, k.[i], E, go (i+1) E, E)
+ | B ( c, l, q, h) when i == n -> K (v, c , l, q , h)
+ | B ( c, l, q, h) ->
+ let c' = k.[i] in
+ if c' < c then B ( c, go i l, q, h) else
+ if c' > c then B ( c, l, q, go i h) else
+ B ( c, l, go (i+1) q, h)
+ | K (_, c, l, q, h) when i == n -> K (v, c , l, q , h)
+ | K (v, c, l, q, h) ->
+ let c' = k.[i] in
+ if c' < c then K (v, c, go i l, q, h) else
+ if c' > c then K (v, c, l, q, go i h) else
+ K (v, c, l, go (i+1) q, h)
+ in go 0 t
+
+let remove k t =
+ let prune = function
+ | B ( _, E, E, E) -> E
+ | K (v, _, E, E, E) -> L v
+ | t -> t
+ in
+ let n = String.length k in
+ let rec go i t = match t with
+ | E -> t
+ | L _ when i == n -> E
+ | L _ -> t
+ | B ( _, _, _, _) when i == n -> t
+ | B ( c, l, q, h) ->
+ let c' = k.[i] in
+ if c' < c then prune (B ( c, go i l, q, h)) else
+ if c' > c then prune (B ( c, l, q, go i h)) else
+ prune (B ( c, l, go (i+1) q, h))
+ | K (_, c, l, q, h) when i == n -> B (c, l, q, h)
+ | K (v, c, l, q, h) ->
+ let c' = k.[i] in
+ if c' < c then prune (K (v, c, go i l, q, h)) else
+ if c' > c then prune (K (v, c, l, q, go i h)) else
+ prune (K (v, c, l, go (i+1) q, h))
+ in go 0 t
+
+let find k t = match lookup k t with
+| Some x -> x
+| None -> raise Not_found
+
+let mem k t = match lookup k t with
+| Some _ -> true
+| None -> false
+
Please sign in to comment.
Something went wrong with that request. Please try again.