/
trie.ml
103 lines (91 loc) · 2.92 KB
/
trie.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
open BatString;;
exception Empty_string;;
module ChildList = Map.Make(Char);;
(*
Fields:
1. Map of chars to child nodes
2. Whether this node represent a word
*)
type trie = Node of trie ChildList.t * bool;;
let empty: trie = Node (ChildList.empty, false);;
let rec sizeImpl (trie: trie): int =
let Node(children, _) = trie in
ChildList.fold
(fun char_key node accSize -> accSize + (sizeImpl node))
children
(* accounts for the current node *)
1
;;
(* Returns the number of nodes in a trie *)
let size (trie: trie): int =
(* We need to discount one because the root node does not represent a character *)
(sizeImpl trie) - 1
;;
(*
Returns the number of words in a trie, or the number of nodes
withhasEntry=true
*)
let rec entriesCount (trie: trie): int =
let Node(children, hasEntry) = trie in
ChildList.fold
(fun char_key node accSize -> accSize + (entriesCount node))
children
(* accounts for the current node *)
(if hasEntry then 1 else 0)
;;
let rec insert (s: char list) (trie: trie) : trie =
let Node(children, hasEntry) = trie in match s with
| [] -> Node(children, true)
| first_char :: rest ->
let currentChild = if ChildList.mem first_char children
then (ChildList.find first_char children)
else empty
in
let newChild = insert rest currentChild in
let newChildren = ChildList.add first_char newChild children in
Node(
newChildren,
hasEntry
)
;;
let rec merge (trieA: trie) (trieB: trie): trie =
let Node(childrenA, hasEntryA) = trieA in
let Node(childrenB, hasEntryB) = trieB in
let mergedChildren = ChildList.union
(fun key nextNodeA nextNodeB -> Some (merge nextNodeA nextNodeB))
childrenA
childrenB
in Node(mergedChildren, hasEntryA || hasEntryB)
;;
(* Convenience method to insert a string instead of a list of characters *)
let insertString (s: string) (trie: trie): trie =
insert (BatString.to_list s) trie;;
;;
let rec has (s: char list) (trie: trie): bool =
let Node(children, hasEntry) = trie in match s with
| [] -> hasEntry
| first_char :: rest ->
if ChildList.mem first_char children then
has rest (ChildList.find first_char children)
else false
;;
let rec equals (trieA: trie) (trieB: trie): bool =
let Node(childrenA, hasEntryA) = trieA in
let Node(childrenB, hasEntryB) = trieB in
if hasEntryA != hasEntryB then false
else
(
ChildList.for_all (fun chr nextChildA ->
ChildList.mem chr childrenB &&
equals nextChildA (ChildList.find chr childrenB)
) childrenA
) &&
(
ChildList.for_all (fun chr nextChildB ->
ChildList.mem chr childrenA &&
equals nextChildB (ChildList.find chr childrenA)
) childrenB
)
;;
(* Convenience method to search for a string instead of a list of characters *)
let hasString (s: string) (trie: trie): bool = has (BatString.to_list s) trie;;