Skip to content

Commit

Permalink
feat(data/hashmap): initial implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
spl committed Mar 11, 2019
1 parent 51c31ce commit 5a2269b
Show file tree
Hide file tree
Showing 4 changed files with 354 additions and 3 deletions.
60 changes: 60 additions & 0 deletions src/data/array/lemmas.lean
Expand Up @@ -20,15 +20,23 @@ namespace array
instance {n α} [inhabited α] : inhabited (array n α) :=
d_array.inhabited

/- to_list -/

theorem to_list_of_heq {n₁ n₂ α} {a₁ : array n₁ α} {a₂ : array n₂ α}
(hn : n₁ = n₂) (ha : a₁ == a₂) : a₁.to_list = a₂.to_list :=
by congr; assumption

@[simp] theorem to_list_zero {α} (a : array 0 α) : a.to_list = [] :=
rfl

/- rev_list -/

section rev_list
variables {n : ℕ} {α : Type u} {a : array n α}

@[simp] theorem rev_list_zero (a : array 0 α) : a.rev_list = [] :=
rfl

theorem rev_list_reverse_aux : ∀ i (h : i ≤ n) (t : list α),
(a.iterate_aux (λ _, (::)) i h []).reverse_core t = a.rev_iterate_aux (λ _, (::)) i h t
| 0 h t := rfl
Expand Down Expand Up @@ -222,8 +230,60 @@ end
@[simp] theorem push_back_to_list : (a.push_back v).to_list = a.to_list ++ [v] :=
by rw [←rev_list_reverse, ←rev_list_reverse, push_back_rev_list, list.reverse_cons]

theorem read_push_back {i : fin n} :
a.read i = v ↔ (a.push_back v).read i.cast_succ = v ∧ (a.push_back v).read (fin.last n) = v :=
by cases i with _ i_lt_n;
simp [fin.cast_succ, fin.cast_add, fin.cast_le, fin.cast_lt, fin.last, read, push_back,
d_array.read, ne_of_lt i_lt_n]

end push_back

/- pop_back -/

section pop_back
variables {n : ℕ} {α : Type u}

theorem read_pop_back {v : α} {a : array (n+1) α} :
(∀ (i : fin (n+1)), a.read i = v) ↔
a.read (fin.last n) = v ∧ ∀ (i : fin n), a.pop_back.read i = v :=
iff.intro
(λ h, ⟨h (fin.last n), λ i, by rw ←h i.cast_succ; cases i; refl⟩)
(λ h i, begin
cases i with i i_lt_succ_n,
by_cases p : i = n,
{ subst p, rw ←h.1, refl },
{ rw ←h.2 ⟨i, nat.lt_of_le_and_ne (nat.le_of_lt_succ i_lt_succ_n) p⟩, refl }
end)

theorem pop_back_push_back {v : α} : ∀ {a : array (n+1) α},
a.read (fin.last n) = v → a = a.pop_back.push_back v
| ⟨a⟩ h := array.ext $ λ ⟨i, i_lt_n⟩,
by by_cases e : i = n; simp [push_back, pop_back, read, d_array.read, e]; exact h

@[simp] theorem pop_back_rev_list {a : array (n+1) α} :
a.read (fin.last n) :: a.pop_back.rev_list = a.rev_list :=
by rw ←push_back_rev_list; congr; exact (pop_back_push_back rfl).symm

theorem rev_list_repeat {v : α} : ∀ {n} {a : array n α},
a.rev_list = list.repeat v n ↔ ∀ i, a.read i = v
| 0 _ := ⟨λ _ i, by cases i.is_lt, by simp⟩
| (n+1) a :=
⟨λ h, by rw [list.repeat, ←pop_back_rev_list] at h;
exact read_pop_back.mpr
⟨list.head_eq_of_cons_eq h, rev_list_repeat.mp (list.tail_eq_of_cons_eq h)⟩,
λ h, by rw [list.repeat, pop_back_push_back (h (fin.last n)),
push_back_rev_list, rev_list_repeat.mpr (read_pop_back.mp h).2]⟩

theorem to_list_repeat {v : α} {a : array n α} :
a.to_list = list.repeat v n ↔ ∀ i, a.read i = v :=
by rw [←rev_list_reverse, ←list.reverse_repeat, list.reverse_inj]; exact rev_list_repeat

theorem to_list_join_nil {n} {a : array n (list α)} :
a.to_list.join = [] ↔ ∀ i, a.read i = [] :=
by simp [to_list_repeat.symm, list.join_eq_nil, list.eq_repeat]

end pop_back

/- foreach -/

section foreach
Expand Down
283 changes: 283 additions & 0 deletions src/data/hashmap.lean
@@ -0,0 +1,283 @@
/-
Copyright (c) 2019 Sean Leather. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Sean Leather
Hash maps
-/
import data.array.lemmas data.list.alist data.pnat

universes u v

/-- Lift `f : α → ℕ` to `α → fin n` using the result modulo `n : ℕ+` -/
def fin.lift_mod_pnat (n : ℕ+) {α} (f : α → ℕ) (a : α) : fin n :=
⟨f a % n, nat.mod_lt _ n.property⟩

variables {n : ℕ} {α : Type u} {β : α → Type v}

/-- A hash map is valid if every bucket has no duplicate keys and every key
hash equals the array index of the key's bucket. -/
def hashmap.is_valid (bs : array n (list (sigma β))) (h : α → fin n) : Prop :=
∀ (i : fin n), (bs.read i).nodupkeys ∧ ∀ a ∈ (bs.read i).keys, i = h a

/-- A hash map is a finite key-value map implemented with an n-sized array of
buckets, a hash function, and a proof of validity. -/
structure hashmap (n : ℕ) {α : Type u} (β : α → Type v) :=
/- Array of buckets -/
(buckets : array n (list (sigma β)))
/- Hash function from key to bucket index -/
(hash : α → fin n)
/- Validity proof -/
(valid : hashmap.is_valid buckets hash)

/- Notes on alternative structures for a hashmap:
Bundling `n`:
```
structure hashmap {α : Type u} (β : α → Type v) :=
(n : ℕ)
(buckets : array n (list (sigma β)))
-- ...
```
Having `n` be a field instead of a parameter means that the size of the
underlying array would be hidden in the type of a hash map. At first blush,
this appears attractive because, from a usage perspective, it doesn't matter
what size the array is. However, the size is integral to the type of a hash
map, useful when equating two hashmaps (see `ext`), and necessary when indexing
into the `buckets`. So, rather than have use type such as `fin m.n`, we expose
`n`.
Using `alist`:
```
structure hashmap (n : ℕ) {α : Type u} (β : α → Type v) :=
(buckets : array n (alist β))
(hash : α → fin n)
(valid : ∀ (i : fin n), ∀ a ∈ (buckets.read i).keys, i = hash a)
```
Using `alist` instead of `list` in the `buckets` would allow us to reduce the
`valid` theorem as shown above. However, we cannot `map` an `array` to a
different element type (i.e. in `map f`, `f : α → α` and not `f : α → β`), and
that makes proofs significantly more difficult.
-/

namespace hashmap
open list

/- nth -/

/-- The list at the given bucket index -/
def nth (m : hashmap n β) : fin n → list (sigma β) :=
m.buckets.read

theorem nth_nodupkeys (m : hashmap n β) (i : fin n) : (m.nth i).nodupkeys :=
(m.valid i).left

theorem index_eq_hash {a} {i : fin n} {m : hashmap n β} (h : a ∈ (m.nth i).keys) :
i = m.hash a :=
(m.valid i).right a h

/- bucket -/

/-- The bucket for the given key -/
def bucket (m : hashmap n β) (a : α) : list (sigma β) :=
m.nth (m.hash a)

section bucket
variables {s : sigma β} {m : hashmap n β}

theorem mem_bucket_nth : s ∈ m.bucket s.1 ↔ ∃ (i : fin n), s ∈ m.nth i :=
⟨λ h, ⟨m.hash s.1, h⟩,
λ ⟨i, h⟩, by simpa only [index_eq_hash (mem_keys_of_mem h)] using h⟩

theorem mem_bucket_list : s ∈ m.bucket s.1 ↔ ∃ (l : list (sigma β)), l ∈ m.buckets ∧ s ∈ l :=
⟨λ h, ⟨m.bucket s.1, ⟨m.hash s.1, rfl⟩, h⟩,
λ ⟨l, h₁, h₂⟩, by cases h₁ with i h₁; induction h₁; exact mem_bucket_nth.mpr ⟨_, h₂⟩⟩

end bucket /- section -/

/- constructing empty hashmaps -/

/-- Construct an empty hash map with a `fin n`-valued hash function -/
def mk_empty (β : α → Type v) (f : α → fin n) : hashmap n β :=
⟨mk_array n [], f, λ _, ⟨nodupkeys_nil, λ _ h, by cases h⟩⟩

/-- Construct an empty hashmap with size `n > 0` and a `nat`-valued hash function -/
def mk_empty_pos (n : ℕ+) {α} (β : α → Type v) (f : α → ℕ) : hashmap n β :=
mk_empty β (fin.lift_mod_pnat n f)

/- extensionality -/

section ext
variables {m₁ m₂ : hashmap n β}

theorem ext_core : m₁.buckets = m₂.buckets → m₁.hash = m₂.hash → m₁ = m₂ :=
by cases m₁; cases m₂; intros; congr; repeat { assumption }

@[extensionality] theorem ext
(hr : ∀ (i : fin n), m₁.nth i = m₂.nth i)
(hh : ∀ a, m₁.hash a = m₂.hash a) :
m₁ = m₂ :=
ext_core (array.ext hr) (funext hh)

end ext /- section -/

/- mem -/

/-- The predicate `a ∈ m` means that `m` has a value associated to the key `a`. -/
instance : has_mem α (hashmap n β) :=
⟨λ a m, a ∈ (m.bucket a).keys⟩

section mem
variables {a : α} {m : hashmap n β}

theorem mem_bucket_keys : a ∈ m ↔ a ∈ (m.bucket a).keys :=
iff.rfl

theorem mem_bucket : a ∈ m ↔ ∃ (b : β a), sigma.mk a b ∈ m.bucket a :=
mem_bucket_keys.trans $ by simp [keys]

theorem mem_nth_keys : a ∈ m ↔ ∃ (i : fin n), a ∈ (m.nth i).keys :=
⟨λ h, ⟨m.hash a, h⟩, λ ⟨i, h⟩, by simpa only [index_eq_hash h] using h⟩

end mem /- section -/

/- is_empty -/

/-- A hash map is empty if every bucket is empty -/
def is_empty (m : hashmap n β) : Prop :=
∀ i : fin n, m.nth i = []

theorem is_empty_mk_empty (β) (f : α → fin n) : (mk_empty β f).is_empty :=
λ _, rfl

theorem is_empty_mk_empty_pos (n : ℕ+) (β) (f : α → ℕ) : (mk_empty_pos n β f).is_empty :=
λ _, rfl

@[simp] theorem is_empty_zero (m : hashmap 0 β) : m.is_empty :=
λ i, fin_zero_elim i

theorem mem_is_empty {m : hashmap n β} : m.is_empty ↔ ∀ a, a ∉ m :=
iff.intro
(λ h₁ a h₂, by simpa [mem_bucket_keys, bucket, h₁ (m.hash a)] using h₂)
(λ h₁ _, eq_nil_iff_forall_not_mem.mpr $ λ _ h₂, h₁ _ $
mem_nth_keys.mpr ⟨_, mem_keys_of_mem h₂⟩)

/-- The bucket list of a hashmap -/
def to_lists (m : hashmap n β) : list (list (sigma β)) :=
m.buckets.to_list

section to_lists
variables {i : ℕ} {m : hashmap n β} {l : list (sigma β)}

@[simp] theorem mem_to_lists : l ∈ m.to_lists ↔ l ∈ m.buckets :=
array.mem_to_list

theorem index_eq_hash_of_to_lists (he : (i, l) ∈ m.to_lists.enum) {a} (hl : a ∈ l.keys) :
i = (m.hash a).1 :=
have h₁ : ∃ p, m.nth ⟨i, p⟩ = l := array.mem_to_list_enum.1 he,
have h₂ : ∃ p, fin.mk i p = m.hash a := h₁.imp (λ p h, index_eq_hash (h.symm ▸ hl)),
let ⟨_, h⟩ := h₂ in by rw ←h

theorem nodupkeys_to_lists (h : l ∈ m.to_lists) : l.nodupkeys :=
by simp [to_lists] at h; cases h with i h; induction h; exact m.nth_nodupkeys i

theorem disjoint_keys_to_lists :
pairwise (λ l₁ l₂ : list (sigma β), disjoint l₁.keys l₂.keys) m.to_lists :=
begin
rw [←enum_map_snd m.to_lists, pairwise_map],
refine pairwise.imp_of_mem _ ((pairwise_map prod.fst).mp (nodup_enum_map_fst m.to_lists)),
rw prod.forall,
intros n₁ s₁,
rw prod.forall,
intros n₂ s₂ hme₁ hme₂ n₁_ne_n₂ a hm₁ hm₂,
apply n₁_ne_n₂,
rw index_eq_hash_of_to_lists hme₁ hm₁,
rw index_eq_hash_of_to_lists hme₂ hm₂
end

end to_lists /- section -/

/- to_alist -/

/-- The association list of a hash map -/
def to_alist (m : hashmap n β) : alist β :=
⟨m.to_lists.join, nodupkeys_join.mpr ⟨λ _, nodupkeys_to_lists, disjoint_keys_to_lists⟩⟩

section to_alist
variables {a : α} {b : β a} {m m₁ m₂ : hashmap n β}

theorem is_empty_to_alist : m.is_empty ↔ m.to_alist = ∅ :=
by simp only [is_empty, nth, to_alist, to_lists, has_emptyc.emptyc, array.to_list_join_nil]

theorem mem_bucket_to_alist : sigma.mk a b ∈ m.bucket a ↔ sigma.mk a b ∈ m.to_alist.entries :=
by simp [to_alist, mem_bucket_list]

theorem mem_to_alist : a ∈ m ↔ a ∈ m.to_alist :=
calc a ∈ m ↔ ∃ (b : β a), sigma.mk a b ∈ m.bucket a : mem_bucket
... ↔ ∃ (b : β a), sigma.mk a b ∈ m.to_alist.entries : exists_congr $ λ _, mem_bucket_to_alist
... ↔ a ∈ m.to_alist : alist.mem_entries.symm

theorem to_alist_ext (h : a ∈ m₁.to_alist ↔ a ∈ m₂.to_alist) : a ∈ m₁ ↔ a ∈ m₂ :=
by simp only [mem_to_alist, h]

end to_alist /- section -/

/- keys -/

/-- The list of keys of a hash map. -/
def keys (m : hashmap n β) : list α :=
m.to_alist.keys

theorem keys_nodup (m : hashmap n β) : m.keys.nodup :=
m.to_alist.keys_nodup

section decidable_eq_α
variables [decidable_eq α]

/- lookup -/

/-- Look up a key in a hash map to find the value, if it exists -/
def lookup (a : α) (m : hashmap n β) : option (β a) :=
lookup a $ m.bucket a

section lookup
variables {a : α} {b : β a} {m : hashmap n β}

theorem lookup_is_some : (m.lookup a).is_some ↔ a ∈ m :=
lookup_is_some

theorem lookup_eq_none : m.lookup a = none ↔ a ∉ m :=
lookup_eq_none

theorem mem_lookup_iff : b ∈ m.lookup a ↔ sigma.mk a b ∈ m.bucket a :=
mem_lookup_iff (nth_nodupkeys _ _)

theorem perm_lookup {m₁ m₂ : hashmap n β} :
m₁.bucket a ~ m₂.bucket a → m₁.lookup a = m₂.lookup a :=
perm_lookup a (nth_nodupkeys _ _) (nth_nodupkeys _ _)

theorem lookup_to_alist : m.lookup a = m.to_alist.lookup a :=
begin
by_cases h : a ∈ m,
{ cases exists_of_mem_keys h with b h,
have h₁ : m.lookup a = some b := mem_lookup_iff.mpr h,
have h₂ : m.to_alist.lookup a = some b :=
alist.mem_lookup_iff.mpr (mem_bucket_to_alist.mp h),
rwa ←h₂ at h₁},
{ rw lookup_eq_none.mpr h,
rw alist.lookup_eq_none.mpr ((not_iff_not_of_iff mem_to_alist).mp h) }
end

theorem is_empty_lookup : m.is_empty ↔ ∀ a, m.lookup a = none :=
mem_is_empty.trans $ by simp only [lookup_eq_none.symm]

end lookup /- section -/

end decidable_eq_α /- section -/

end hashmap
6 changes: 6 additions & 0 deletions src/data/list/alist.lean
Expand Up @@ -37,6 +37,9 @@ instance : has_mem α (alist β) := ⟨λ a s, a ∈ s.keys⟩

theorem mem_keys {a : α} {s : alist β} : a ∈ s ↔ a ∈ s.keys := iff.rfl

theorem mem_entries {a} {s : alist β} : a ∈ s ↔ ∃ (b : β a), sigma.mk a b ∈ s.entries :=
list.mem_keys

theorem mem_of_perm {a : α} {s₁ s₂ : alist β} (p : s₁.entries ~ s₂.entries) : a ∈ s₁ ↔ a ∈ s₂ :=
mem_of_perm $ perm_map sigma.fst p

Expand Down Expand Up @@ -73,6 +76,9 @@ theorem lookup_eq_none {a : α} {s : alist β} :
lookup a s = none ↔ a ∉ s :=
lookup_eq_none

theorem mem_lookup_iff {a} {b : β a} {s : alist β} : b ∈ lookup a s ↔ sigma.mk a b ∈ s.entries :=
mem_lookup_iff s.nodupkeys

theorem perm_lookup {a : α} {s₁ s₂ : alist β} (p : s₁.entries ~ s₂.entries) :
s₁.lookup a = s₂.lookup a :=
perm_lookup _ s₁.nodupkeys s₂.nodupkeys p
Expand Down

0 comments on commit 5a2269b

Please sign in to comment.