# Hash table example

In [None]:
import sys
sys.path.append("..")

from run_ex1 import tokenize, load_gutenberg_text, build_word_counts

In [None]:
# Location of the plain-text source on Project Gutenberg.
URL = "https://www.gutenberg.org/files/98/98-0.txt"
# Load the text, tokenize it, and build the table using the run_ex1 helpers.
# `text` and `words` stay defined at notebook level after this cell runs.
# NOTE(review): capacity=32_768 is presumably the table's initial slot
# count -- confirm against build_word_counts in run_ex1.
text = load_gutenberg_text(URL)
words = tokenize(text)
table = build_word_counts(words, capacity=32_768)

In [3]:
# Report table.size (labelled here as the unique-word count) and the value
# stored under the key "the".
print("Unique words:", table.size)
print("Count('the'):", table.get("the"))

Unique words: 9944
Count('the'): 8242


In [4]:
# 10 sample items (arbitrary order due to probing)
# islice stops after ten (key, value) pairs, so no manual counter or
# break is needed; fewer pairs are printed if the table holds fewer.
from itertools import islice

for k, v in islice(table.items(), 10):
    print(k, v)

grinding 1
xix 2
dispense 2
wither 1
thankful 7
ward 3
east 3
inquired 8
quest 3
perspective 2


## Recency

In [5]:
# Most and least recently changed entries.
# The recorded output shows each as a (key, value) tuple.
print("Most recent change:", table.get_last())
print("Least recent change:", table.get_first())

Most recent change: ('ebooks', 7)
Least recent change: ('author', 1)


In [6]:
# Most and least recent after changing the value of an existing word.
# Recorded output: 'thrown' moves to the most-recent position; the
# least-recent entry is unchanged.
table.insert('thrown', 10)
print("Most recent change:", table.get_last())
print("Least recent change:", table.get_first())

Most recent change: ('thrown', 10)
Least recent change: ('author', 1)


In [7]:
# Most and least recent after removing the most recent change.
# Recorded output: the previous most-recent entry ('ebooks') takes over again.
table.remove('thrown')
print("Most recent change:", table.get_last())
print("Least recent change:", table.get_first())

Most recent change: ('ebooks', 7)
Least recent change: ('author', 1)


In [8]:
# Most and least recent after removing an arbitrary word.
# Recorded output: both the most-recent and least-recent entries are unchanged.
table.remove('hanging')
print("Most recent change:", table.get_last())
print("Least recent change:", table.get_first())

Most recent change: ('ebooks', 7)
Least recent change: ('author', 1)


In [9]:
# Most and least recent after adding a new word.
# Recorded output: the new key becomes the most recent entry.
table.insert('new_word', 10)
print("Most recent change:", table.get_last())
print("Least recent change:", table.get_first())

Most recent change: ('new_word', 10)
Least recent change: ('author', 1)


In [10]:
# Most and least recent after removing the least recent entry.
# Recorded output: 'january' becomes the least recent entry.
table.remove('author')
print("Most recent change:", table.get_last())
print("Least recent change:", table.get_first())

Most recent change: ('new_word', 10)
Least recent change: ('january', 1)


## Remove functionality

In [11]:
# Removing a non-existing word fails: the recorded output is a KeyError.
table.remove('octopus')

KeyError: 'octopus'

In [12]:
# Removing an existing word succeeds (the cell records no output).
table.remove('comforted')

In [13]:
# A second removal of the same word fails: the recorded output is a
# KeyError for the key.
table.remove('comforted')

KeyError: 'comforted'

## Insert functionality

In [12]:
# Existing word: show the stored value, insert a new one, show it again.
# NOTE(review): the recorded output already shows 10 before the change, so
# this cell appears to have been re-run after an earlier insert. Restart the
# kernel and run all cells to see the original count being replaced.
print('"chips" value pre change:', table.get('chips'))
print('changing "chips" value to 10')
table.insert('chips', 10)
print('"chips" value post change:', table.get('chips'))

"chips" value pre change: 10
changing "chips" value to 10
"chips" value post change: 10


In [16]:
# Non-existing word: the recorded output shows get() raising KeyError
# for a key that is not in the table.
print('"electric" value pre change:', table.get('electric'))

KeyError: 'electric'

In [17]:
# Add "electric" with value=10.
# The announcement is printed so the cell produces the first line of its
# recorded output, matching how the "chips" cell announces its change.
print('add "electric" with value=10')
table.insert('electric', 10)
print('"electric" value in hash table:', table.get('electric'))

add "electric" with value=10
"electric" value in hash table: 10
