# Mod 5.1: Hashing and Searching


## Hashing

In Python, we implement hash tables through dictionaries.

In [1]:
## creating hash table; runs in O(1)
grades = {}

In [2]:
## creating hash table with an entry; runs in O(1)
grades = {"Alice": 95}

In [4]:
## adding entries; runs in O(1)
grades["Bob"] = 87
grades

{'Alice': 95, 'Bob': 87}

In [5]:
## updating entries
grades["Alice"] = 98 
grades

{'Alice': 98, 'Bob': 87}

In [6]:
## accessing entries; runs in O(1)
print(grades["Alice"])

98


In [7]:
## make sure there is no error; also runs in O(1)
print(grades.get("David", "Not found"))  # Safe access

Not found


In [8]:
## deleting entries; also runs in O(1)
del grades["Bob"]
grades.pop("Alice")

98

In [11]:
## check membership; also runs in O(1)
if 87 in grades:
    print("Found!")

### Why Use Hash Tables

Because they're fast.

In [12]:
import time
import random

In [13]:
## finding entry in a list; runs in O(n)
students_list = [("Alice", 95), ("Bob", 87), ("Charlie", 92)]
start = time.time()
for name, grade in students_list:
    if name == "Charlie":
        print(grade)
print(f"List: {time.time() - start}")

## finding entry in a hash table; runs in O(1)
students_dict = {"Alice": 95, "Bob": 87, "Charlie": 92}
start = time.time()
print(students_dict["Charlie"])
print(f"Dict: {time.time() - start}")

92
List: 0.00011944770812988281
92
Dict: 3.790855407714844e-05


In [14]:
## generate large databases of students
num_students = 100000
students_list = [(f"Student_{i}", random.randint(60, 100)) for i in range(num_students)]
students_dict = {f"Student_{i}": random.randint(60, 100) for i in range(num_students)}

In [15]:
target = f"Student_{num_students - 1000}"

In [16]:
## finding entry in a list; runs in O(n)
start = time.time()
for name, grade in students_list:
    if name == target:
        result = grade
        break
list_time = time.time() - start
print(f"List search time: {list_time:.6f} seconds")

## finding entry in a hash table; runs in O(1)
start = time.time()
result = students_dict[target]
dict_time = time.time() - start
print(f"Dict search time: {dict_time:.6f} seconds")

List search time: 0.005640 seconds
Dict search time: 0.000034 seconds


## Searching Algorithms

### Linear Search

In [19]:
## goes through each element left to right; runs in O(n)
numbers = [64, 34, 25, 12, 22, 11, 90]
target = 27
found_index = -1

for i in range(0,7):
    if numbers[i] == target:
        found_index = i
        break

print(f"Found at index: {found_index}")

Found at index: -1


### Binary Search

In [21]:
numbers = [11, 25, 12, 22, 34, 64, 90] ## list must be sorted
target = 25
start_idx = 0
end_idx = len(numbers) - 1
found_index = -1

while start_idx <= end_idx:
    mid_idx = (start_idx + end_idx) // 2
    
    if numbers[mid_idx] == target:
        found_index = mid_idx
        break
    elif numbers[mid_idx] < target:
        start_idx = mid_idx + 1 
    elif numbers[mid_idx] > target:
        end_idx = mid_idx - 1
        
found_index 

-1