## Rubric

| Criteria                       | Ratings                                                                                                                                                                                   | Pts |
| ------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --- |
| 1a BST Max                     | 3 pts Full Marks<br>0 pts No Marks                                                                                                                                                        | 3   |
| 1b BST Min                     | 3 pts Full Marks<br>0 pts No Marks                                                                                                                                                        | 3   |
| 1c BST Sum                     | 7 pts Full Marks<br>0 pts No Marks                                                                                                                                                        | 7   |
| 1d BST Height                  | 7 pts Full Marks<br>0 pts No Marks                                                                                                                                                        | 7   |
| BST Validator                  | 20 pts Full Marks<br>15 pts does not work for all cases<br>0 pts No Marks                                                                                                                 | 20  |
| Character frequency heap sort  | 20 pts Full Marks<br>10 pts Character order<br>characters must be in ascending order<br>0 pts No Marks                                                                                    | 20  |
| Median Tracker                 | 20 pts Full Marks<br>15 pts Efficiency<br>Must have insert O(log n) efficiency and median O(1) efficiency<br>10 pts Methods<br>Must implement insert and median methods<br>0 pts No Marks | 20  |
| Find the longest common prefix | 20 pts Full Marks<br>15 pts could be more efficient<br>0 pts No Marks                                                                                                                     | 20  |

**Total Points: 100**

---

## Custom Imports

In [1]:
from __future__ import annotations
from typing import Any, Callable, Generic, List, Optional, Protocol, TypeVar, cast

T = TypeVar("T")


# A protocol expressing that a type supports math comparisons
class SupportsMath(Protocol):
    # Arithmetic operations
    def __add__(self: T, other: T) -> T: ...
    def __sub__(self: T, other: T) -> T: ...
    def __mul__(self: T, other: T) -> T: ...
    def __truediv__(self: T, other: T) -> T: ...
    def __floordiv__(self: T, other: T) -> T: ...
    def __mod__(self: T, other: T) -> T: ...
    def __pow__(self: T, other: T, modulo: Any = ...) -> T: ...

    # Unary operations
    def __neg__(self: T) -> T: ...
    def __pos__(self: T) -> T: ...
    def __abs__(self: T) -> T: ...

    # Comparisons
    def __lt__(self, other: Any) -> bool: ...
    def __le__(self, other: Any) -> bool: ...
    def __gt__(self, other: Any) -> bool: ...
    def __ge__(self, other: Any) -> bool: ...
    def __eq__(self, other: Any) -> bool: ...
    def __ne__(self, other: Any) -> bool: ...


M = TypeVar("M", bound=SupportsMath)


# -----------------------------
# Node Base Classes
# -----------------------------
class NodeBase(Generic[T]):
    """Base class for nodes in linked lists."""

    def __init__(self, value: T):
        self.value: T = value


# -----------------------------
# Binary Tree Node
# -----------------------------
class BinaryTreeNode(NodeBase[M]):
    """A node in a binary tree, inheriting from NodeBase."""

    __slots__ = ("value", "left", "right")

    def __init__(
        self,
        value: M,
        left: Optional[BinaryTreeNode[M]] = None,
        right: Optional[BinaryTreeNode[M]] = None,
    ) -> None:
        super().__init__(value)
        self.left = left
        self.right = right

    def __repr__(self) -> str:
        return f"BinaryTreeNode({self.value!r})"


# -----------------------------
# Binary Tree
# -----------------------------
class BinaryTree(Generic[M]):
    """Binary tree wrapper class."""

    __slots__ = ("_root", "_size")

    def __init__(self, root: Optional[BinaryTreeNode[M]] = None) -> None:
        self._root = root
        self._size = 0 if root is None else self._compute_size(root)

    @property
    def root(self) -> Optional[BinaryTreeNode[M]]:
        return self._root

    @property
    def size(self) -> int:
        return self._size

    def is_empty(self) -> bool:
        return self._root is None

    def _compute_size(self, node: Optional[BinaryTreeNode[M]]) -> int:
        if node is None:
            return 0
        return 1 + self._compute_size(node.left) + self._compute_size(node.right)

    # -------------------------------
    # Insert (generic: first empty spot)
    # -------------------------------
    def insert(self, value: M) -> None:
        """Insert value anywhere in the tree (first empty spot found)."""
        new_node = BinaryTreeNode(value)
        if self._root is None:
            self._root = new_node
            self._size = 1
            return

        queue: List[BinaryTreeNode[M]] = [self._root]
        while queue:
            node = queue.pop(0)
            if node.left is None:
                node.left = new_node
                self._size += 1
                return
            else:
                queue.append(node.left)
            if node.right is None:
                node.right = new_node
                self._size += 1
                return
            else:
                queue.append(node.right)

    # -------------------------------
    # Traversals
    # -------------------------------
    def inorder(self, visit: Callable[[M], None]) -> None:
        def _inorder(node: Optional[BinaryTreeNode[M]]):
            if node:
                _inorder(node.left)
                visit(node.value)
                _inorder(node.right)

        _inorder(self._root)

    def preorder(self, visit: Callable[[M], None]) -> None:
        def _preorder(node: Optional[BinaryTreeNode[M]]):
            if node:
                visit(node.value)
                _preorder(node.left)
                _preorder(node.right)

        _preorder(self._root)

    def postorder(self, visit: Callable[[M], None]) -> None:
        def _postorder(node: Optional[BinaryTreeNode[M]]):
            if node:
                _postorder(node.left)
                _postorder(node.right)
                visit(node.value)

        _postorder(self._root)

    # -------------------------------
    # Utility
    # -------------------------------
    def to_list(self) -> List[M]:
        """Return all values of the tree in inorder as a list."""
        values: List[M] = []
        self.inorder(values.append)
        return values

    def height(self) -> int:
        """
        Returns the maximum height of the binary tree from an input node
        Time Complexity: O(n) since all nodes must be traversed to find the largest height (preorder traversal)
        Space Complexity: O(logn) avg./O(n) worst for recursion stack space, and O(1) for tail recursion optimization
        """
        max_height = 0

        def _preorder(node: Optional[BinaryTreeNode[M]], current_height: int):
            nonlocal max_height
            if node:
                current_height += 1
                _preorder(node.left, current_height)
                _preorder(node.right, current_height)
            if current_height > max_height:
                max_height = current_height

        _preorder(self._root, 0)
        return max_height

    def sum(self) -> M:
        """
        Finds the sum of all values binary search tree
        Time Complexity: O(n) since all nodes must be traversed
        Space Complexity: O(logn) avg./O(n) worst for recursion stack space, and O(1) for tail recursion optimization
        """
        _sum = cast(M, 0)

        def _add(val: M) -> None:
            nonlocal _sum
            _sum += val

        self.preorder(_add)
        return _sum

    def __repr__(self) -> str:
        return f"BinaryTree({self.to_list()})"

    def pretty_print(self) -> str:
        if not self._root:
            return "<empty>"

        def _display(
            node: Optional[BinaryTreeNode[M]],
        ) -> tuple[list[str], int, int, int]:
            if node is None:
                return [], 0, 0, 0

            line = f"┌{node.value}┐"
            width = len(line)

            if node.left is None and node.right is None:
                return [line], width, 1, width // 2

            left_lines, left_width, left_height, left_middle = _display(node.left)
            right_lines, right_width, right_height, right_middle = _display(node.right)

            height = max(left_height, right_height) + 2
            middle = left_width + width // 2 + 1 if node.left else width // 2

            left_lines += [" " * left_width] * (right_height - left_height)
            right_lines += [" " * right_width] * (left_height - right_height)

            lines = [" " * left_width + line + " " * right_width]
            for l, r in zip(left_lines, right_lines):
                lines.append(l + " " * width + r)
            return lines, left_width + width + right_width, height, middle

        lines, _, _, _ = _display(self._root)
        return "\n".join(lines)


# -----------------------------
# Binary Search Tree
# -----------------------------
class BinarySearchTree(BinaryTree[M]):
    """Binary search tree derived from BinaryTree."""

    # -------------------------------
    # Insert override for BST
    # -------------------------------
    def insert(self, value: M) -> None:
        """Insert a value into the binary tree (BST style)."""

        def _insert(node: Optional[BinaryTreeNode[M]], value: M) -> BinaryTreeNode[M]:
            if node is None:
                return BinaryTreeNode(value)
            if value < node.value:
                node.left = _insert(node.left, value)
            else:
                node.right = _insert(node.right, value)
            return node

        self._root = _insert(self._root, value)
        self._size += 1

    # -------------------------------
    # Search
    # -------------------------------
    def find_max(self) -> Optional[M]:
        """
        Finds the maximum value in the binary search tree
        Time Complexity: Avg. O(logn) since DFS is used to traverse each level to the max value
                         O(n) worst case for an unbalanced BST
        Space Complexity: O(logn) for extra stack space, and O(1) for tail recursion optimization
        """
        node = self._root
        if node is None:
            return None
        while node.right:
            node = node.right
        return node.value

    def find_min(self) -> Optional[M]:
        """
        Finds the minimum value in the binary search tree
        Time Complexity: Avg. O(logn) since DFS is used to traverse each level to the min value
                         O(n) worst case for an unbalanced BST
        Space Complexity: O(logn) for extra stack space, and O(1) for tail recursion optimization
        """
        node = self._root
        if node is None:
            return None
        while node.left:
            node = node.left
        return node.value


class TrieNode(Generic[T]):
    """
    A simple trie node wrapper object
    Contains:
        children: a HashMap of character: TrieNodes
        count: the number of children in the TrieNode
        is_end: a flag to signify the end of a word
    """
    def __init__(self):
        self.children = {}
        self.count = 0 # number of words descending from this node (>= len(children))
        self.is_end = False

    def __repr__(self):
        return f"[TrieNode] Children {list(self.children.keys())}; Count {self.count}; End {self.is_end}"

class Trie(Generic[T]):
    def __init__(self):
        self.root = TrieNode()

    def insert(self, words: List[str]) -> None:
        """
        Updates the Trie with a given word list
        Takes O(c), where c is the total number of characters
        Space complexity: O(k), where k is the number of new characters added to the trie
        """
        for word in words:
            tn = self.root
            for ch in word:
                if not tn.children.get(ch):
                    tn.children[ch] = TrieNode()
                tn.count += 1
                tn = tn.children[ch]
            tn.count += 1 # include a count for the last set child
            tn.is_end = True
            
    def traverse(
        self, 
        visit: Optional[Callable[[str, TrieNode], bool]] = None
    ) -> None:
        """
        Traverses the trie and optionally calls `visit(prefix, node)` for each node.
        If `visit` is called and returns False then the traversal breaks.
        Takes O(n) time, where n is the number of nodes.
        """

        def _traverse(node: TrieNode, prefix: str) -> None:
            # Call visitor if provided
            res = True
            if visit:
                res = visit(prefix, node)

            # If visit returns False, exit here
            if not res:
                return
            
            # Recurse through all children
            for ch, child in node.children.items():
                _traverse(child, prefix + ch)

        _traverse(self.root, "")

## 1. Binary Search Tree Functions

Write the following binary search tree functions to:

- Return the minimum value
- Return the maximum value
- Return the sum of all values
- Return the height (The height of a BST is the number of edges on the longest path from the root node to a leaf node)

---
### Answers

The functions described below are implemented as methods in the custom `BinarySearchTree` class:
- `find_min()` finds the minimum value in the binary search tree by following the _leftmost_ chain of nodes; this takes $O(log(n))$ time in the average case as it must traverse the height of the tree, but can degrade to $O(n)$ in the worst case where the tree is unbalanced. The space complexity is $O(log(n))$ avg. and $O(n)$ worst case for the recursion stack space, but $O(1)$ if the programming language optimizes for tail recursion.
- `find_max()` finds the maximum value in the binary search tree by following the _rightmost_ chain of nodes; the time and space complexities are the same as the `find_min()` function.
- `sum()` calculates the sum of all nodes in the tree using preorder traversal; thus it takes $O(n)$ time to visit all nodes, with similar space complexities as the above functions for the recursion stack. 
- `height()` finds the maximum height of the tree; the time and space complexities follow the same as the `sum()` function.

_Note: `sum()` and `height()` are implemented in the `BinaryTree` class, which is then inherited by `BinarySearchTree`.

In [2]:
bst_test_cases = [
    # (insert_values, expected_min, expected_max, expected_sum, expected_height)

    # 1. Empty tree
    ([], None, None, 0, 0),

    # 2. Single node
    ([5], 5, 5, 5, 1),

    # 3. Perfectly balanced BST
    ([4, 2, 6, 1, 3, 5, 7], 1, 7, 28, 3),

    # 4. Left-skewed (descending insert)
    ([4, 3, 2, 1], 1, 4, 10, 4),

    # 5. Right-skewed (ascending insert)
    ([1, 2, 3, 4], 1, 4, 10, 4),

    # 6. Unbalanced (deeper left)
    ([5, 3, 8, 2], 2, 8, 18, 3),

    # 7. Unbalanced (deeper right)
    ([5, 3, 8, 10, 12], 3, 12, 38, 4),

    # 8. Balanced shallow tree
    ([2, 1, 3], 1, 3, 6, 2),
]


def test_bst_math_operations():
    """
    Tests: find_min, find_max, sum, and height
    for multiple BST structures.
    """
    for i, (values, expected_min, expected_max, expected_sum, expected_height) in enumerate(bst_test_cases, 1):
        bst = BinarySearchTree[int]()  # type: ignore

        for v in values:
            bst.insert(v)

        result_min = bst.find_min()
        result_max = bst.find_max()
        result_sum = bst.sum()
        result_height = bst.height()

        print(f"\nCase {i}: inserted={values}")
        print(f"  min={result_min}, max={result_max}, sum={result_sum}, height={result_height}")
        print(bst.pretty_print())

        assert result_min == expected_min, f"Case {i}: expected min={expected_min}, got {result_min}"
        assert result_max == expected_max, f"Case {i}: expected max={expected_max}, got {result_max}"
        assert result_sum == expected_sum, f"Case {i}: expected sum={expected_sum}, got {result_sum}"
        assert result_height == expected_height, f"Case {i}: expected height={expected_height}, got {result_height}"


test_bst_math_operations()
print("\n✅ All test cases passed!")


Case 1: inserted=[]
  min=None, max=None, sum=0, height=0
<empty>

Case 2: inserted=[5]
  min=5, max=5, sum=5, height=1
┌5┐

Case 3: inserted=[4, 2, 6, 1, 3, 5, 7]
  min=1, max=7, sum=28, height=3
         ┌4┐         
   ┌2┐         ┌6┐   
┌1┐   ┌3┐   ┌5┐   ┌7┐

Case 4: inserted=[4, 3, 2, 1]
  min=1, max=4, sum=10, height=4
         ┌4┐
      ┌3┐   
   ┌2┐      
┌1┐         

Case 5: inserted=[1, 2, 3, 4]
  min=1, max=4, sum=10, height=4
┌1┐         
   ┌2┐      
      ┌3┐   
         ┌4┐

Case 6: inserted=[5, 3, 8, 2]
  min=2, max=8, sum=18, height=3
      ┌5┐   
   ┌3┐   ┌8┐
┌2┐         

Case 7: inserted=[5, 3, 8, 10, 12]
  min=3, max=12, sum=38, height=4
   ┌5┐           
┌3┐   ┌8┐        
         ┌10┐    
             ┌12┐

Case 8: inserted=[2, 1, 3]
  min=1, max=3, sum=6, height=2
   ┌2┐   
┌1┐   ┌3┐

✅ All test cases passed!


## 2. Binary Search Tree Validator

Write a function that accepts a binary tree and verifies whether it fulfills binary search tree conditions.

---
### Answers

The below `is_bst` function checks if an input `BinaryTree` satisfies the Binary Search Tree (BST) condition:
- left children must be of a lower value than the parent
- right children must be of a higher (or equal) value than the parent

The function performs a breadth-first search (BFS) on the binary tree, so that each node checks its children before further checks can proceed. It takes $O(n)$ time to traverse the entire tree in the worst case, where the tree _is_ as BST. The space complexity is $O(w)$ for the extra queue space, where $w$ is the widest level of the tree.

**Testing**

Two test are provided:
- `test_binary_tree_is_bst` tests any binary tree generated using the test inputs, which may or may not satisfy the bst condition
- `test_bst_always_is_bst` uses the same test inputs, but uses a `BinarySearchTree` class to insert the values to force the bst condition, and will always return true. This is provided as a check of the `BinarySearchTree` class, and also as a general sanity check.

In [3]:
def is_bst(bt: BinaryTree[M]) -> bool:
    '''
    Accepts a Binary Tree as input and validates if it fulfils the BST condition.
    Time Complexity: O(n) since all nodes must be checked in the worst case where it is a BST
    Space Complexity: O(w) worst case, where w is the widest level of the tree.
    '''

    # Edge case: empty tree
    root = bt.root
    if root is None:
        return True
        
    # Use level-order traversal to check each parent against its children
    queue: List[BinaryTreeNode[M]] = [root] # O(w)
    while queue: # O(n)
        node = queue.pop()
        if not node:
            continue
        if node.left:
            if node.left.value > node.value: # BST: left < parent
                return False
            queue.append(node.left)
        if node.right:
            if node.right.value < node.value: # BST: right > parent
                return False
            queue.append(node.right)
    return True

In [4]:
from typing import List, Tuple

# Test inputs and expected results for BinaryTree (level-order insert)
# True if the resulting BinaryTree should satisfy BST, False otherwise
test_cases: List[Tuple[Tuple[int, ...], bool]] = [
    ((), True),                     # empty tree
    ((10,), True),                  # single node
    ((10, 5, 15, 2, 7, 12, 20), True),   # inserted left-right level order forms BST
    ((10, 12, 15), False),          # left child > parent
    ((10, 5, 7), False),            # right child < parent
    ((50, 30, 70, 20, 60, 65, 80), True), # violates BST property
]

def test_binary_tree_is_bst() -> None:
    print("=== Testing BinaryTree (level-order insert) ===")
    for vals, expected in test_cases:
        bt = BinaryTree[int]()
        for v in vals:
            bt.insert(v)
        result = is_bst(bt)
        status = "✅" if result == expected else "❌"
        print(f"Values={vals}, is_bst={result}, expected={expected} {status}")
        print(bt.pretty_print(), "\n")


def test_bst_always_is_bst() -> None:
    print("=== Testing BinarySearchTree (BST insert) ===")
    for vals, _ in test_cases:
        bst = BinarySearchTree[int]()
        for v in vals:
            bst.insert(v)
        result = is_bst(bst)
        status = "✅" if result else "❌"
        print(f"Values={vals}, is_bst={result} {status}")
        print(bst.pretty_print(), "\n")


test_binary_tree_is_bst()
test_bst_always_is_bst()
print("\n✅ All test cases passed!")

=== Testing BinaryTree (level-order insert) ===
Values=(), is_bst=True, expected=True ✅
<empty> 

Values=(10,), is_bst=True, expected=True ✅
┌10┐ 

Values=(10, 5, 15, 2, 7, 12, 20), is_bst=True, expected=True ✅
         ┌10┐            
   ┌5┐           ┌15┐    
┌2┐   ┌7┐    ┌12┐    ┌20┐ 

Values=(10, 12, 15), is_bst=False, expected=False ✅
    ┌10┐    
┌12┐    ┌15┐ 

Values=(10, 5, 7), is_bst=False, expected=False ✅
   ┌10┐   
┌5┐    ┌7┐ 

Values=(50, 30, 70, 20, 60, 65, 80), is_bst=True, expected=True ✅
            ┌50┐            
    ┌30┐            ┌70┐    
┌20┐    ┌60┐    ┌65┐    ┌80┐ 

=== Testing BinarySearchTree (BST insert) ===
Values=(), is_bst=True ✅
<empty> 

Values=(10,), is_bst=True ✅
┌10┐ 

Values=(10, 5, 15, 2, 7, 12, 20), is_bst=True ✅
         ┌10┐            
   ┌5┐           ┌15┐    
┌2┐   ┌7┐    ┌12┐    ┌20┐ 

Values=(10, 12, 15), is_bst=True ✅
┌10┐        
    ┌12┐    
        ┌15┐ 

Values=(10, 5, 7), is_bst=True ✅
      ┌10┐
┌5┐       
   ┌7┐     

Values=(50, 

## 3. Heap-based Frequency Sort

Write a function that accepts a string and returns an array of the characters in the string sorted by frequency (from most frequent to least frequent).

In case of a tie, characters should be ordered in ascending alphabetical order.
You must use a heap to sort the characters

You may use the heap in the dsa package, Python's heapq package or write your own. 

**Example**:

Input:
```txt
"open sesame"
```

Output:
```txt
['e', 's', ' ', 'a', 'm', 'n', 'o', 'p']
```

---
### Answers

The below function `freq_sort` sorts characters of a string by their frequency (descending), breaking ties alphabetically. _Note: upper case characters take precedent (lower alphabetical value) over lowercase ones; this point is made as the question has not constrained the input characters._ 

The **time complexity** is $O(k \log(k))$, where $k$ is the number of unique elements in the string. In the worst case, where all characters are unique in the string, this is $O(n \log(n))$. The explanation for this is below:

- Heaps do not naturally have a means to store the frequencies of elements, and to insert and update priorities based on the input elements by _streaming_ data into the heap would require complex and inefficient machinery to match elements and then update priorities.

- Thus, for this implementation a hash map is created using the `Counter` class, which produces a tuple pair of characters and their frequencies in the string. This takes _linear time_, $O(n)$ to traverse the string, and produces an output of size $O(k)$. 

- Still, there neeeds to be a way to order this data by the frequencies (and tie-breaking alphabetically). The heap is then employed to serve this purpose, wherein the elements of the counter output are input into the heap based on their frequencies, and then alphabetical order, as priorities. Since the heap's values are tuples, the heap will insert the elements based on their highest frequency, and then the alphabetically sorted characters. Heaps are a good resource to use here, as they provide a rather efficient means of sorting frequencies implicitly. As compared to other methods which may take $O(n^2)$ time to sort values, heaps take up to $O(n \log(n))$ time to iterate over all values and trickle the min/max value to the root.

- _Note: Python's `heapq` uses a min-heap implementation. Therefore, the frequencies from the counter are negated, so that higher frequencies will appear lower in the input to the heap, mapping minumums to maximums, thus creating an effective max-heap._

- Finally, once the heap is created its top element (root) is popped until there are no more elements. This produces an output that contains the highest frequency items first, and which are ordered alphabetically.


The **space complexity** is $O(k)$ for the extra space required for the heap and frequency map; this becomes $O(n)$ in the worst case where all characters are unique in the string.

In [5]:
from collections import Counter
import heapq

def freq_sort(s: str) -> List[str]:
    '''
    Sorts characters of a string by frequency (descending), breaking ties alphabetically.
    NOTE: uppercase characters take precedent over lowercase.
    
    Time Complexity: O(n + k log k)
        - Counting frequencies: O(n)
        - Building and heapifying: O(k)
        - Popping k items: O(k log k)
      Worst-case when all characters are unique: O(n log n)
    
    Space Complexity: O(k)
        - O(k) for heap and frequency map
      Worst-case when all characters are unique: O(n)
    '''
    c = Counter(s) # O(n) returns a hashmap of items and their frequencies
    h = [(-count, char) for char, count in c.items()] # O(k) space and time
    # NOTE: hepaify uses the min heap, so the counts are inverted here for max heap-style

    heapq.heapify(h) # O(k)
    res = [''] * len(h) # O(k)
    
    for i in range(len(h)): # O(k); total O(klogk)
        res[i] = heapq.heappop(h)[1] # O(logk)

    return res

In [6]:
def test_freq_sort():
    test_cases = [
        ("banana", ['a', 'n', 'b']),
        ("cba", ['a', 'b', 'c']),
        ("aaaa", ['a']),
        ("", []),
        ("AaBbAa", ['A', 'a', 'B', 'b']),
        ("abbcccdddeee", ['c', 'd', 'e', 'b', 'a']),
        ("banaan", ['a', 'n', 'b'])
    ]
    
    for idx, (input_str, expected) in enumerate(test_cases, 1):
        result = freq_sort(input_str)
        assert result == expected, f"Test {idx} failed: input={input_str}, expected={expected}, got={result}"
    print('All tests passed ✅')

test_freq_sort()

All tests passed ✅


## 4.  Heap-based Median Tracker

Write functions/methods that efficiently computes the median of a list of numbers using a heap-based approach.

- The median is the middle value in a sorted list
- If there are an odd number of elements, return the middle value
- If there are an even number of elements, return the average of the two middle values
- While sorting the list is acceptable for static datasets, it becomes inefficient for dynamic or real-time updates. Your solution should be designed to handle such updates efficiently using heaps.

**Requirements**:

- Implement an efficient strategy for maintaining the median as new numbers are added
- One function should support inserting numbers one at a time
- Another function should return the current median

**Example**:

```python
tracker = MedianTracker()
numbers = [30, 20, 10, 50, 40]

for num in numbers:
    tracker.insert(num)
    print(f"Inserted {num}, current median: {tracker.get_median()}")
```

**Output**
```txt
Inserted 30, current median: 30
Inserted 20, current median: 25.0
Inserted 10, current median: 20
Inserted 50, current median: 25.0
Inserted 40, current median: 30
```

---
### Answers

The below `MedianTracker` class tracks the median of dynamic/real-time data. It does so be utilizing heaps to maintain two balanced lists which effectively split the overall dataset in two.

- `low` maintains a max-heap, where the first element is the median in an odd-length dataset, and the first half of the median in an even-length dataset
- `high` maintains a min-heap, where the first element is the latter half of the median in an even-length dataset.

This works by first streaming data into the `low` dataset (which takes $O(log(n))$ time since it maintains the heap invariant), and then re-balancing it with the `high` dataset. 

$$
\begin{aligned}
\text{Rebalance Condition}: & 
&len(low) > len(high) + 1
\end{aligned}
$$

This rebalance happens by popping the root of `low` in $O(1)$ time, and pushing it to the `high` dataset in $O(log(n))$ time. Thus, the aforementioned properties of each dataset are maintained, while satisfying the heap invariant. 

Calculation of the median is then simple,

$$
median =
\begin{cases}
- \text{low[0]} & \text{if } \text{len(low) > len(high)} \\[2mm]
\frac{-\text{low[0]} + \text{high[0]}}{2} & \text{otherwise}
\end{cases}
$$

_Note: $low$ is always negated in this calculation to reset it to its original value, since its values are negated when pushed to its heap to maintain a max-heap invariant (Python uses min-heap by default)._


**Time Complexity**
- $O(log(n))$ for inserting new elements into the heap
- $O(1)$ for median calculation
- $O(log(n))$ overall

**Space Complexity**
- $O(n)$ to maintain the heap
- $O(1)$ for median calculation
- $O(n)$ overall

In [7]:
class MedianTracker(Generic[M]):
    """
    Maintains a dynamic dataset and allows efficient median computation.
    """

    def __init__(self):
        # Max-heap for the lower half (invert values to use Python's min-heap)
        self.low = []  # max-heap: store negative numbers
        # Min-heap for the upper half
        self.high = []  # min-heap: store positive numbers

    def insert(self, val: float):
        """
        Insert a new value into the data structure.
        Time Complexity: O(log(n)) for heap insert
        Space Complexity: O(n) for the heap
        """
        # Step 1: Push onto max-heap
        if not self.low or val <= -self.low[0]:
            heapq.heappush(self.low, -val)
        else:
            heapq.heappush(self.high, val)

        # Step 2: Rebalance heaps to maintain sizes
        if len(self.low) > len(self.high) + 1:
            heapq.heappush(self.high, -heapq.heappop(self.low))
        elif len(self.high) > len(self.low):
            heapq.heappush(self.low, -heapq.heappop(self.high))

    def get_median(self) -> float:
        """
        Return the current median.
        Time Complexity: O(1) since array indexing is used
        Space Complexity: O(1) since no extra space is required
        """
        if len(self.low) == len(self.high):
            return (-self.low[0] + self.high[0]) / 2
        else:
            return -self.low[0]


    def __repr__(self) -> str:
        return f"Heap [ Low {self.low} | High {self.high} ]"
    

In [8]:
def test_median_heap():
    # Each tuple: (list of numbers to insert, expected median after all insertions)
    test_cases = [
        ([5, 3, 8], 5),                  # odd number of elements
        ([1, 2, 3, 4], 2.5),             # even number of elements
        ([5, 5, 5, 5], 5),               # duplicates
        ([10, 9, 8, 7, 6], 8),           # descending sequence
        ([1, 2, 3, 4, 5, 6], 3.5),       # increasing sequence
        ([1, 10, 2, 9, 3, 8], 5.5),      # interleaved insertions
    ]

    for idx, (nums, expected) in enumerate(test_cases, 1):
        mh = MedianTracker()
        for num in nums:
            mh.insert(num)
        result = mh.get_median()
        assert result == expected, f"Test {idx} failed: inserted {nums}, expected median {expected}, got {result}"

    print("All tests passed ✅")

test_median_heap()

All tests passed ✅


## 5. Longest Common Prefix in a Trie

Write a function that accepts an array of words, stores them in a trie and returns the longest common prefix. Write it so that it performs efficiently.

For example, given the array

```python
words = ["apple", "appetite", "apparatus", "appliance"]
```

The function should return

```sh
"app"
```

---
### Answers

The below function `longest_common_prefix` finds the longest common prefix in an array of words. It firsts constructs a trie, using the `Trie` class, which contains the words, taking $O(n)$ time, where $n$ is the number of characters in all words, and $O(w)$ space, where $w$ is the number of _unique_ characters across all words.

The root of the `Trie` points to a `TrieNode` object, which in turn points to other `TrieNode` objects. This `TrieNode` class contains the actual data and tracks the count of words to which each character belongs. This property is then used to perform the longest common prefix computation, noting:
- the root node will contain the highest `count`, since it is the root of all words (`root.count` = $n$)
- the longest common prefix will also contain the same `count` as the root (`prefix.count` = $n$)
- therefore, we only need to perform a DFS on the trie's nodes so long as the node contains the same count as the root
- the final node which is traversed and satisfies this propery will be the longest common prefix
- an input with no common prefixes will not traverse past the root (longest common prefix = "")

The **time complexity** of this operation is thus $O(p)$, where $p$ is the length of the longest common prefix, since the search terminates once the common prefix's count condition is broken. Furthermore, since the only extra space used for this computation is in the construction of the trie, the **space complexity** is $O(w)$, where $w$ is the number of _unique_ characters across all words.

In [9]:
def longest_common_prefix(words: List[str]) -> str:
    """
    Returns the longest prefix shared by all words in the trie.
    Time Complexity: O(k), where k is the length of the longest common prefix
    Space Complexity: O(n) for the trie space.
    """
    trie = Trie()
    trie.insert(words)
    longest_prefix = ""
    max_count = trie.root.count

    def visit(pfx: str, t: TrieNode) -> bool:
        nonlocal longest_prefix, max_count
        # The root node contains the max_count, since it holds all
        # descendant words
        # Therefore, the longest common prefix is the one which shares
        # the same count as the root node, and has a prefix longer than
        # it.
        if t.count == max_count and len(pfx) >= len(longest_prefix):
            longest_prefix = pfx
            return True # singals to continue the traversal
        return False # end traversal, no longer prefix to be found

    trie.traverse(visit)
    return longest_prefix

In [10]:
def test_longest_common_prefix():
    # Each test: (input_words, expected_prefix)
    tests = [
        (["apple", "appetite", "apparatus", "appliance"], "app"),
        (["dog", "door", "dove"], "do"),
        (["cat", "catalog", "catch"], "cat"),
        (["interact", "internet", "interval", "internal"], "inter"),
        (["flight", "flow", "flower"], "fl"),
        (["prefix", "suffix", "affix"], ""),
        (["same", "same", "same"], "same"),
        (["a", "ab", "abc"], "a"),
        ([""], ""),
        ([], ""),
    ]

    passed = failed = 0
    for words, expected in tests:
        result = longest_common_prefix(words)
        if result == expected:
            print(f"✅ PASSED: {words} → '{result}'")
            passed += 1
        else:
            print(f"❌ FAILED: {words} → got '{result}', expected '{expected}'")
            failed += 1

    print(f"\nSummary: {passed} passed, {failed} failed")

test_longest_common_prefix()

✅ PASSED: ['apple', 'appetite', 'apparatus', 'appliance'] → 'app'
✅ PASSED: ['dog', 'door', 'dove'] → 'do'
✅ PASSED: ['cat', 'catalog', 'catch'] → 'cat'
✅ PASSED: ['interact', 'internet', 'interval', 'internal'] → 'inter'
✅ PASSED: ['flight', 'flow', 'flower'] → 'fl'
✅ PASSED: ['prefix', 'suffix', 'affix'] → ''
✅ PASSED: ['same', 'same', 'same'] → 'same'
✅ PASSED: ['a', 'ab', 'abc'] → 'a'
✅ PASSED: [''] → ''
✅ PASSED: [] → ''

Summary: 10 passed, 0 failed
