In [1]:
from structures import *
from utils import *
import random

KeyboardInterrupt: 

In [None]:
# Maximum number of keys a node may hold. Node.__init__ copies this into
# `max_keys` and derives `min_leaf_keys` / `min_non_leaf_keys` from it.
MAX_KEYS = 3

In [None]:
class Node:
    """A single B+ tree node; a fresh node is an empty leaf without a parent.

    In a leaf, ``pointers[i]`` is the value stored under ``keys[i]`` and the
    last entry links to the next leaf (``None`` when there is none). In an
    internal node, ``pointers`` holds the child nodes.
    """

    def __init__(self):
        capacity = MAX_KEYS
        # floor(capacity / 2) and whether capacity is odd
        half, odd = divmod(capacity, 2)

        self.parent = None
        self.leaf = True
        self.keys = []
        self.pointers = [None]
        self.max_keys = capacity
        # ceil(capacity / 2) keys are required in a non-root leaf
        self.min_leaf_keys = half + odd
        # floor(capacity / 2) keys are required in a non-root internal node
        self.min_non_leaf_keys = half

In [None]:
_="""
We assume that the key is not already present in the tree.
Starting from the root node, search for ‘key’ down to a leaf node. Let the search path
be x1, x2, … , xh, where x1 is the root and xh is the leaf. Each node xi is the parent of xi+1.
Insert the new object, whose key is ‘key’ and whose value is v, into xh.
i := h
while xi overflows, do
   divide xi into two nodes, by moving the larger half of the keys into a new node p.
   if xi is leaf node, link p into the linked list among leaf nodes.
   identify a key k, to be inserted into the parent level along with child pointer pointing p.
   The choice of k depends on the type of the node xi. If xi is a leaf node, we perform
   copy up: the smallest key in p is copied as k to the parent level and also stays in p.
   If xi is a non-leaf node, we perform push up: the smallest key in p is moved (not copied)
   up as k to the parent level, so it no longer remains in p.
   if i = 1 (xi is the root), then
      create a new index node as the new root. In the new root store node with key k,
      and two child xi and p.
      return
   else
      insert a key k and a child pointer pointing to p, into node xi-1.
      i := i – 1
   end if
done
"""

In [None]:
def get_right_sibling(node):
    """Return the pointer stored immediately after ``node`` in its parent.

    Returns ``None`` when ``node`` has no parent, when it is the parent's last
    pointer, or when it is not found among the parent's pointers.
    """
    parent = node.parent
    if parent is None:
        return None
    siblings = parent.pointers
    # Pair each pointer with its successor; the last pointer has no successor.
    for current, following in zip(siblings, siblings[1:]):
        if current is node:
            return following
    return None

In [None]:
def get_left_sibling(node):
    """Return the pointer stored immediately before ``node`` in its parent.

    Returns ``None`` when ``node`` has no parent, when it is the parent's first
    pointer, or when it is not found among the parent's pointers.
    """
    parent = node.parent
    if parent is None:
        return None
    siblings = parent.pointers
    # Pair each pointer with its predecessor; the first pointer has none.
    for preceding, current in zip(siblings, siblings[1:]):
        if current is node:
            return preceding
    return None

In [None]:
def delete(node, key):
    """Delete ``key`` from the tree rooted at ``node`` and return the root to use afterwards.

    The deletion itself is done by ``delete_helper``. Afterwards:
    - if the root's first pointer is ``None`` the tree holds nothing, and the
      same (empty) root is returned;
    - if the root ended up with no keys, its only child is detached and
      returned as the new root;
    - otherwise the original root is returned.
    """
    delete_helper(node, key)
    # Identity check: pointers[0] of a leaf root is a stored user value, and
    # `== None` would run that value's own __eq__.
    if node.pointers[0] is None:
        print("tree is empty")
        return node
    if len(node.keys) == 0:
        print("root has no keys, updating root!")
        new_root = node.pointers[0]
        new_root.parent = None
        return new_root
    return node

In [None]:
def delete_helper(node, key):
    """Recursively delete ``key`` from the subtree rooted at ``node``.

    Returns a tuple ``(merged, next_largest)``:
      * ``merged`` is True when ``node`` was merged with a sibling, which
        removed one key from its parent, so the parent must check itself
        for underflow.
      * ``next_largest`` is the key that follows the deleted key in leaf
        order, or ``None`` if there is none. Ancestors use it to overwrite
        separator keys equal to ``key``.

    If ``key`` is not stored in the leaf the search ends in, nothing is
    modified and ``(False, None)`` is returned.

    Raises:
        Exception: if a non-root node underflows and has neither a left nor
            a right sibling to borrow from or merge with.
    """
    if node.leaf:
        next_largest = None
        found = False
        for i in range(len(node.keys)):
            if node.keys[i] == key:
                node.keys.pop(i)
                node.pointers.pop(i)
                # After the pop, index i holds the successor inside this leaf (if any).
                if i < len(node.keys):
                    next_largest = node.keys[i]
                found = True
                break

        if not found:
            # Nothing was removed, so there is no underflow and no separator to
            # fix. Without this guard `next_largest` was read while unbound.
            return False, None

        if next_largest is None:
            # The deleted key was the last one in this leaf; its successor is
            # the first key of the next leaf in the leaf chain, if there is one.
            next_leaf = node.pointers[-1]
            next_largest = next_leaf.keys[0] if next_leaf else None

        # The root is allowed to hold fewer than the minimum number of keys.
        if node.parent is None:
            return False, next_largest

        if len(node.keys) >= node.min_leaf_keys:
            return False, next_largest

        # Leaf underflow: try borrowing from the left sibling first ...
        left_sibling = get_left_sibling(node)
        if left_sibling and len(left_sibling.keys) > node.min_leaf_keys:
            leaf_distribute(left_sibling, node)
            return False, next_largest

        # ... then from the right sibling.
        right_sibling = get_right_sibling(node)
        if right_sibling and len(right_sibling.keys) > node.min_leaf_keys:
            leaf_distribute(node, right_sibling)
            return False, next_largest

        # Borrowing is impossible: merge, preferring the left sibling.
        if left_sibling:
            leaf_merge(left_sibling, node)
            return True, next_largest

        if right_sibling:
            leaf_merge(node, right_sibling)
            return True, next_largest

        raise Exception("Leaf deletion underflow could never borrow nor merge")

    # Internal node: descend into the first child whose separator is greater
    # than `key`, or into the last child when no separator is.
    for i in range(len(node.keys)):
        if key < node.keys[i]:
            res = delete_helper(node.pointers[i], key)
            break
    else:
        res = delete_helper(node.pointers[-1], key)

    merged, next_largest = res

    # No key was removed from this node, or it still has enough keys, or it is
    # the root: only the separator equal to `key` (if any) needs refreshing.
    if not merged or len(node.keys) >= node.min_non_leaf_keys or node.parent is None:
        replace_key(node, key, next_largest)
        return False, next_largest

    # Internal underflow: try borrowing from the left sibling first ...
    left_sibling = get_left_sibling(node)
    if left_sibling and len(left_sibling.keys) > node.min_non_leaf_keys:
        distribute(left_sibling, node)
        replace_key(node, key, next_largest)
        return False, next_largest

    # ... then from the right sibling.
    right_sibling = get_right_sibling(node)
    if right_sibling and len(right_sibling.keys) > node.min_non_leaf_keys:
        distribute(node, right_sibling)
        replace_key(node, key, next_largest)
        return False, next_largest

    # Borrowing is impossible: merge, preferring the left sibling.
    if left_sibling:
        merge_with_left(left_sibling, node)
        replace_key(node, key, next_largest)
        return True, next_largest

    if right_sibling:
        merge_with_right(node, right_sibling)
        replace_key(node, key, next_largest)
        return True, next_largest

    raise Exception("Non leaf deletion underflow could never borrow nor merge")
            
        

In [None]:
def replace_key(node, old, new):
    """Overwrite every key of ``node`` equal to ``old`` with ``new``.

    Does nothing when ``new`` is ``None``.
    """
    if new is None:
        return
    for index, existing in enumerate(node.keys):
        if existing == old:
            node.keys[index] = new

In [None]:
def leaf_distribute(left, right):
    """Split the combined entries of two leaves evenly between them.

    ``left`` receives the first ``ceil(total / 2)`` keys with their values and
    ``right`` receives the rest. Each leaf keeps its own trailing pointer.
    Both leaves then refresh their lower-bound key in the parent via
    ``update_parent_lb``.
    """
    left_tail, right_tail = left.pointers[-1], right.pointers[-1]
    pooled_keys = [*left.keys, *right.keys]
    pooled_values = [*left.pointers[:-1], *right.pointers[:-1]]
    cut = (len(pooled_keys) + 1) // 2

    left.keys = pooled_keys[:cut]
    left.pointers = pooled_values[:cut] + [left_tail]

    right.keys = pooled_keys[cut:]
    right.pointers = pooled_values[cut:] + [right_tail]

    for leaf in (left, right):
        update_parent_lb(leaf)

In [None]:
def update_parent_lb(node):
    """Set the parent key just left of ``node``'s pointer to ``node.keys[0]``.

    Nothing changes when ``node`` is the parent's first pointer (there is no
    key to its left) or when it is not among the parent's pointers.
    """
    parent = node.parent
    for index, child in enumerate(parent.pointers):
        if index > 0 and child is node:
            parent.keys[index - 1] = node.keys[0]

In [None]:
def leaf_merge(left, right):
    """Fold the leaf ``right`` into ``left`` and unlink ``right`` from the parent.

    ``left`` takes over all keys and pointers of ``right``, including its
    trailing pointer. The parent then loses the pointer after ``left`` and the
    key between the two, and ``left``'s lower-bound key is refreshed.
    """
    left.keys += right.keys
    # Drop left's own trailing pointer; right's pointer list supplies the new one.
    del left.pointers[-1]
    left.pointers += right.pointers
    remove_from_parent_next_pointer_and_key(left)
    update_parent_lb(left)

In [None]:
def remove_from_parent_next_pointer_and_key(node):
    """Remove the parent's pointer right after ``node`` and the key between them.

    Returns the removed key, or ``None`` (with nothing removed) when ``node``
    is the parent's last pointer or is not among its pointers.
    """
    parent = node.parent
    # The last pointer has no successor, so it is excluded from the scan.
    for index, child in enumerate(parent.pointers[:-1]):
        if child is node:
            del parent.pointers[index + 1]
            return parent.keys.pop(index)
        
def remove_from_parent_prev_pointer_and_key(node):
    """Remove the parent's pointer right before ``node`` and the key between them.

    Returns the removed key, or ``None`` (with nothing removed) when ``node``
    is the parent's first pointer or is not among its pointers.
    """
    parent = node.parent
    for index, child in enumerate(parent.pointers):
        # Index 0 has no predecessor, so it never matches.
        if index >= 1 and child is node:
            del parent.pointers[index - 1]
            return parent.keys.pop(index - 1)

In [None]:
def distribute(left, right):
    """Balance the keys of two adjacent internal nodes by rotating through the parent.

    After the call ``left`` holds ``ceil(total / 2)`` keys and ``right`` the
    remainder, where ``total`` is the number of keys in both nodes. Every
    rotation moves the parent's separator down into the receiving node, moves
    the donor's nearest key up into the parent, and re-parents one child.

    Raises:
        Exception: if ``left`` is not found in the parent or is its last pointer.
        AssertionError: if ``right`` is not the pointer directly after ``left``.
    """
    parent = left.parent

    # Position of `left` among the parent's pointers; this is also the index
    # of the separator key between `left` and `right`.
    pivot_pos = None
    for index, child in enumerate(parent.pointers):
        if child is left:
            pivot_pos = index
    last_slot = len(parent.pointers) - 1
    if pivot_pos is None or pivot_pos == last_slot:
        print("pivot_pos:", pivot_pos)
        raise Exception("If left has a right sibling, it must have a pivot_pos < len(parent.pointers) - 1")
    assert parent.pointers[pivot_pos + 1] is right

    total = len(left.keys) + len(right.keys)
    num_left = (total + 1) // 2
    num_right = total - num_left

    # Rotate left: `left` is short, so take entries from the front of `right`.
    missing_left = num_left - len(left.keys)
    for _ in range(missing_left):
        left.keys.append(parent.keys[pivot_pos])
        parent.keys[pivot_pos] = right.keys.pop(0)
        moved = right.pointers.pop(0)
        left.pointers.append(moved)
        moved.parent = left

    # Rotate right: `right` is short, so take entries from the back of `left`.
    missing_right = num_right - len(right.keys)
    for _ in range(missing_right):
        right.keys.insert(0, parent.keys[pivot_pos])
        parent.keys[pivot_pos] = left.keys.pop()
        moved = left.pointers.pop()
        right.pointers.insert(0, moved)
        moved.parent = right


In [None]:
def merge_with_right(node, right):
    """Absorb the internal node ``right`` into ``node``.

    The parent's pointer after ``node`` and the separator between the two are
    removed; that separator is appended to ``node.keys``. The children and
    keys of ``right`` are then appended to ``node``, and each moved child is
    re-parented to ``node``.
    """
    separator = remove_from_parent_next_pointer_and_key(node)
    node.keys.append(separator)
    key_count = len(right.keys)
    for index in range(len(right.pointers)):
        child = right.pointers[index]
        node.pointers.append(child)
        child.parent = node
        # A node has one more pointer than keys: stop after the final pointer.
        if index == key_count:
            break
        node.keys.append(right.keys[index])

def merge_with_left(left, node):
    """Absorb the internal node ``left`` into ``node``.

    The parent's pointer before ``node`` and the separator between the two are
    removed; that separator is placed at the front of ``node.keys``. The
    children and keys of ``left`` are then moved, back to front, to the front
    of ``node``, which empties ``left.pointers``. Each moved child is
    re-parented to ``node``.
    """
    separator = remove_from_parent_prev_pointer_and_key(node)
    node.keys.insert(0, separator)
    while len(left.pointers) > 0:
        child = left.pointers.pop()
        node.pointers.insert(0, child)
        child.parent = node
        # There is one key fewer than pointers, so the last round moves no key.
        if len(left.keys) > 0:
            node.keys.insert(0, left.keys.pop())

In [None]:
_="""
We assume that the key is present in the tree.
Starting from the root node, search for ‘key’ down to a leaf node. Let the search path
be x1, x2, … , xh, where x1 is the root and xh is the leaf. Each node xi is the parent of xi+1.
Delete the object whose key is ‘key’ from xh.
if h = 1, then return, as there is only one node which is root.
i := h
while xi underflows, do
   if immediate sibling node s of xi, has at least m/2 + 1 elements, then
      redistribute entries evenly between s and xi.
      corresponding to redistribution, a key k in the parent node xi-1, will be changed.
      if xi is non-leaf node, then
         k is dragged down to xi. and a key from s is pushed up to fill the place of k
      else
         k is simply replaced by a key in s
      return
   else
      merge xi with the sibling node s. Delete the corresponding child pointer in xi-1.
      if xi is an internal node, then
         drag down the key in xi-1 that previously separated xi and s
         into the merged node xi.
      else
         delete that key in xi-1.
      i := i – 1
   end if
done
"""

In [None]:
def _first_greater_index(keys, key, default):
    """Return the index of the first entry of ``keys`` greater than ``key``, else ``default``."""
    for index, existing in enumerate(keys):
        if key < existing:
            return index
    return default


def _promote(separator, left, right):
    """Wrap the two halves of a split in a temporary internal node holding ``separator``."""
    carrier = Node()
    carrier.leaf = False
    carrier.keys = [separator]
    carrier.pointers = [left, right]
    left.parent = carrier
    right.parent = carrier
    return carrier


def insert_helper(node, key, value):
    """Insert ``key``/``value`` below ``node``, splitting nodes that overflow.

    Returns ``None`` when ``node`` absorbed the insertion. When ``node`` had to
    split, returns a temporary internal node with exactly one key (the
    separator) and two pointers (``node`` and its new right sibling). The
    caller either splices these into the parent or, at the top level, uses the
    temporary node as the new root.
    """
    if node.leaf:
        # Insert before the first greater key, or at the end of the keys
        # (i.e. just before the trailing next-leaf pointer).
        slot = _first_greater_index(node.keys, key, len(node.keys))
        node.keys.insert(slot, key)
        node.pointers.insert(slot, value)

        if len(node.keys) <= node.max_keys:
            return None

        # Leaf split: the left half keeps ceil(n / 2) entries.
        cut = (len(node.keys) + 1) // 2
        sibling = Node()
        sibling.keys = node.keys[cut:]
        # Includes the old trailing next-leaf pointer.
        sibling.pointers = node.pointers[cut:]

        node.keys = node.keys[:cut]
        # The left half now links to its new right sibling.
        node.pointers = node.pointers[:cut] + [sibling]

        # Copy up: the separator also stays in the right leaf.
        return _promote(sibling.keys[0], node, sibling)

    # Internal node: descend into the first child whose separator is greater
    # than `key`, or into the last child when none is.
    slot = _first_greater_index(node.keys, key, len(node.pointers) - 1)
    split = insert_helper(node.pointers[slot], key, value)
    if split is None:
        return None

    # The child split: splice its separator and both halves into this node.
    node.keys.insert(slot, split.keys[0])
    node.pointers[slot] = split.pointers[0]
    node.pointers.insert(slot + 1, split.pointers[1])
    node.pointers[slot].parent = node
    node.pointers[slot + 1].parent = node

    if len(node.keys) <= node.max_keys:
        return None

    # Internal split: the middle key moves up and stays in neither half.
    mid = len(node.keys) // 2
    separator = node.keys[mid]

    sibling = Node()
    sibling.leaf = False
    sibling.keys = node.keys[mid + 1:]
    sibling.pointers = node.pointers[mid + 1:]
    for child in sibling.pointers:
        child.parent = sibling

    node.keys = node.keys[:mid]
    node.pointers = node.pointers[:mid + 1]

    return _promote(separator, node, sibling)

In [None]:
def insert(root, key, value):
    """Insert ``key``/``value`` into the tree and return the root to use afterwards.

    If the old root split, ``insert_helper`` hands back the node that becomes
    the new root; otherwise the original ``root`` is returned.
    """
    new_root = insert_helper(root, key, value)
    if new_root:
        return new_root
    return root

In [None]:
def show(root):
    """Print the tree one level per line, followed by a blank line.

    The keys of each node are printed and a ``|`` marks the end of the node.
    Prints ``Empty tree`` and stops as soon as a ``None`` node is met.
    """
    level = [root]
    while level:
        next_level = []
        tokens = []
        for node in level:
            if node is None:
                print("Empty tree")
                return
            tokens.extend(node.keys)
            tokens.append("|")
            for pointer in node.pointers:
                # The first pointer that is not a Node ends the scan of this node.
                if type(pointer) != Node:
                    break
                next_level.append(pointer)
        print(" ".join(map(str, tokens)))
        print()
        level = next_level

In [None]:
def validate_parent(root):
    """Assert recursively that each child Node's ``parent`` is the node that points to it.

    The scan of a node stops at its first pointer that is not a ``Node``.
    """
    for child in root.pointers:
        if type(child) != Node:
            return
        assert child.parent is root
        validate_parent(child)

In [None]:
def validate(root):
    """Assert the structural invariants of the subtree rooted at ``root``.

    Checks that
    - a node with a parent holds between its minimum and ``max_keys`` keys;
    - the keys inside each node are strictly increasing;
    - the first keys of consecutive children are strictly increasing;
    - each separator ``keys[i-1]`` equals the smallest key found under
      ``pointers[i]``.

    Returns the smallest key of the subtree (the first key of its leftmost
    leaf), or ``None`` for an empty tree.

    Raises:
        AssertionError: if any invariant is violated.
    """
    if root.parent is not None:
        if root.leaf:
            assert root.min_leaf_keys <= len(root.keys) <= root.max_keys
        else:
            assert root.min_non_leaf_keys <= len(root.keys) <= root.max_keys
    for i in range(len(root.keys) - 1):
        assert root.keys[i] < root.keys[i + 1]
    if not root.keys and root.pointers[0] is None:
        # Empty tree (a key-less leaf root): there is no smallest key, and
        # `root.keys[0]` below would raise IndexError.
        return None
    if type(root.pointers[0]) != Node:
        return root.keys[0]
    for i in range(len(root.pointers) - 1):
        assert root.pointers[i].keys[0] < root.pointers[i + 1].keys[0]
    # Validate the leftmost child once and reuse its result as the return
    # value; validating it a second time doubled the work at every level.
    smallest = validate(root.pointers[0])
    for i in range(1, len(root.pointers)):
        assert root.keys[i - 1] == validate(root.pointers[i])
    return smallest

In [None]:
def search_first_gte(root, key):
    """
    A utility function used by search_range to locate the first entry whose key is >= key
    If found, return a (leaf node, index of the key in that leaf) tuple
    If not found, i.e. the leaf reached has no key >= key and no right neighbour, return None
    """
    if not root.leaf:
        # descend into the first child whose separator is greater than key,
        # or into the last child when no separator is
        for index, separator in enumerate(root.keys):
            if key < separator:
                return search_first_gte(root.pointers[index], key)
        return search_first_gte(root.pointers[-1], key)

    for index, candidate in enumerate(root.keys):
        if candidate >= key:
            return root, index

    next_leaf = root.pointers[-1]
    if next_leaf is None:
        # the leaf reached is the rightmost one, so nothing is >= key
        return None
    # no check is made here: the first key of the right neighbour is taken to be >= key,
    # relying on the separator that routed the search into this leaf being > key
    return next_leaf, 0
def search_range(root, lower, upper, return_key=False):
    """
    Returns a list of all values whose keys are in the range [lower, upper] inclusive
    If return_key is True, the matching keys are returned instead of the values
    If lower is None, it is treated as no lower bound
    If upper is None, it is treated as no upper bound
    If both are None, return all values

    None bounds are handled as truly unbounded rather than through sentinel
    strings, so they work for any key type and for string keys of any content.
    """
    if lower is not None and upper is not None and lower > upper:
        return []

    if lower is None:
        # no lower bound: start at the first entry of the leftmost leaf
        node, pos = root, 0
        while not node.leaf:
            node = node.pointers[0]
    else:
        first_gte = search_first_gte(root, lower)
        if first_gte is None:
            return []
        node, pos = first_gte

    res = []
    while node is not None:
        for i in range(pos, len(node.keys)):
            if upper is not None and node.keys[i] > upper:
                # this key and everything after it in leaf order exceed the
                # upper bound, so the result is complete
                return res
            res.append(node.keys[i] if return_key else node.pointers[i])
        # move to the immediate right neighbour
        node = node.pointers[-1]
        pos = 0
    # reached when the result runs through the rightmost leaf
    return res

In [None]:
def get_num_nodes_dfs(root):
    """Count the nodes of the tree rooted at ``root`` by depth-first recursion.

    A leaf counts as one node; an internal node counts as one plus the nodes
    under each of its children.
    """
    # The leaf flag is used rather than inspecting the type of pointers[0]:
    # every pointer of an internal node is a child node.
    if root.leaf:
        return 1
    # Recurse into this function itself; the previous body called
    # `get_num_nodes`, a name this notebook never defines.
    return 1 + sum(get_num_nodes_dfs(child) for child in root.pointers)

In [None]:
def get_num_nodes_bfs(root):
    """Count the nodes of the tree rooted at ``root`` by walking it level by level."""
    count = 0
    frontier = [root]
    while frontier:
        count += len(frontier)
        children = []
        for node in frontier:
            for pointer in node.pointers:
                # The first pointer that is not a Node ends the scan of this node.
                if type(pointer) != Node:
                    break
                children.append(pointer)
        frontier = children
    return count

In [None]:
def get_height(root):
    """Return the number of Node levels met when following ``pointers[0]`` down from ``root``."""
    levels = 0
    node = root
    while type(node) is Node:
        levels += 1
        node = node.pointers[0]
    return levels

In [None]:
def delete_range(node, lower, upper): # inclusive
    """Delete every key in [lower, upper] and return the root to use afterwards.

    The keys are collected first with ``search_range`` and then removed one by
    one with ``delete``, carrying the root forward because ``delete`` may
    return a different root.
    """
    doomed_keys = search_range(node, lower, upper, True)
    current_root = node
    for doomed in doomed_keys:
        current_root = delete(current_root, doomed)
    return current_root

In [None]:
# NOTE(review): `root` is first assigned in the tree-building cell further down
# (`root = Node()`); on a fresh kernel this cell raises NameError unless that
# cell has been run first (or a star-import happens to provide `root`).
get_num_nodes_bfs(root)

In [None]:
# NOTE(review): `root` is first assigned in the tree-building cell further down
# (`root = Node()`); on a fresh kernel this cell raises NameError unless that
# cell has been run first (or a star-import happens to provide `root`).
get_num_nodes_dfs(root)

In [None]:
# NOTE(review): `root` is first assigned in the tree-building cell further down
# (`root = Node()`); on a fresh kernel this cell raises NameError unless that
# cell has been run first (or a star-import happens to provide `root`).
get_height(root)

In [None]:
# Build the tree from scratch: start with an empty root and insert every record.
root = Node()
# `parse_data` is not defined in this notebook; presumably it comes from one of
# the star-imports at the top -- TODO confirm.
records = parse_data()
for record in records:
    # The key is str(record[1]) followed by record[0]; the whole record is the
    # stored value. `insert` may return a new root, so it is reassigned each time.
    root = insert(root, str(record[1]) + record[0], record)

In [None]:
# experiment 3
# Range query on the key prefix "8.0" (keys are str(record[1]) + record[0]),
# checked against a brute-force scan of the records.
exp3 = search_range(root, "8.0", "8.1")
actual_exp3 = [record for record in records if record[1] == 8.0]
assert sorted(exp3) == sorted(actual_exp3)

In [None]:
# experiment 4
# Range query over keys in ["7.0", "9.1"], compared (by record[0]) against a
# brute-force scan for 7 <= record[1] <= 9.
# NOTE(review): the bounds are compared as strings; this presumably relies on
# every record[1] rendering as a one-digit-before-the-point string -- TODO confirm.
exp4 = [record[0] for record in search_range(root, "7.0", "9.1")]
actual_exp4 = [record[0] for record in records if 7 <= record[1] <= 9]
assert sorted(exp4) == sorted(actual_exp4)

In [None]:
# experiment 5
root = delete_range(root, "7.0", "7.1")
records_remaining = search_range(root, None, None)
actual_records_remaining = [record for record in records if record[1] != 7]
assert sorted(records_remaining) == sorted(actual_records_remaining)