## **B Trees**

In [7]:
from time import time
from random import randint

In [8]:
class Node:
    """A single B-tree node.

    Attributes:
        keys: the keys stored in this node (kept in ascending order by the
            tree operations that fill it).
        children: child nodes; left empty for a leaf.
        leaf: True when the node has no children.
    """

    def __init__(self, leaf=False) -> None:
        # Each node gets its own fresh, independent lists.
        self.keys, self.children = [], []
        self.leaf = leaf

In [28]:
class BTree:
    """B-tree of minimum degree ``t`` that allows duplicate keys.

    Every node except the root holds between ``t - 1`` and ``2t - 1`` keys,
    and every internal node with ``k`` keys has ``k + 1`` children.  The
    algorithms below assume ``t >= 2``.
    """

    def __init__(self, t) -> None:
        """Create an empty tree (a single empty leaf) of minimum degree ``t``."""
        self.root = Node(True)
        self.t = t

    def insert(self, data):
        """Insert ``data`` into the tree, growing the height if the root is full."""
        root = self.root
        if len(root.keys) == 2 * self.t - 1:
            # The root is full: put a new empty root above it and split the
            # old root so the descent below always enters non-full nodes.
            node = Node()
            self.root = node
            node.children.insert(0, root)
            self.splitChildren(node, 0)
        self.insertNonFull(self.root, data)

    def insertNonFull(self, root, data):
        """Insert ``data`` into the subtree at ``root``; ``root`` must not be full."""
        i = len(root.keys) - 1
        if root.leaf:
            # Open a slot at the end, then shift larger keys one step right.
            root.keys.append(None)
            while i >= 0 and data < root.keys[i]:
                root.keys[i + 1] = root.keys[i]
                i -= 1
            root.keys[i + 1] = data
        else:
            # Pick the child just right of the last key that is <= data.
            while i >= 0 and data < root.keys[i]:
                i -= 1
            i += 1
            if len(root.children[i].keys) == 2 * self.t - 1:
                self.splitChildren(root, i)
                # The median moved up into root.keys[i]; choose the half
                # that must receive data.
                if data > root.keys[i]:
                    i += 1
            self.insertNonFull(root.children[i], data)

    def splitChildren(self, root, i):
        """Split the full child ``root.children[i]`` around its median key.

        The median moves up into ``root.keys[i]`` and the upper half of the
        child becomes a new sibling at ``root.children[i + 1]``.
        """
        t = self.t
        y = root.children[i]
        z = Node(y.leaf)
        root.children.insert(i + 1, z)
        root.keys.insert(i, y.keys[t - 1])
        z.keys = y.keys[t:(2 * t) - 1]
        y.keys = y.keys[0:t - 1]

        # An internal node hands its upper t children to the new sibling.
        if not y.leaf:
            z.children = y.children[t:2 * t]
            y.children = y.children[0:t]

    def delete(self, target, root=None):
        """Remove one occurrence of ``target`` from the subtree at ``root``.

        ``root`` defaults to the tree root.  When a non-root node is passed
        it must hold at least ``t`` keys.  Nothing happens if ``target`` is
        not present.
        """
        if root is None:
            root = self.root
        t = self.t

        # First index whose key is >= target.  Only bounds and ordering are
        # tested, so falsy keys such as 0 are handled like any other key.
        i = 0
        while i < len(root.keys) and target > root.keys[i]:
            i += 1

        if root.leaf:
            if i < len(root.keys) and target == root.keys[i]:
                root.keys.pop(i)
            return

        if i < len(root.keys) and target == root.keys[i]:
            return self.delete_internal_node(root, target, i)

        # Never descend into a child that could underflow: top it up first.
        if len(root.children[i].keys) < t:
            i = self._fill_child(root, i)
        self.delete(target, root.children[i])

    def _fill_child(self, root, i):
        """Give ``root.children[i]`` at least ``t`` keys by borrowing or merging.

        Returns the index at which the (possibly merged) child now lives.
        """
        t = self.t
        has_left = i > 0
        has_right = i + 1 < len(root.children)
        if has_left and len(root.children[i - 1].keys) >= t:
            self.delete_sibling(root, i, i - 1)
        elif has_right and len(root.children[i + 1].keys) >= t:
            self.delete_sibling(root, i, i + 1)
        elif has_right:
            self.delete_merge(root, i, i + 1)
        else:
            # Merging into the left sibling shifts the child one slot left.
            self.delete_merge(root, i, i - 1)
            i -= 1
        return i

    def delete_internal_node(self, root, target, i):
        """Remove ``target``, known to sit at ``root.keys[i]``."""
        t = self.t
        if root.leaf:
            if target == root.keys[i]:
                root.keys.pop(i)
            return

        if len(root.children[i].keys) >= t:
            # Replace with the largest key of the left subtree.
            root.keys[i] = self.delete_predecessor(root.children[i])
        elif len(root.children[i + 1].keys) >= t:
            # Replace with the smallest key of the right subtree.
            root.keys[i] = self.delete_successor(root.children[i + 1])
        else:
            # Both neighbours are minimal: merge them around the target,
            # which ends up as the middle key (index t - 1) of the result.
            self.delete_merge(root, i, i + 1)
            self.delete_internal_node(root.children[i], target, t - 1)

    def delete_predecessor(self, root):
        """Remove and return the largest key in the subtree at ``root``.

        ``root`` must hold at least ``t`` keys.
        """
        if root.leaf:
            return root.keys.pop()
        last = len(root.children) - 1
        # Top up the right-most child only when it is minimal.
        if len(root.children[last].keys) < self.t:
            if len(root.children[last - 1].keys) >= self.t:
                self.delete_sibling(root, last, last - 1)
            else:
                self.delete_merge(root, last - 1, last)
                last -= 1
        return self.delete_predecessor(root.children[last])

    def delete_successor(self, root):
        """Remove and return the smallest key in the subtree at ``root``.

        ``root`` must hold at least ``t`` keys.
        """
        if root.leaf:
            return root.keys.pop(0)
        # Top up the left-most child only when it is minimal.
        if len(root.children[0].keys) < self.t:
            if len(root.children[1].keys) >= self.t:
                self.delete_sibling(root, 0, 1)
            else:
                self.delete_merge(root, 0, 1)
        return self.delete_successor(root.children[0])

    # sibling becomes parent, parent replaces/adds to children
    def delete_sibling(self, root, i, j):
        """Rotate one key from sibling ``j`` through ``root`` into child ``i``.

        ``j`` must be ``i - 1`` or ``i + 1``.
        """
        center_node = root.children[i]
        if i < j:
            right_node = root.children[j]
            # Separator comes down; the sibling's smallest key goes up.
            center_node.keys.append(root.keys[i])
            root.keys[i] = right_node.keys.pop(0)
            # The sibling's left-most child follows the key it belonged to.
            if right_node.children:
                center_node.children.append(right_node.children.pop(0))
        else:
            left_node = root.children[j]
            # Separator comes down; the sibling's largest key goes up.
            center_node.keys.insert(0, root.keys[i - 1])
            root.keys[i - 1] = left_node.keys.pop()
            if left_node.children:
                center_node.children.insert(0, left_node.children.pop())

    # parent key merges with sibling node, adds to child node
    def delete_merge(self, root, i, j):
        """Merge children ``i`` and ``j`` of ``root`` around their separator.

        The merged node replaces the left one of the pair.  If this empties
        the tree root, the merged node becomes the new root.
        """
        center_node = root.children[i]
        if i < j:
            right_node = root.children[j]
            center_node.keys.append(root.keys.pop(i))
            center_node.keys.extend(right_node.keys)
            if not right_node.leaf:
                center_node.children.extend(right_node.children)
            root.children.pop(j)
            new_node = center_node
        else:
            left_node = root.children[j]
            left_node.keys.append(root.keys.pop(j))
            left_node.keys.extend(center_node.keys)
            if not center_node.leaf:
                left_node.children.extend(center_node.children)
            root.children.pop(i)
            new_node = left_node

        # Shrink the tree by one level when the root has no keys left.
        if self.root is root and len(root.keys) == 0:
            self.root = new_node

    def print_tree(self, root, l=0):
        """Print every node of the subtree at ``root`` with its depth, pre-order."""
        if not root:
            return root
        print(f"Depth: {l}")
        for i in root.keys:
            print(i, end=" ")
        print()
        l += 1
        if not root.leaf:
            for i in root.children:
                self.print_tree(i, l)

Insertion Timing for Different Minimum Degrees (t)

In [10]:
# Time N random insertions into a fresh tree for each minimum degree t = 2..10.
N = 100000

for t in range(2, 11):
    bTree = BTree(t)

    start = time()
    for _ in range(N):
        bTree.insert(randint(0, 1000))
    elapsed = time() - start

    print(f"t={t}:\t{elapsed:.8f}")

# Uncomment to dump the last tree built above.
#bTree.print_tree(bTree.root)

t=2:	0.64328074
t=3:	0.35655332
t=4:	0.32212639
t=5:	0.19744754
t=6:	0.28481913
t=7:	0.18640757
t=8:	0.28267097
t=9:	0.17297149
t=10:	0.18553948


Deletion Test

In [30]:
# Time the deletion of N keys for each minimum degree t = 2..10.
# The same random keys are used for every t so the runs are comparable.
N = 100000
nums = [randint(0, 1000) for _ in range(N)]

for test in range(9):

    t = 2 + test

    # Build the tree outside the timed section; only deletions are measured.
    bTree = BTree(t)
    for i in nums:
        bTree.insert(i)

    start = time()
    for i in nums:
        bTree.delete(i, bTree.root)
    end = time()

    # One result line per t; nothing is printed again after the loop.
    print(f"t={t}:\t{end-start:.8f}")

t=2:	0.17523980
t=3:	0.13423848
t=4:	0.10315704
t=5:	0.09025741
t=6:	0.08943605
t=7:	0.08395100
t=8:	0.07774186
t=9:	0.07924914
t=10:	0.08225703
t=10:	0.08225703
