# 树

> 由一个个节点连接而成，每个节点只能从一条路径访问到

## 核心概念

> 父节点：节点在路径中前一个节点
> 子节点：节点在路径中的后续节点
> 根节点：入口节点
> 叶子节点：没有子节点
> 兄弟节点：拥有同一个父节点
> 节点高度：节点到叶子节点的最长路径的长度（边数）
> 节点深度：根节点到该节点的路径长度（边数）
> 层数：深度+1

## 二叉树

> 最多拥有2个子节点
>
> 满二叉树：叶子节点全部在最后一层，除叶子节点外，其他节点都拥有2个子节点
> 完全二叉树：除最后一层外，其余各层的节点数都达到最大，并且最后一层的叶子节点从左往右是连续的

### 二叉树的数据结构

> 链表存储：最常见
> 数组存储：适用于完全二叉树。根节点存储在数组下标1的位置，如果父节点的下标为i，左子节点的下标为2i，右子节点的下标为：2i+1

### 二叉树的遍历

In [49]:
from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to display the value of every top-level expression in a cell,
# not only the last one (later cells rely on this to show several results).
InteractiveShell.ast_node_interactivity = "all"

In [50]:
# Tree Node
class Node:
    """Binary tree node: a value plus optional left and right children."""

    def __init__(self, value, left=None, right=None):
        # A child of None means "no subtree on that side".
        self.value, self.left, self.right = value, left, right

    def __repr__(self):
        # Children are rendered recursively through their own repr, so the
        # whole subtree is shown in nested form.
        return f"Node({self.value}, {self.left}, {self.right})"

# Sample three-level tree (root 1; children 2 and 3; leaves 4, 5, 6, 7)
# used by the traversal demos below.
node = Node(1, Node(2, Node(4), Node(5)), Node(3, Node(6), Node(7)))

#### 前序遍历

> 根节点 -> 左子节点 -> 右子节点

In [71]:
def pre_order(node):
    """Print the subtree rooted at ``node`` in pre-order (root, left, right).

    Each visited node's ``value`` is written with ``print``; passing ``None``
    prints nothing.
    """
    if node is not None:
        print(node.value)
        for child in (node.left, node.right):
            pre_order(child)

# Demo: recursive pre-order walk of the sample tree.
pre_order(node)

1
2
4
5
3
6
7


In [72]:
def pre_order_stack(node):
    """Print the tree in pre-order using an explicit stack instead of recursion.

    Empty children are pushed as-is and simply skipped when popped.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        if current:
            print(current.value)
            # Push right before left so the left child is popped (visited) first.
            pending.extend((current.right, current.left))

# Demo: the stack-based variant prints the same order as the recursive one.
pre_order_stack(node)

1
2
4
5
3
6
7


#### 中序遍历

> 左子节点 -> 根节点 -> 右子节点

In [73]:
def in_order(node):
    """Print the subtree rooted at ``node`` in in-order (left, root, right).

    A falsy ``node`` (e.g. ``None``) prints nothing.
    """
    if node:
        in_order(node.left)
        print(node.value)
        in_order(node.right)

# Demo: recursive in-order walk of the sample tree.
in_order(node)

4
2
5
1
6
3
7


In [74]:
def in_order_stack(node):
    """Print the tree in in-order (left, root, right) without recursion.

    Ancestors whose left subtree is still being explored are kept on an
    explicit stack.
    """
    pending = []
    current = node
    while True:
        if current:
            # Descend left, remembering every node passed on the way down.
            pending.append(current)
            current = current.left
        elif pending:
            # Left side exhausted: visit the most recent ancestor, then
            # continue with its right subtree.
            current = pending.pop()
            print(current.value)
            current = current.right
        else:
            break

# Demo: the stack-based variant prints the same order as the recursive one.
in_order_stack(node)

4
2
5
1
6
3
7


#### 后序遍历

> 左子节点 -> 右子节点 -> 根节点

In [75]:
def post_order(node):
    """Print the subtree rooted at ``node`` in post-order (left, right, root).

    A falsy ``node`` (e.g. ``None``) prints nothing.
    """
    if node:
        for child in (node.left, node.right):
            post_order(child)
        print(node.value)

# Demo: recursive post-order walk of the sample tree.
post_order(node)

4
5
2
6
7
3
1


In [76]:
def post_order_stack(node):
    """Print the post-order sequence of values as a single list.

    The tree is walked iteratively in (root, right, left) order; reversing
    that sequence yields (left, right, root), i.e. post-order.
    """
    pending = [node]
    visited = []
    while pending:
        current = pending.pop()
        if current:
            visited.append(current.value)
            # Left is pushed first so the right child is popped first.
            pending.extend((current.left, current.right))
    visited.reverse()
    print(visited)

# Demo: unlike the recursive version, this prints the whole sequence as one list.
post_order_stack(node)

[4, 5, 2, 6, 7, 3, 1]


#### 按层级从左到右遍历

In [51]:
def layer_order(node):
    """Print node values level by level, left to right within each level.

    The nodes of the current level are kept in one list while the next
    level is collected into another. ``None`` prints nothing.
    """
    if node is None:
        return
    level = [node]
    while level:
        upcoming = []
        for current in level:
            print(current.value)
            # Only existing children take part in the next level.
            upcoming.extend(
                child for child in (current.left, current.right) if child
            )
        level = upcoming

# Demo: level-order walk of the sample tree.
layer_order(node)

1
2
3
4
5
6
7


In [52]:
from collections import deque


def layer_order_v2(node):
    """Print node values level by level, left to right, using a FIFO queue.

    Args:
        node: Root of the tree (an object with ``value``, ``left`` and
            ``right`` attributes), or ``None`` for an empty tree.

    Missing children are enqueued as-is and skipped when dequeued.
    """
    # The root must be wrapped in a list: ``deque(node)`` would try to
    # iterate over the node itself and raise TypeError.
    queue = deque([node])
    while queue:
        current = queue.popleft()
        if not current:
            continue
        print(current.value)
        queue.append(current.left)
        queue.append(current.right)

# NOTE(review): this cell defines layer_order_v2 but calls layer_order here;
# presumably layer_order_v2(node) was intended - confirm.
layer_order(node)

1
2
3
4
5
6
7


#### 求树的高度

In [78]:
def tree_height(root):
    """Return the number of levels in the tree, computed by a level-order walk.

    An empty tree (falsy ``root``) has height 0; a single node has height 1.
    """
    height = 0
    level = [root] if root else []
    while level:
        height += 1
        # Replace the current level with all of its existing children.
        level = [
            child
            for parent in level
            for child in (parent.left, parent.right)
            if child
        ]
    return height

# Demo: height of the sample tree via the level-by-level version.
tree_height(node)

def tree_height_v2(root):
    """Return the number of levels in the tree, computed recursively.

    The height is one more than the taller of the two subtrees; an empty
    tree (falsy ``root``) has height 0.
    """
    if not root:
        return 0
    left_height = tree_height_v2(root.left)
    right_height = tree_height_v2(root.right)
    return 1 + max(left_height, right_height)

# Demo: the recursive version gives the same height for the sample tree.
tree_height_v2(node)

3

3

## 二叉查找树

> 节点的值大于左子树所有节点的值，小于等于右子树所有节点的值，按中序遍历可得升序排序的数组

### 常见的操作

In [79]:
# Sample binary search tree (in-order: 1..7) shared by the BST demos below;
# the insert/delete demos modify it in place.
bst = Node(4, Node(2, Node(1), Node(3)), Node(6, Node(5), Node(7)))

In [80]:
def search_bst(root, val):
    """Find the node holding ``val`` in a binary search tree.

    Returns the first node on the search path whose ``value`` equals
    ``val``, or a falsy value (``None`` for an ordinary tree) when the
    search runs off the tree.
    """
    current = root
    while current and current.value != val:
        # Smaller keys live on the left, all others on the right.
        go_left = current.value > val
        current = current.left if go_left else current.right
    return current

# Demo: look up 6; the displayed result is the matching subtree.
search_bst(bst, 6)

Node(6, Node(5, None, None), Node(7, None, None))

In [81]:
def insert_into_bst(root, val):
    """Insert ``val`` into a binary search tree and return the tree's root.

    An empty tree yields a new single-node tree. Otherwise the tree is
    modified in place: values smaller than a node go to its left, all
    others (including equal values) to its right, and the new node is
    attached at the first free slot on that path.
    """
    if not root:
        return Node(val)

    parent = root
    while True:
        go_left = parent.value > val
        child = parent.left if go_left else parent.right
        if child:
            # Slot occupied: keep descending.
            parent = child
            continue
        if go_left:
            parent.left = Node(val)
        else:
            parent.right = Node(val)
        return root

# Demo: insert 8 into the sample BST (modified in place); the returned root is displayed.
insert_into_bst(bst, 8)

Node(4, Node(2, Node(1, None, None), Node(3, None, None)), Node(6, Node(5, None, None), Node(7, None, Node(8, None, None))))

In [82]:
def delete_node(root, key):
    """Remove one node whose value equals ``key`` from a binary search tree.

    Returns the root of the resulting tree. The tree is modified in place;
    the return value differs from ``root`` only when the root itself is
    unlinked. If ``key`` is not found the tree is left untouched.
    """
    if not root:
        return root

    # Locate the node to delete and remember its parent.
    parent, target = None, root
    while target and target.value != key:
        parent = target
        target = target.left if target.value > key else target.right

    if not target:
        return root

    if target.left and target.right:
        # Two children: copy the smallest value of the right subtree into
        # the target, then unlink that successor node instead (it has no
        # left child by construction).
        succ_parent, succ = target, target.right
        while succ.left:
            succ_parent, succ = succ, succ.left
        target.value = succ.value
        parent, target = succ_parent, succ

    # At this point the node to unlink has at most one child.
    replacement = target.left or target.right

    if not parent:
        # The root itself is being removed.
        return replacement

    if parent.left == target:
        parent.left = replacement
    else:
        parent.right = replacement

    return root


# Demo: remove the 8 inserted earlier; the returned root is displayed.
delete_node(bst, 8)

Node(4, Node(2, Node(1, None, None), Node(3, None, None)), Node(6, Node(5, None, None), Node(7, None, None)))

In [83]:
def minimum_in_bst(root):
    """Return the smallest value in a binary search tree.

    The minimum is the value of the leftmost node. Returns ``None`` for an
    empty tree (falsy ``root``).
    """
    if not root:
        return None
    leftmost = root
    while leftmost.left:
        leftmost = leftmost.left
    return leftmost.value

# Demo: smallest value in the sample BST.
minimum_in_bst(bst)

1

In [84]:
def maximum_in_bst(root):
    """Return the largest value in a binary search tree.

    The maximum is the value of the rightmost node. Returns ``None`` for an
    empty tree (falsy ``root``).
    """
    if not root:
        return None
    rightmost = root
    while rightmost.right:
        rightmost = rightmost.right
    return rightmost.value

# Demo: largest value in the sample BST.
maximum_in_bst(bst)

7

In [85]:
class Path:
    """Simple record holding a value and a reference to a parent entry."""

    def __init__(self, value, parent=None):
        # ``parent`` defaults to None, i.e. an entry with nothing before it.
        self.parent = parent
        self.value = value

In [86]:
def precursor_node(root, val):
    """Return the in-order predecessor of ``val`` in a binary search tree.

    The predecessor is the largest value that comes immediately before
    ``val`` in sorted order.

    Args:
        root: Root node of the BST, or ``None`` for an empty tree.
        val: Value whose predecessor is wanted; it must be present in the
            tree.

    Returns:
        The predecessor's value, or ``None`` when ``val`` is not in the
        tree or has no predecessor (it is the minimum).
    """
    # Deepest ancestor from which the search turned right: the closest
    # ancestor smaller than the target.
    last_right_turn = None
    current = root
    while current and current.value != val:
        if current.value > val:
            current = current.left
        else:
            last_right_turn = current
            current = current.right

    if not current:
        return None

    if current.left:
        # The predecessor is the rightmost node of the left subtree.
        candidate = current.left
        while candidate.right:
            candidate = candidate.right
        return candidate.value

    # No left subtree: fall back to the closest smaller ancestor, if any.
    return last_right_turn.value if last_right_turn else None

# Demo: predecessors of 3, 4 and 5 in the sample BST (each result is displayed).
precursor_node(bst, 3)
precursor_node(bst, 4)
precursor_node(bst, 5)

2

3

4

In [87]:
def successor_node(root, val):
    """Return the in-order successor of ``val`` in a binary search tree.

    The successor is the smallest value that comes immediately after
    ``val`` in sorted order.

    Args:
        root: Root node of the BST, or ``None`` for an empty tree.
        val: Value whose successor is wanted; it must be present in the
            tree.

    Returns:
        The successor's value, or ``None`` when ``val`` is not in the tree
        or has no successor (it is the maximum).
    """
    # Deepest ancestor from which the search turned left: the closest
    # ancestor larger than the target.
    last_left_turn = None
    current = root
    while current and current.value != val:
        if current.value > val:
            last_left_turn = current
            current = current.left
        else:
            current = current.right

    # Value absent (or empty tree): report "no successor" instead of
    # failing on an attribute access on None.
    if not current:
        return None

    if current.right:
        # The successor is the leftmost node of the right subtree.
        candidate = current.right
        while candidate.left:
            candidate = candidate.left
        return candidate.value

    # No right subtree: fall back to the closest larger ancestor, if any.
    return last_left_turn.value if last_left_turn else None

# Demo: successors of 3, 4 and 5 in the sample BST (each result is displayed).
successor_node(bst, 3)
successor_node(bst, 4)
successor_node(bst, 5)

4

5

6

### 时间复杂度分析

> 当BST退化成链表时，最差时间复杂度为O(n)，也等于O(height)
>
> 当BST是完全二叉树时，遍历跟height有关，height在[log(n+1)-1, logn]之间，所以最差时间复杂度为O(logn)

> 可见性能和树的高度有关，平衡二叉搜索树高度接近logn

### 平衡二叉搜索树对比hash表的优势

> 1. 数据有序
> 2. hash表扩容、hash冲突导致性能不稳定
> 3. hash表设计相对比较复杂，要考虑hash函数设计，hash冲突、扩缩容解决方法。平衡二叉搜索树只需要考虑平衡问题


## 平衡二叉搜索树

> 严格定义：任一节点的左右子树高度差不超过1
>
> 常见：AVL树

### 红黑树（Red-Black Tree）

> 工程应用中一种流行的“不严格”的平衡二叉搜索树，左右子树的高度差可能达到一倍
>
> 定义：节点被标记为黑色或者红色
> 1. 根节点是黑色
> 2. 每个叶子节点是黑色的空节点，也就是叶子节点不存储数据
> 3. 任何相邻的节点不能同时为红色
> 4. 每个节点到其所有可达叶子节点的路径上，黑色节点数量一致
>
> 优点：常见操作保持对数级复杂度，且为了保持相对平衡成本比较低

### 插入

> A. 按照bst算法找到插入点插入，标红
>
> B. 进入下面调整逻辑
> 1. 如果没有父节点或者父节点黑，结束
> 2. 如果叔节点红，父、叔标黑，祖标红，关注节点变成祖，进入下一轮
> 3. 如果不同边（子在父的左边，父在祖的右边，或者反过来），关注点变成父，旋转，父标黑，祖标红，围绕祖反向旋转，进入下一轮

### 删除

[参考](image/TreeMap红黑树源码详解.pdf)

#### 复杂度分析

> 如果把红色节点去掉（其子节点挂到原来的黑色父节点下），剩下的黑色节点构成一棵所有叶子节点都在同一层的树（根节点到叶子节点经过的路径上黑节点数量一样），且每个非叶子节点至少有2个子节点，所以 bh（black height，黑色高度）和 n 的关系为 n >= 2^bh - 1，即 bh <= log(n+1)
>
> 因为不会有两个相邻的红色节点，所以根节点到叶子节点的一条路径上，红色节点数量最多和黑色节点数量一样，所以树高的上限为：h <= 2bh <= 2log(n+1)

## 堆

> 定义
> 1. 完全二叉树
> 2. 每个节点值大于等于子节点的值（大顶堆），每个节点值小于等于子节点的值（小顶堆）

### 常见操作 & 实现
> 使用数组存储
>
> 插入：从下往上构建堆，依次和父节点比较、交换，时间复杂度O(logn)
>
> 删除堆顶：把尾节点覆盖根节点，从上往下构建堆，依次和子节点比较、交换，时间复杂度O(logn)

### 常见应用
> 1. 排序
> 2. topK
> 3. 求中位数
> 4. 优先队列

In [None]:
class Heap:
    """Array-backed binary max-heap.

    The heap is built in place on the list passed to the constructor, so
    the caller's list is reordered and later grows/shrinks with ``push``
    and ``pop``. For the element at index ``i`` the children are at
    ``2*i + 1`` and ``2*i + 2`` and the parent at ``(i - 1) >> 1``.
    """

    def __init__(self, array):
        self.array = array
        self.count = len(array)
        self._build_heap()

    def push(self, val):
        """Append ``val`` and sift it up until its parent is not smaller."""
        self.array.append(val)
        child = self.count
        parent = (child - 1) >> 1
        # For child == 0 the parent index is -1, which ends the loop.
        while parent >= 0 and self.array[child] > self.array[parent]:
            self.array[child], self.array[parent] = (
                self.array[parent],
                self.array[child],
            )
            child = parent
            parent = (child - 1) >> 1
        self.count += 1

    def pop(self):
        """Remove and return the largest element, or ``None`` if empty."""
        if self.count == 0:
            return
        top = self.array[0]
        # Move the last element to the root, drop the last slot, then
        # sift the new root down to restore the heap property.
        self.array[0] = self.array[-1]
        del self.array[-1]
        self.count -= 1
        Heap._heapify(self.array, self.count, 0)
        return top

    @staticmethod
    def _heapify(array, n, i):
        """Sift ``array[i]`` down within the first ``n`` elements."""
        while True:
            largest = i
            for child in (2 * i + 1, 2 * i + 2):
                if child < n and array[child] > array[largest]:
                    largest = child

            if largest == i:
                return

            array[i], array[largest] = array[largest], array[i]
            i = largest

    def _build_heap(self):
        """Heapify the whole array bottom-up; time complexity O(n)."""
        start = (self.count - 1) >> 1
        for i in reversed(range(start + 1)):
            Heap._heapify(self.array, self.count, i)

    def sorted(self):
        """Return a new ascending list of the elements (heap sort).

        Works on a copy, so the heap itself is left unchanged.
        """
        result = self.array[:self.count]
        end = len(result)
        while end > 0:
            end -= 1
            # Move the current maximum behind the shrinking heap region.
            result[0], result[end] = result[end], result[0]
            Heap._heapify(result, end, 0)
        return result

    # NOTE: topK is currently disabled; kept for reference.
    # def topK(self, k):
    #     array = self.array[:self.count]
    #     k = min(k, len(array))
    #     for i in range(k):
    #         max_pos = len(array)-1-i
    #         array[0], array[max_pos] = array[max_pos], array[0]
    #         Heap._heapify(array, max_pos ,0)
    #     return array[-k:][::-1]



# Demo: build a max-heap in place on a sample list. Each bare `array`
# expression displays the shared list after the preceding operation.
array = [7,5,19,8,4,1,20,13,16]

heap = Heap(array)
array
heap.push(22)
array
heap.pop()
array
heap.sorted()
# heap.topK(5)

[20, 16, 19, 13, 4, 1, 7, 5, 8]

[22, 20, 19, 13, 16, 1, 7, 5, 8, 4]

22

[20, 16, 19, 13, 4, 1, 7, 5, 8]

[1, 4, 5, 7, 8, 13, 16, 19, 20]

[20, 19, 16, 13, 8]

In [47]:
# topk
import heapq
import math

def topk(array, k):
    """Return the ``k`` largest values of ``array`` using a size-``k`` min-heap.

    Args:
        array: Iterable of values comparable with each other and with
            ``float('-inf')``.
        k: Number of values to keep.

    Returns:
        A list of at most ``k`` values in heap order (smallest kept value
        first, not fully sorted). The list is shorter than ``k`` when
        ``array`` has fewer than ``k`` values, and empty when ``k <= 0``.
    """
    if k <= 0:
        # An empty heap has no root to compare against.
        return []

    placeholder = float('-inf')
    priority = [placeholder] * k
    for val in array:
        # Only values beating the current k-th largest displace the root.
        if val > priority[0]:
            heapq.heappushpop(priority, val)

    # Placeholders survive only when fewer than k values were pushed.
    # Input values equal to -inf are never pushed (they fail the ``>``
    # test), so every remaining -inf is a placeholder.
    return [val for val in priority if val != placeholder]

# Demo: five largest values of the list left behind by the heap demo
# (displayed in heap order, not sorted).
topk(array, 5)

class QuantileFinder:
    """Streaming quantile tracker built on two heaps.

    ``max_queue`` holds the lower part of the data as a max-heap (values
    are stored negated because ``heapq`` only provides a min-heap) and
    ``min_queue`` holds the upper part as a min-heap. After each insert
    the lower part is kept at ``ceil(quantile * n)`` elements, so its
    largest element is the requested quantile.

    Args:
        quantile: Fraction of the data that should fall at or below the
            reported value; expected to be in ``(0, 1]``. Defaults to
            0.5 (the lower median).
    """

    def __init__(self, quantile=0.5):
        self.quantile = quantile
        self.max_queue = []
        self.min_queue = []

    def insert(self, val):
        """Add ``val`` and rebalance the two heaps."""
        heapq.heappush(self.max_queue, -val)

        # Keep every element of the lower part <= every element of the
        # upper part.
        if self.min_queue and -self.max_queue[0] > self.min_queue[0]:
            heapq.heappush(self.min_queue, -heapq.heappop(self.max_queue))

        # Compare the lower part against a single target size computed
        # from the total element count, moving at most one element.
        total = len(self.max_queue) + len(self.min_queue)
        target = math.ceil(self.quantile * total)
        if len(self.max_queue) > target:
            heapq.heappush(self.min_queue, -heapq.heappop(self.max_queue))
        elif len(self.max_queue) < target and self.min_queue:
            heapq.heappush(self.max_queue, -heapq.heappop(self.min_queue))

    def take(self):
        """Return the current quantile value.

        Raises:
            IndexError: If the lower heap is empty (e.g. nothing has been
                inserted yet).
        """
        return -self.max_queue[0]
        

# Demo: track the 0.3 quantile while inserting 1, 2, 3; each take() result
# is displayed. Despite the name, this instance is not a median finder.
median_finder = QuantileFinder(0.3)
median_finder.insert(1)
median_finder.take()
median_finder.insert(2)
median_finder.take()
median_finder.insert(3)
median_finder.take()

[8, 13, 20, 19, 16]

1

1

1