# 跳表

## 原理
> 已知位置的情况下，在链表上插入、删除数据是特别快的。但是在链表上查找时间复杂度是O(n)，所以链表这种数据结构本身并不适合存储有序的数据。有没有什么办法可以优化一下呢？

> 一种思路是，我们可以使用索引的思想来加快查找速度，那么如何来设计索引呢？

> 可以从每两个节点中选取一个节点，形成索引链表，索引链表的节点多出一个“down”指针，指向下一级（只有一层索引的情况下，指向原数据链表，否则指向下一级索引链表）。这样查找速度就快了一倍，类似的，我们可以在此基础上，继续增加更多的索引链表。

> 索引链表从“上”到“下”，分别为 k、k-1、...、1 级索引。查找时，从最高级的索引链表开始，依次同下一个节点的值进行比较：如果下一个节点的值小于查找值，则前进到下一个节点；如果下一个节点不存在或者其值大于查找值，则沿“down”指针下降一级并重复这个查找过程，直到找到数据或者下一级不存在时停止。

## 性能

> 如果每 $k$ 个节点提取一个索引，则一共有 $\log_k n$ 层索引
> 每层最多遍历 $k$ 个节点，所以时间复杂度是 $O(k\log_k n)$；$k$ 是一个常数且远小于 $n$，所以是 $O(\log n)$
> 第一级索引节点数量是 $n/k$，第二级是 $n/k^2$，以此类推，各级索引节点数量 $n/k,\ n/k^2,\ \ldots,\ k,\ 1$ 构成一个等比数列，其总和为 $(n-1)/(k-1)$，所以空间复杂度是 $O(n)$

In [76]:
import random


class SkipList:
    """Sorted collection backed by a skip list that also tracks ranks.

    Values are kept in ascending order and must support ``<`` and ``==``.
    Every link stores a *span* so that 1-based ranks can be computed while
    walking the index levels. Duplicate values are allowed; a newly added
    value is placed before any existing equal values.
    """

    class Node:
        """One skip-list node: a value plus its per-level links and spans."""

        def __init__(self, level, data=None):
            self.data = data
            # forwards[i] is the next node on level i (None at the tail).
            self.forwards = [None] * level
            # span[i] is how many level-0 nodes following forwards[i] skips
            # over; for a tail link it is the number of nodes after this one.
            self.span = [0] * level

    def __init__(self, p=0.5, max_level=16):
        """Create an empty skip list.

        Args:
            p: Probability of promoting a new node one more level.
            max_level: Upper bound on the number of levels.
        """
        # The head is a sentinel that owns a link on every possible level.
        self.head = SkipList.Node(max_level)
        self.level = 1
        self.max_level = max_level
        self.p = p
        self.size = 0

    def _random_level(self):
        """Return a random level in ``1..max_level`` for a new node."""
        level = 1
        while random.random() < self.p and level < self.max_level:
            level += 1

        return level

    def search(self, data):
        """Return True if ``data`` is stored in the list, else False."""
        cur = self.head
        for i in range(self.level - 1, -1, -1):
            # Keep moving right on this level (a single step is not enough)
            # until the next value would no longer be smaller than ``data``.
            while cur.forwards[i] is not None and cur.forwards[i].data < data:
                cur = cur.forwards[i]

        candidate = cur.forwards[0]
        return candidate is not None and candidate.data == data

    def add(self, data):
        """Insert ``data``, keeping the list sorted and the spans correct."""
        # pre[i]: last node on level i whose value is smaller than ``data``.
        # rank[i]: 1-based position of pre[i] (0 for the head).
        pre = [None] * self.max_level
        rank = [0] * self.max_level
        cur = self.head
        for i in range(self.level - 1, -1, -1):
            if i < self.level - 1:
                # Continue counting from where the level above stopped.
                rank[i] = rank[i + 1]
            while cur.forwards[i] is not None and cur.forwards[i].data < data:
                rank[i] += cur.span[i]
                cur = cur.forwards[i]
            pre[i] = cur

        level = self._random_level()
        node = SkipList.Node(level, data)

        # Levels that were unused so far start at the head, whose link spans
        # the whole list (empty range when the new node is not taller).
        for i in range(self.level, level):
            pre[i] = self.head
            self.head.span[i] = self.size

        for i in range(level):
            node.forwards[i] = pre[i].forwards[i]
            pre[i].forwards[i] = node

            # Split the old span of pre[i] around the new node.
            node.span[i] = pre[i].span[i] - (rank[0] - rank[i])
            pre[i].span[i] = rank[0] - rank[i] + 1

        # Links passing over the new node now skip one more element.
        for i in range(level, self.level):
            pre[i].span[i] += 1

        self.size += 1
        self.level = max(self.level, level)

    def delete(self, data):
        """Remove one occurrence of ``data``.

        Returns:
            True if a node was removed, False if ``data`` is not present.
        """
        pre = [None] * self.level
        cur = self.head
        for i in range(self.level - 1, -1, -1):
            while cur.forwards[i] is not None and cur.forwards[i].data < data:
                cur = cur.forwards[i]
            pre[i] = cur

        target = pre[0].forwards[0]
        if target is None or target.data != data:
            return False

        for i in range(self.level):
            # Compare by identity: with duplicate values, the next node on an
            # upper level may be a different node holding an equal value, and
            # that node must keep its links.
            if pre[i].forwards[i] is target:
                pre[i].forwards[i] = target.forwards[i]
                pre[i].span[i] += target.span[i] - 1
                target.forwards[i] = None
            else:
                # The link on this level passes over the removed node.
                pre[i].span[i] -= 1

        # Drop top levels that no longer contain any node.
        while self.level > 1 and self.head.forwards[self.level - 1] is None:
            self.level -= 1

        self.size -= 1
        return True

    def get_rank(self, data):
        """Return the 1-based rank of ``data``, or -1 if it is not present.

        When ``data`` occurs several times, the rank of its first occurrence
        is returned.
        """
        cur = self.head
        rank = 0
        for i in range(self.level - 1, -1, -1):
            while cur.forwards[i] is not None and cur.forwards[i].data < data:
                rank += cur.span[i]
                cur = cur.forwards[i]

        candidate = cur.forwards[0]
        if candidate is not None and candidate.data == data:
            return rank + cur.span[0]

        return -1

    def get_by_rank(self, rank):
        """Return the value stored at 1-based position ``rank``.

        Raises:
            AssertionError: If ``rank`` is outside ``1..size``.
        """
        assert 0 < rank <= self.size, f'rank must between 1-{self.size}'

        cur = self.head
        cur_rank = 0
        for i in range(self.level - 1, -1, -1):
            # Follow links on this level as long as they do not overshoot.
            while (cur.forwards[i] is not None
                   and cur_rank + cur.span[i] <= rank):
                cur_rank += cur.span[i]
                cur = cur.forwards[i]

            if cur_rank == rank:
                return cur.data

        return None


# Build a SkipList and fill it with four ascending values.
skiplist = SkipList()
for value in (10, 20, 30, 40):
    skiplist.add(value)

# Each row is (label, bound method, argument). Rows are evaluated lazily and
# in order, so the deletion runs before the checks listed after it.
checks = (
    # Membership lookups
    ("Search 20:", skiplist.search, 20),  # Expected: True
    ("Search 25:", skiplist.search, 25),  # Expected: False
    # Value -> rank
    ("Rank of 10:", skiplist.get_rank, 10),  # Expected: 1
    ("Rank of 30:", skiplist.get_rank, 30),  # Expected: 3
    ("Rank of 50:", skiplist.get_rank, 50),  # Expected: -1
    # Rank -> value
    ("Element at rank 1:", skiplist.get_by_rank, 1),  # Expected: 10
    ("Element at rank 3:", skiplist.get_by_rank, 3),  # Expected: 30
    # Deletion and its effect on later lookups
    ("Delete 20:", skiplist.delete, 20),  # Expected: True
    ("Search 20 after deletion:", skiplist.search, 20),  # Expected: False
    ("Rank of 30 after deletion:", skiplist.get_rank, 30),  # Expected: 2
)
for label, query, argument in checks:
    print(label, query(argument))


Search 20: True
Search 25: False
Rank of 10: 1
Rank of 30: 3
Rank of 50: -1
Element at rank 1: 10
Element at rank 3: 30
Delete 20: True
Search 20 after deletion: False
Rank of 30 after deletion: 2
