# 使用二分查找实现的Mapping

In [1]:
%run search_base.ipynb
%run utils.ipynb

In [2]:
class BinarySearchMapping(MapBase):
    """Ordered mapping backed by two parallel sorted lists and binary search.

    ``_keys`` is kept in ascending order and ``_vals[i]`` is the value stored
    for ``_keys[i]``.  Keys therefore have to be comparable with each other
    using ``<`` and ``>``.

    Lookups that find nothing return ``None`` instead of raising.

    .. note:: See the book "Algorithms", p. 238, section 3.1.5.
    """

    def __init__(self):
        self._keys = []  # keys, always in ascending order
        self._vals = []  # _vals[i] is the value stored for _keys[i]
        self._size = 0   # number of stored pairs; kept equal to len(_keys)

    def __str__(self):
        """Return a three-line, tab-separated dump of indexes, keys and values.

        An empty mapping is rendered with ``[]`` in every column.
        """
        m = '\t'.join(map(str, range(self._size))) or '[]'
        x = '\t'.join(map(str, self._keys)) or '[]'
        y = '\t'.join(map(str, self._vals)) or '[]'
        return "idxs: %s\nkeys: %s\nvals: %s" % (m, x, y)

    def size(self):
        """Return the number of key/value pairs currently stored."""
        return self._size

    def rank(self, k):
        """Return the position of ``k`` as reported by ``binary_search_v1``.

        NOTE(review): ``binary_search_v1`` is defined outside this class; the
        methods below treat a result of ``-1`` as "key absent" -- confirm
        against its definition.
        """
        return binary_search_v1(self._keys, k)

    def get(self, k):
        """Return the value stored for ``k``, or ``None`` when ``k`` is absent."""
        if self.is_empty():
            return None
        idx = self.rank(k)
        if idx == -1:
            return None
        return self._vals[idx]

    def _locate(self, k):
        """Binary-search the sorted key list for ``k``.

        :returns: ``(True, i)`` when ``_keys[i]`` equals ``k``; otherwise
            ``(False, lo)`` where ``lo`` is the number of stored keys that
            are smaller than ``k``, i.e. the index at which ``k`` has to be
            inserted to keep ``_keys`` sorted.  On an empty mapping the
            result is ``(False, 0)``.
        """
        lo = 0
        hi = self._size - 1
        while lo <= hi:
            mid = lo + (hi - lo) // 2
            if k < self._keys[mid]:
                hi = mid - 1
            elif k > self._keys[mid]:
                lo = mid + 1
            else:
                return True, mid
        # Every index below ``lo`` holds a key smaller than ``k`` and every
        # index from ``lo`` upwards holds a larger one.
        return False, lo

    def put(self, k, v):
        """Store ``v`` under ``k``: overwrite if ``k`` exists, else insert.

        The insertion keeps ``_keys`` sorted, so no separate handling of the
        empty mapping is needed (the insertion point is then ``0``).
        """
        found, pos = self._locate(k)
        if found:
            self._keys[pos] = k
            self._vals[pos] = v
            return

        self._keys.insert(pos, k)
        self._vals.insert(pos, v)
        self._size += 1

    def delete(self, k):
        """Remove ``k`` and return its value, or ``None`` when ``k`` is absent."""
        idx = self.rank(k)
        if idx == -1:
            return None
        self._size -= 1
        self._keys.pop(idx)
        return self._vals.pop(idx)

    def min(self):
        """Return the smallest key, or ``None`` for an empty mapping."""
        return None if self.is_empty() else self._keys[0]

    def max(self):
        """Return the largest key, or ``None`` for an empty mapping."""
        return None if self.is_empty() else self._keys[-1]

    def _find_position(self, k):
        """Legacy search helper; the class itself uses :meth:`_locate`.

        Kept with its original contract in case code outside this class
        calls it: returns ``(True, index)`` when ``k`` exists, otherwise
        ``(False, mid)`` where ``mid`` is the last index probed by the
        binary search.

        The caller must make sure the mapping is not empty; on an empty
        mapping ``mid`` is never assigned and the final ``return`` fails.
        """
        lo = 0
        hi = self._size - 1
        while lo <= hi:
            mid = lo + (hi - lo) // 2
            if k < self._keys[mid]:
                hi = mid - 1
            elif k > self._keys[mid]:
                lo = mid + 1
            else:
                return True, mid
        return False, mid

    def keys_between(self, k1, k2, size=False):
        """Return the stored keys ``x`` with ``k1 <= x <= k2``, in order.

        :param k1: lower bound (inclusive); need not be a stored key.
        :param k2: upper bound (inclusive); need not be a stored key.
        :param size: when true, return only the number of matching keys.
        :returns: a list of keys, or an ``int`` count when ``size`` is true.
            An empty mapping or an inverted range (``k2 < k1``) yields
            ``[]`` / ``0``.
        """
        # An explicit check instead of ``assert``: asserts vanish under
        # ``python -O``, after which an inverted range produced a negative
        # count.
        if self.is_empty() or k2 < k1:
            return 0 if size else []

        # First index whose key is >= k1 (the match itself, or the
        # insertion point when k1 is not stored).
        _, start = self._locate(k1)

        # One past the last index whose key is <= k2.
        k2_exist, stop = self._locate(k2)
        if k2_exist:
            stop += 1

        if size:
            return stop - start
        return self._keys[start:stop]

    def floor(self, k):
        """Return the largest stored key strictly smaller than ``k``.

        ``k`` itself is never returned, even when it is stored.  Returns
        ``None`` when no smaller key exists.
        """
        # Whether ``k`` is stored or not, ``pos`` is the number of keys
        # smaller than ``k``, so the predecessor sits directly before it.
        _, pos = self._locate(k)
        return self._keys[pos - 1] if pos > 0 else None

    def ceiling(self, k):
        """Return the smallest stored key strictly greater than ``k``.

        ``k`` itself is never returned, even when it is stored.  Returns
        ``None`` when no greater key exists.
        """
        exist, pos = self._locate(k)
        if exist:
            # Skip over ``k`` itself; on a miss ``pos`` already points at
            # the first larger key.
            pos += 1
        return self._keys[pos] if pos < self._size else None

    def select(self, n):
        """Return the key at position ``n`` of the sorted key list.

        Any ``n`` outside ``0 <= n < size()`` yields ``None``.  That covers
        the ``-1`` that :meth:`get` and :meth:`delete` treat as "absent"
        from :meth:`rank`, and stops other negative values from silently
        wrapping around to the end of the list.
        """
        if not 0 <= n < self._size:
            return None
        return self._keys[n]

    def size_between(self, k1, k2):
        """Return how many stored keys ``x`` satisfy ``k1 <= x <= k2``."""
        return self.keys_between(k1, k2, True)

In [3]:
# Create the test driver for this mapping implementation.
# NOTE(review): MappingTest is not defined in this notebook; presumably it is
# provided by one of the notebooks loaded with %run above -- confirm.
tester = MappingTest(BinarySearchMapping)

In [4]:
# Exercise the mapping while it is still empty
empty_map = BinarySearchMapping()

In [5]:
# Empty mapping: the recorded output shows the mapping dump, its size and its key list.
tester.test_map_related(empty_map)

idxs: []
keys: []
vals: []

size:  0
keys:  []


In [6]:
# Empty mapping: per-key queries; the recorded output has one row per probe key
# with the columns exist / get / rank / select / floor / ceil.
tester.test_single_key(empty_map)

idxs: []
keys: []
vals: []

key	exist	get	rank	select	floor	ceil
g	False	None	-1	None	None	None	
a	False	None	-1	None	None	None	
t	False	None	-1	None	None	None	
c	False	None	-1	None	None	None	
0	False	None	-1	None	None	None	
z	False	None	-1	None	None	None	


In [7]:
# Empty mapping: range queries; each output row shows a (k1, k2) pair followed
# by a count and a key list (presumably size_between / keys_between -- confirm).
tester.test_key_range(empty_map)

idxs: []
keys: []
vals: []

('g', 'g') 0 []
('g', 'p') 0 []
('a', 't') 0 []
('a', 'z') 0 []
('0', 't') 0 []
('0', 'z') 0 []


In [8]:
# Exercise the mapping when it is not empty.
# NOTE(review): no mapping is passed in; presumably the tester builds its own
# populated instance (the recorded output shows nine keys) -- confirm.
tester.test_map_related()

idxs: 0	1	2	3	4	5	6	7	8
keys: a	e	g	i	m	n	p	s	t
vals: 5	1	10	8	4	9	7	2	3

size:  9
keys:  ['a', 'e', 'g', 'i', 'm', 'n', 'p', 's', 't']


In [9]:
# Populated mapping: per-key queries for stored keys (g, a, t) and absent keys
# (c, 0, z), as listed in the recorded output.
tester.test_single_key()

idxs: 0	1	2	3	4	5	6	7	8
keys: a	e	g	i	m	n	p	s	t
vals: 5	1	10	8	4	9	7	2	3

key	exist	get	rank	select	floor	ceil
g	True	10	2	g	e	i	
a	True	5	0	a	None	e	
t	True	3	8	t	s	None	
c	False	None	-1	None	a	e	
0	False	None	-1	None	None	a	
z	False	None	-1	None	t	None	


In [10]:
# Populated mapping: range queries, including bounds that are not stored keys
# ('0' and 'z' in the recorded output).
tester.test_key_range()

idxs: 0	1	2	3	4	5	6	7	8
keys: a	e	g	i	m	n	p	s	t
vals: 5	1	10	8	4	9	7	2	3

('g', 'g') 1 ['g']
('g', 'p') 5 ['g', 'i', 'm', 'n', 'p']
('a', 't') 9 ['a', 'e', 'g', 'i', 'm', 'n', 'p', 's', 't']
('a', 'z') 9 ['a', 'e', 'g', 'i', 'm', 'n', 'p', 's', 't']
('0', 't') 9 ['a', 'e', 'g', 'i', 'm', 'n', 'p', 's', 't']
('0', 'z') 9 ['a', 'e', 'g', 'i', 'm', 'n', 'p', 's', 't']


In [11]:
# Deletion: per the recorded output, deleting from an empty mapping gives None,
# deleting the stored key 'a' gives its value, and deleting it again gives None.
tester.test_delete()

idxs: []
keys: []
vals: []

del a:  None

idxs: 0
keys: a
vals: 1

del a:  1
del a:  None

idxs: []
keys: []
vals: []
