# BM算法

> 用于在文本S中搜索模式串P



## 整体思路
> 1. 从模式串P尾部开始匹配，i, j 分别为 S、P 当前比较的位置下标，n, m分别为 S、P的长度，初始化 i=j=m-1
> 2. 当 S[i] == P[j], i--, j--
> 3. 当 S[i] != P[j], S[i] 称为“坏字符”，S[i+1:i+m-j]（后面已经匹配上的子串）称为“好后缀”。例如S为ABCAD，P为BAD，当 i = 2, j = 0时, “C”为坏字符，“AD”为好后缀
> 4. 模式串有两种向后移动的方法(规则)
> 5. 坏字符规则：在P当前失配位置左边找最近的匹配字符。例如P为ABCAB，失配时，S[i] = A, j=4, 最近的匹配字符为j=3的”A“
> 6. 好后缀规则：在P当前失配位置左边找最近的匹配子串。例如P为ABCAB，失配时，S[i] = C, j=2，最近的匹配子串为j=0的”AB“
> 7. 当 i >= s_len, 没匹配；当j < 0, 匹配

### 坏字符规则

> 后移模式串，等同于后移i，也就是我们要找出i后移的距离d
> 二维坏字符表：行表示文本串中的字符，列表示失配时在P的位置，值表示i需要移动的距离d，d > m - j，坏字符在左边的位置为k，d = m - k

In [1]:
# 2D bad character table
import numpy as np
import pandas as pd

def bad_character_table(p):
    m = len(p)
    char = list(set(p))
    table = pd.DataFrame(np.full((len(char), m), m), index=char)
    for i in range(1, m):
        table.iloc[:, i] = table.iloc[:, i-1]
        table.loc[p[i-1], i] = m - i
    return table

print(bad_character_table('BCDBACD'))


   0  1  2  3  4  5  6
A  7  7  7  7  7  2  2
D  7  7  7  4  4  4  4
B  7  6  6  6  3  3  3
C  7  7  5  5  5  5  1


### 好后缀规则

> 分两种情况
> 1. 失配位置的左边能找到好后缀，假设失配位置为j，相当于找P[:j]最长的子串（最长前缀串），子串后缀为P[j+1:]，i移动的距离d，d > m-j, 重复串头部位置为t，d = m - t
> 2. 失配位置的左边不能找到好后缀或能在前缀中找到部分好后缀，d = m + len(suffix) - len(prefix)

> 因为我们要在失配位置左边找离失配位置最近的好后缀，所以先找第二种情况，再找第一种情况

In [2]:
# good suffix table
def good_suffix_table(p):
    m = len(p)
    table = [0] * m
    # suffix not found
    def good_suffix_not_on_left(p):
        table[m-1] = 1
        last_prefix_length = 0
        for i in range(m-1, 0, -1):
            suffix_length = m - i
            if p[i:] == p[:m-i]:
                last_prefix_length = prefix_length = suffix_length
            else:
                prefix_length = last_prefix_length
            table[i-1] = m + suffix_length - prefix_length
    # suffix found
    def good_suffix_on_left(p):
        def common_suffix_length(p, i, j):
            length = 0
            while i >= 0 and p[i] == p[j]:
                length += 1
                i -= 1
                j -= 1
            return length
        
        for i in range(m-1):
            common_suffix_length_ = common_suffix_length(p, i, m-1)
            if common_suffix_length_ > 0:
                j = m - 1 - common_suffix_length_
                table[j] = m - (i + 1 - common_suffix_length_)
    
    good_suffix_not_on_left(p)
    good_suffix_on_left(p)
    return table

> 结合“坏字符”规则和“好后缀”规则，取两者后移比较大的值

> 实践中，经常用一维”坏字符“规则，原因是实现简单，且结合上述逻辑，不会出现死循环

In [3]:
# 1D bad character table
def bad_character_table(p):
    m = len(p)
    char = list(set(p))
    table = pd.Series([m] * len(char), index=char)
    table[p[-1]] = 1
    for i in range(m-1):
        table[p[i]] = m - 1 - i
    return table

char_table = bad_character_table('BCDBACD')
print(char_table)
print(char_table.get('F', len('BCDBACD')))

A    2
D    4
B    3
C    1
dtype: int64
7


In [4]:
# BM
def bm(s, p):
    n, m = len(s), len(p)
    if n < m:
        return -1
    bad_char = bad_character_table(p)
    good_suffix = good_suffix_table(p)
    i = m - 1
    while i < n:
        j = m - 1
        while j >= 0 and s[i] == p[j]:
            i -= 1
            j -= 1
        if j == -1:
            return i + 1
        i += max(bad_char.get(s[i], m), good_suffix[j]) # max(bad_char.loc[s[i], j], good_suffix[j])
    return -1

print(bm('BCBAABACAABABACAA', 'ABABAC'))

9


## 复杂度分析

> “坏字符“规则：一维是O(S + M)，二维是O(S * M), S是考虑的字符集，M是P的长度
> “好后缀”规则：O(M2)
> 查找：O(N*M)

> 使用场景：M远比N小

================= 分割线 =================

In [29]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [31]:
'''
kmp
1. 从前往后匹配
2. 出现不匹配字符时，在模式串的前缀子串中迭代找可以匹配的字符
'''

# next 数组存前、后缀最长公共子串长度
def build_next(p):
    next = [0] * len(p)
    
    j = 0
    for i in range(1, len(p)):
        # p[j] 是最长公共前缀串的下一个字符
        while j > 0 and p[i] != p[j]:
            j = next[j-1]
        
        if p[i] == p[j]:
            j += 1
        
        next[i] = j
    
    return next

# next 数组存最长公共前缀的最后一个字符的下标
def build_next_v2(p):
    next = [-1] * len(p)
    j = -1
    for i in range(1, len(p)):
        # p[j+1] 是最长公共前缀串的下一个字符
        while j != -1 and p[i] != p[j+1]:
            j = next[j]
        
        if p[i] == p[j+1]:
            j += 1
        
        next[i] = j
    
    return next


build_next("ababaca")
build_next_v2("ababaca")


def kmp(s, p):
    next = build_next(p)
    # 模式串待比较的字符下标
    j = 0
    for i in range(len(s)):
        while j > 0 and s[i] != p[j]:
            j = next[j-1]

        if s[i] == p[j]:
            j += 1
        
        if j == len(p):
            return i - j + 1
    
    return -1


def kmp_v2(s, p ):
    next = build_next_v2(p)
    # p已经匹配的字符下标
    j = -1
    for i in range(len(s)):
        while j != -1 and s[i] != p[j+1]:
            j = next[j]
        
        if s[i] == p[j+1]:
            j += 1
        
        if j == len(p) - 1:
            return i - j
        
    return -1


kmp('hello', 'll')
kmp_v2('hello', 'll')


'\nkmp\n1. 从前往后匹配\n2. 出现不匹配字符时，在模式串的前缀子串中迭代找可以匹配的字符\n'

[0, 0, 1, 2, 3, 0, 1]

[-1, -1, 0, 1, 2, -1, 0]

2

2

In [33]:
'''
bm
1. 从后往前匹配
2. 出现不匹配时，使用坏字符规则和好后缀规则后移
'''

def bm(s, p):
    bc_table = bad_character(p)
    gs_table = good_suffix_table(p)
    i = len(p) - 1
    while i < len(s):
        j = len(p) - 1
        while j >= 0 and s[i] == p[j]:
            i -= 1
            j -= 1

        if j < 0:
            return i + 1
            
        i += max(bc_table[ord(s[i])- ord('a')], gs_table[j])

    return -1

def bad_character(p):
    table = [len(p)] * 256
    for i in range(len(p)):
        table[ord(p[i])-ord('a')] = len(p) - 1 - i
    
    return table


def good_suffix(p):
    table = [0] * len(p)
    table[-1] = 1
    def not_on_left():
        last_prefix_len = 0
        # i 是好后缀开头字符下标
        for i in range(len(p)-1, 0, -1):
            suffix_len = len(p) - i
            if p[i:] == p[:len(p)-i]:
                prefix_len = suffix_len
                last_prefix_len = prefix_len
            else:
                prefix_len = last_prefix_len
            
            # i - 1 失配下标
            table[i-1] = len(p) + suffix_len - prefix_len


    def on_left():
        def common_suffix_len(i, j):
            length = 0
            while i >= 0 and p[i] == p[j]:
                length += 1
                i -= 1
                j -= 1
            
            return length
        
        for i in range(len(p)-1):
            suffix_len = common_suffix_len(i, len(p)-1)
            if suffix_len > 0:
                table[len(p)-1-suffix_len] = len(p) - (i + 1 - suffix_len)
    
    not_on_left()
    on_left()
    return table

bm('BCBAABACAABABACAA', 'ABABAC')

'\nbm\n1. 从后往前匹配\n2. 出现不匹配时，使用坏字符规则和好后缀规则后移\n'

9

In [57]:
def permute(nums, k):
    n = len(nums)
    permu = []
    def dfs(start):
        if start == k:
            permu.append(nums[:k])
        for i in range(start, n):
            nums[i], nums[start] = nums[start], nums[i]
            dfs(start+1)
            nums[i], nums[start] = nums[start], nums[i]
    
    dfs(0)
    return permu

permute([1,2,3], 2)


[[1, 2], [1, 3], [2, 1], [2, 3], [3, 2], [3, 1]]

In [52]:
def combine(nums, k):
    n = len(nums)
    combs = []
    comb = []
    def dfs(start):
        if len(comb) == k:
            combs.append(comb[:])
            return
        for i in range(start, n):
            comb.append(nums[i])
            dfs(i+1)
            comb.pop()
    
    dfs(0)
    return combs

combine([1,2,3,4], 3)
        

[[1, 2, 3], [1, 2, 4], [1, 3, 4], [2, 3, 4]]

In [60]:
def knapsack1(w, target):
    n = len(w)
    dp = [[False] * (target+1)] * n
    dp[0][0] = True
    if w[0] <= target:
        dp[0][w[0]] = True
    
    for i in range(1, n):
        for j in range(target+1):
            if not dp[i-1][j]:
                continue
            # not select
            dp[i][j] = True
            # select
            if j + w[i] <= target:
                dp[i][j+w[i]] = True
    
    
    for i in range(target, -1, -1):
        if dp[n-1][i]:
            return i
    
    return 0

knapsack1([1,2,2,4,6], 9)

9

In [61]:
def knapsack1_v2(w, target):
    n = len(w)
    dp = [False] * (target+1)
    dp[0] = True
    
    if w[0] <= target:
        dp[w[0]] = True

    for i in range(1, n):
        for j in range(target-w[i], -1 , -1):
            if dp[j]:
                dp[j+w[i]] = True

    for i in range(target, -1, -1):
        if dp[i]:
            return i

    return 0

knapsack1_v2([1,2,2,4,6], 9)

9

In [62]:
def knapsack2(w, v, capacity):
    n = len(w)
    dp = [[-1] * (capacity + 1)] * n
    dp[0][0] = 0
    if w[0] <= capacity:
        dp[0][w[0]] = v[0]
    
    for i in range(1, n):
        for j in range(capacity+1):
            if dp[i-1][j] == -1:
                continue
            # select
            dp[i][j] = dp[i-1][j]
            # not select
            if j + w[i] <= capacity:
                dp[i][j+w[i]] = max(dp[i][j+w[i]], dp[i-1][j]+v[i])
    
    return max(dp[n-1])

knapsack2([2,2,4,6,3], [3,4,8,9,6], 9)

18

In [63]:
def knapsack2_v2(w, v, capactiy):
    n = len(w)
    dp = [-1] * (capactiy+1)
    dp[0] = 0
    if w[0] <= capactiy:
        dp[w[0]] = v[0]
    
    for i in range(1, n):
        for j in range(capactiy-w[i], -1, -1):
            if dp[j] == -1:
                continue
            dp[j+w[i]] = max(dp[j+w[i]], dp[j]+v[i])
    
    return max(dp)

knapsack2([2,2,4,6,3], [3,4,8,9,6], 9)

18

In [67]:
def shortpath(matrix):
    n = len(matrix)
    res = float('inf')
    memo = {}

    def dfs(i, j, path):
        nonlocal res
        if i == n or j == n:
            return
        path += matrix[i][j]
        
        if (i, j) in memo and memo[(i, j)] < path:
            return

        memo[(i, j)] = path

        if i == n - 1 and j == n - 1:
            res = min(res, path)
            return
        
        dfs(i+1, j, path)
        dfs(i, j+1, path)
    dfs(0, 0, 0)
    return res


shortpath([[1,3,5,9],[2,1,3,4],[5,2,6,7],[6,8,4,3]])

19

In [69]:
def shortpath_v2(matrix):
    n = len(matrix)
    dp = [[None] * n for _ in range(n)]
    sum_ = 0
    for i in range(n):
        sum_ += matrix[0][i]
        dp[0][i] = sum_
    
    sum_ = 0
    for i in range(n):
        sum_ += matrix[i][0]
        dp[i][0] = sum_
    
    for i in range(1, n):
        for j in range(1, n):
            dp[i][j] = matrix[i][j] + min(dp[i-1][j], dp[i][j-1])
    
    return dp[-1][-1]

shortpath_v2([[1,3,5,9],[2,1,3,4],[5,2,6,7],[6,8,4,3]])


19

In [70]:
def shortpath_v3(matrix):
    n = len(matrix)
    dp = [None] * n
    sum_ = 0
    for j in range(n):
        sum_ += matrix[0][j]
        dp[j] = sum_
    
    for i in range(1, n):
        dp[0] += matrix[i][0]
        for j in range(1, n):
            dp[j] = matrix[i][j] + min(dp[j-1], dp[j])
    
    return dp[-1]

shortpath_v3([[1,3,5,9],[2,1,3,4],[5,2,6,7],[6,8,4,3]])

19

In [73]:
def jump(nums):
    n = len(nums)
    dp = [0] + [n-1] * (n - 1)
    for i in range(n):
        for j in range(1, nums[i]+1):
            dp[i + j] = min(dp[i +j], dp[i] + 1)
            if i + j == n - 1:
                return dp[-1]

jump([2,3,1,1,4])

2

In [2]:
from collections import defaultdict, deque
from typing import List


def findMinHeightTrees(n: int, edges: List[List[int]]) -> List[int]:
    degree = defaultdict(int)
    adj = [[] for _ in range(n)]
    for i, j in edges:
        adj[i].append(j)
        adj[j].append(i)
        degree[i] += 1
        degree[j] += 1
    degree = sorted((-v, k) for k, v in degree.items())

    def bfs(root):
        queue = deque([[root, 0]])
        visited = [False] * n
        height = 0
        while queue:
            node, h = queue.popleft()
            visited[node] = True
            height = max(height, h)
            for i in adj[node]:
                if visited[i]:
                    continue 
                queue.append([i, h+1])
        return height

    min_height = float('inf')
    res = [0]
    for _, node in degree:
        height = bfs(node)
        if height <= min_height:
            if height < min_height:
                    res.pop()
            min_height = height
            res.append(node)
        else:
            break
    return res

findMinHeightTrees(10, [[0,1],[0,2],[0,3],[2,4],[0,5],[5,6],[6,7],[2,8],[7,9]])

[0]

In [14]:
import heapq


class Solution:
    def findCheapestPrice(self, n: int, flights: List[List[int]], src: int, dst: int, k: int) -> int:
        adj = [[] for _ in range(n)]
        for f, t, price in flights:
            adj[f].append((t, price))
        # cost = [float('inf')] * n
        # cost[src] = 0

        queue = [(0, 0, src)]
        while queue:
            price, stop, city = heapq.heappop(queue)
            if city == dst:
                return price
            
            if stop >= k:
                continue
            
            for neighbor, p in adj[city]:
                heapq.heappush(queue, (price + p, stop+1, neighbor))
        
        
s = Solution()
s.findCheapestPrice(4, [[0,1,100],[1,2,100],[2,0,100],[1,3,600],[2,3,200]], 0, 3, 1)

In [26]:
import array
class Bitset:

    def __init__(self, size: int):
        self.cnt = 0
        self.capacity = size
        self.size = self._index(size-1) + 1
        self.words = array.array('B', [0] * self.size)
        self.flipped = False

    def _index(self, idx):
        return idx >> 3

    def fix(self, idx: int) -> None:
        i = self._index(idx)
        w = 1 << (idx & 7)
        if self.flipped:
            if self.words[i] & w:
                self.cnt -= 1
                self.words[i] &= ~w
        else:
            if not self.words[i] & w:
                self.cnt += 1
                self.words[i] |= w


    def unfix(self, idx: int) -> None:
        i = self._index(idx)
        w = 1 << (idx & 7)
        if self.flipped:
            if not self.words[i] & w:
                self.cnt += 1
                self.words[i] |= w
        else:
            if self.words[i] & w:
                self.cnt -= 1
                self.words[i] &= ~w

    def flip(self) -> None:
        self.flipped = not self.flipped

    def all(self) -> bool:
        return self.cnt == 0 if self.flipped else self.cnt == self.capacity

    def one(self) -> bool:
        return self.count() > 0

    def count(self) -> int:
        return self.capacity - self.cnt if self.flipped else self.cnt

    def toString(self) -> str:
        result = []
        for i, w in enumerate(self.words):
            bit_str = f'{w ^ (0xFF if self.flipped else 0):08b}'
            if i == self.size - 1:
                bit_str = bit_str[-(self.capacity & 7):]
            result.append(bit_str)
        return ''.join(result[::-1])

# Your Bitset object will be instantiated and called as such:
obj = Bitset(5)
obj.fix(3)
obj.fix(1)
obj.flip()
obj.toString()

'10101'