In [1]:
# %pip install pypinyin  # run once if pypinyin is not installed in the kernel's environment
from pypinyin import lazy_pinyin, Style
import re
from difflib import SequenceMatcher

In [2]:
# Character class of the Zhuyin tone marks (ˊ ˇ ˋ ˙); substituting its matches
# with "" leaves a reading with the tone removed.
tone_re = re.compile("[ˊˇˋ˙]")

class CharPhone:
    """A single character that compares by its toneless Zhuyin reading.

    Two instances are equal, and hash alike, when their readings match after
    tone marks are removed.  This lets a list of ``CharPhone`` objects be fed
    to ``difflib.SequenceMatcher`` to align text by sound rather than by glyph.

    Attributes:
        ch: the original one-character string.
        zhuyin: first item returned by ``lazy_pinyin(ch, style=Style.BOPOMOFO)``.
        phones: ``zhuyin`` with every ``tone_re`` match removed.
    """

    def __init__(self, ch):
        """Look up the reading of ``ch``.

        Args:
            ch: a string of length exactly one.

        Raises:
            AssertionError: if ``ch`` is not exactly one character long.
        """
        assert len(ch) == 1, f"expected a single character, got {ch!r}"
        self.ch = ch
        self.zhuyin = lazy_pinyin(ch, style=Style.BOPOMOFO)[0]
        self.phones = tone_re.sub("", self.zhuyin)

    def __hash__(self):
        """Hash on ``phones`` so that equal instances share a hash."""
        return hash(self.phones)

    def __eq__(self, other):
        """Return True when both objects have the same toneless reading.

        Returns ``NotImplemented`` for anything that is not a ``CharPhone`` so
        that ``==`` / ``!=`` against unrelated objects (``None``, ``str``, ...)
        yield a normal boolean instead of raising ``AttributeError``.
        """
        if not isinstance(other, CharPhone):
            return NotImplemented
        return self.phones == other.phones

    def __repr__(self):
        return f"<Char: {self.ch} ({self.zhuyin})>"

    def to_ignore(self):
        """Return True when the character is whitespace (empty after ``strip()``)."""
        return not self.ch.strip()

In [3]:
def char_mapper(text):
    """Return a list holding one ``CharPhone`` per character of ``text``, in order."""
    mapped = []
    for ch in text:
        mapped.append(CharPhone(ch))
    return mapped

In [4]:
# 我 and 火 have different toneless readings, so they do not compare equal.
CharPhone("我") == CharPhone("火")

False

In [5]:
# 我 and 窩 are different characters but share a toneless reading, so they compare equal.
CharPhone("我") == CharPhone("窩")

True

In [6]:
# Phonetic similarity: both strings are mapped to CharPhone sequences, so
# SequenceMatcher pairs characters by toneless reading instead of by glyph.
# Whitespace characters are declared junk via CharPhone.to_ignore.
SequenceMatcher(
    CharPhone.to_ignore,
    char_mapper("施氏食獅史"),
    char_mapper("適施氏適市"),
).ratio()

1.0

In [7]:
# Baseline: compare the raw characters with no phonetic mapping.  The junk
# predicate skips whitespace, the same rule CharPhone.to_ignore applies, so the
# two ratios differ only in how characters are compared.  (The previous
# predicate, x == "", could never be true for a one-character element.)
SequenceMatcher(lambda x: not x.strip(), "施氏食獅史", "適施氏適市").ratio()

0.4