Skip to content

Commit

Permalink
make rules more strict
Browse files: browse the repository at this point in the history
Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
  • Loading branch information
njzjz committed Jul 31, 2023
1 parent 6ce4a8b commit 5ee3f26
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 8 deletions.
2 changes: 1 addition & 1 deletion qiuwenbot/filter/gov.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class CNGovFilter(TextReplaceFilter):

def __init__(self):
super().__init__(
r"""(中国|中國|北京|中共)(当局|當局|政府)""",
r"""(中国|中國|中共)(当局|當局|政府)""",
r"中国政府",
)

Expand Down
2 changes: 1 addition & 1 deletion qiuwenbot/filter/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class FakeManchukuoFilter(TextReplaceFilter):

def __init__(self):
super().__init__(
r"([^伪僞偽])(滿|满)洲(国|國)",
r"([^伪僞偽“‘「『])(滿|满)洲(国|國)",
r"\1伪满洲国",
)

Expand Down
10 changes: 5 additions & 5 deletions qiuwenbot/filter/tw.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ class TWUnivFilter1(TextReplaceFilter):

def __init__(self):
super().__init__(
r"(國立|国立)((臺|台)(灣|湾)((师范|師範|海洋|藝術|艺术|体育(运动)?|科技)?大(學|学)|戲曲學院|戏曲学院)|金门大学|金門大學)",
r"\2",
r"([^“‘「『])(國立|国立)((臺|台)(灣|湾)((师范|師範|海洋|藝術|艺术|体育(运动)?|科技)?大(學|学)|戲曲學院|戏曲学院)|金门大学|金門大學)",
r"\1\3",
)

@property
Expand All @@ -93,8 +93,8 @@ class TWUnivFilter2(TextReplaceFilter):
def __init__(self):
# only fix univ created after 1949
super().__init__(
r"(國立|国立)((高雄师范|高雄師範|彰化師範|彰化师范|台北艺术|臺北藝術|臺南|台南|體育|体育|阳明|陽明|阳明交通|陽明交通)大(学|學)|傳統藝術中心|传统艺术中心)",
r"台湾\2",
r"([^“‘「『])(國立|国立)((高雄师范|高雄師範|彰化師範|彰化师范|台北艺术|臺北藝術|臺南|台南|體育|体育|阳明|陽明|阳明交通|陽明交通)大(学|學)|傳統藝術中心|传统艺术中心)",
r"\1台湾\3",
)

@property
Expand Down Expand Up @@ -160,7 +160,7 @@ def __init__(self):
coutries_re = "|".join(coutries + countries_hant)

super().__init__(
r"([^国國])(台湾|台灣|臺湾|\[\[台湾\]\]|\[\[台灣\]\]|\[\[臺湾\]\])((和|与|、|,|,|或|或者|及|以及)(\[\[)?(%s))"
r"([^国國])(台湾|台灣|臺湾|\[\[台湾\]\]|\[\[台灣\]\]|\[\[臺湾\]\])((和|与|、|,|或|或者|及|以及)(\[\[)?(%s))"
% coutries_re,
r"\1中国台湾\3",
)
Expand Down
6 changes: 5 additions & 1 deletion tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_fileter():
<score>456</score>
<mapframe>789</mapframe>
[[香港主權移交]]了!
北京当局
中国当局
中华民国(台湾)
中华民国台北市
"""
Expand Down Expand Up @@ -124,12 +124,14 @@ def test_tw_univ():
r"""\
国立台湾大学
国立阳明大学
"国立体育大学"
"""
)
expected_text = dedent(
r"""\
台湾大学
台湾阳明大学
"国立体育大学"
"""
)
filter = FilterChain(default_filters)
Expand All @@ -142,6 +144,7 @@ def test_historical_authority():
text = dedent(
r"""\
满洲国
“满洲国”
伪满洲国
汪精卫政权
清治时期
Expand All @@ -150,6 +153,7 @@ def test_historical_authority():
expected_text = dedent(
r"""\
伪满洲国
“满洲国”
伪满洲国
汪伪政权
清朝时期
Expand Down

0 comments on commit 5ee3f26

Please sign in to comment.