Skip to content

Commit

Permalink
(fix) issue with punctuation
Browse files Browse the repository at this point in the history
  • Loading branch information
javadr committed May 19, 2023
1 parent 340173f commit 5c6efbd
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 7 deletions.
3 changes: 3 additions & 0 deletions Changelog.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
1.2.1 - 2023-05-23
-- Fix an issue with punctuation like '،ساختمان'

1.2 - 2023-03-28
-- Fix GUI execution issue

Expand Down
2 changes: 1 addition & 1 deletion negar/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pathlib import Path

__version__ = "1.2"
__version__ = "1.2.1"

DATAFILE = Path(__file__).parent.absolute() / "data/untouchable.dat"
USERFILE = Path.home() / ".python-negar"
Expand Down
19 changes: 13 additions & 6 deletions negar/virastar.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ def __init__(self, text, *args):

def cleanup(self):
self._handle_urls(State.save)
# fix punctuation spacing first
# : ; , . ! and the Persian ؛ ، ؟ should have one space after and no space before
self.text = re.sub(
r'[ ‌ ]*([:;,؛،.؟!]{1})[ ‌ ]*',
r'\1 ',
self.text
)
if self._trim_leading_trailing_whitespaces:
self.text = '\n'.join([line.strip() for line in self.text.split('\n')])
self.cleanup_spacing() if self._cleanup_spacing else None
Expand Down Expand Up @@ -242,12 +249,12 @@ def fix_spacing_for_braces_and_quotes(self):
for begin, end in zip(['\(','\[','\{','"','«'], ['\)','\]','\}','"','»']):
self.text = re.sub(rf'[ ‌]*({begin})\s*([^{end}]+?)\s*?({end})[ ‌]*',
r' \1\2\3 ', self.text )
# : ; , ! ? and their Persian counterparts should have one space after and no space before
self.text = re.sub(
r'[ ‌ ]*([:;,؛،.؟!]{1})[ ‌ ]*',
r'\1 ',
self.text
)
# # : ; , ! ? and their Persian counterparts should have one space after and no space before
# self.text = re.sub(
# r'[ ‌ ]*([:;,؛،.؟!]{1})[ ‌ ]*',
# r'\1 ',
# self.text
# )
# special case for versioning numbers like 1.2.7
self.text = re.sub(r'([\d])([.])\s([\d])([.])\s([\d])', r'\1\2\3\4\5', self.text)
# special case for floating-point numbers like 12.7
Expand Down

0 comments on commit 5c6efbd

Please sign in to comment.