Skip to content

Commit

Permalink
refine the dealing of unicode and str
Browse files Browse the repository at this point in the history
  • Loading branch information
adaptee committed Nov 27, 2010
1 parent 67107c5 commit 4d5e905
Showing 1 changed file with 29 additions and 13 deletions.
42 changes: 29 additions & 13 deletions pinyin-comp
Original file line number Diff line number Diff line change
Expand Up @@ -127,21 +127,36 @@ def acronymize(uni_char):
# for most cases.
return fuzzynize( pinyin_initial )

def get_acronym(string):
"get acronym for unicode string"
if not isinstance(string, unicode):
try:
string = unicode(string, default_encoding)
except UnicodeDecodeError:
return string
def get_acronym(text):
"get acronym for text string"
text = unicodelize(text)

acronym = ""
acronym = u""

for char in string:
for char in text:
acronym += acronymize(char)

return acronym
return stringlize(acronym)

def unicodelize(text):
    "return text as a unicode string when it can be decoded, else unchanged."
    # Already unicode: nothing to decode.
    if isinstance(text, unicode):
        return text

    try:
        decoded = unicode(text, default_encoding)
    except UnicodeDecodeError:
        # Best effort: input that cannot be decoded with default_encoding
        # is handed back untouched instead of raising.
        return text

    return decoded

def stringlize(text):
    "return text as an encoded byte string when possible, else unchanged."
    # Only unicode objects need encoding; anything else passes through.
    if not isinstance(text, unicode):
        return text

    try:
        encoded = text.encode(default_encoding)
    except UnicodeEncodeError:
        # Best effort: characters not representable in default_encoding
        # leave the unicode object as it was.
        return text

    return encoded

if __name__ == '__main__':

Expand All @@ -150,13 +165,14 @@ if __name__ == '__main__':
sys.exit(1)

dironly = sys.argv[1]
path = sys.argv[2].replace("\\","")

path = sys.argv[2].replace("\\","")
# support fuzzy pinyin
path = "".join( [ fuzzynize(x) for x in path] )

index = None
effective_path = path
index = None
effective_path = path

# deal with special form such as 'xxx/zj1'
if len(path) > 1 and '0' < path[-1] <= '9':
index = int(path[-1])
Expand Down

0 comments on commit 4d5e905

Please sign in to comment.