-
Notifications
You must be signed in to change notification settings - Fork 1
/
utils.py
110 lines (87 loc) · 3.31 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python
#-*-coding:utf-8-*-
# author : "qiulimao"
# email : "qiulimao@getqiu.com"
import datetime
import six
from six import iteritems
import hashlib
from news.models import Vocabulary
import os,stat
from os.path import dirname
from news.configure import getDBConfigure,setDBConfigure
import logging
logger = logging.getLogger(__name__)
"""
util tools
"""
#---------- code begins below -------
def convert2date(dtstr):
    """
    Convert a date-like value into a ``datetime.date`` object.

    :param dtstr: a ``datetime.datetime``, a ``datetime.date`` or a string
        formatted as ``YYYY-MM-DD``.
    :returns: the corresponding ``datetime.date``. A plain ``date`` is
        returned unchanged; a ``datetime`` is truncated to its date part.
    :raises ValueError: if a string does not match ``%Y-%m-%d``.
    """
    # datetime.datetime is a subclass of datetime.date, so it must be tested
    # first; otherwise a datetime would be returned untouched and the
    # .date() conversion below could never run.
    if isinstance(dtstr, datetime.datetime):
        return dtstr.date()
    if isinstance(dtstr, datetime.date):
        return dtstr
    return datetime.datetime.strptime(dtstr, "%Y-%m-%d").date()
def utf8(string):
    """
    Return ``string`` as UTF-8 encoded bytes.

    Bytes are handed back untouched, text is encoded as UTF-8, and any other
    object is first converted to text with ``six.text_type`` and then encoded.
    """
    # Already bytes: nothing to do.
    if isinstance(string, six.binary_type):
        return string
    # Anything that is not text yet gets its text representation first.
    if not isinstance(string, six.text_type):
        string = six.text_type(string)
    return string.encode('utf8')
def md5(x):
    """Return the hexadecimal MD5 digest of ``x`` after converting it to bytes with ``utf8``."""
    encoded = utf8(x)
    return hashlib.md5(encoded).hexdigest()
def time2str(time_):
    """Format ``time_`` (any object with ``strftime``) as a ``YYYY-MM-DD`` string."""
    date_format = "%Y-%m-%d"
    return time_.strftime(date_format)
def _ensure_empty_dict_file(path, permission, created_message):
    """Create an empty file at ``path`` with mode ``permission`` if it does not exist yet."""
    if os.path.exists(path):
        return
    # Opening with "w+" and closing right away leaves a zero-length file.
    with open(path, "w+"):
        print(created_message)
    os.chmod(path, permission)


def build_user_dict():
    """
    Return the path of the user dictionary file to use, rebuilding it when required.

    Two settings read through ``getDBConfigure`` drive the behaviour:

    * ``USE_USER_DEFINED_DICT`` (default 0): when off, an empty
      ``nulluserdict.txt`` is created if missing and its path is returned.
    * ``RE_BUILD_USER_DEFINED_DICT`` (default 1): when off, the existing
      ``userdict.txt`` is reused (an empty one is created if missing).

    Otherwise the rebuild flag is reset to 0 and ``userdict.txt`` is rewritten
    from the ``Vocabulary`` rows whose ``brand`` is ``"user"``, one
    ``word frequency characteristic`` line per row, encoded as UTF-8.

    Both files live in the directory of this module and are created with
    read/write permission for user, group and others (0666).

    :returns: path of the dictionary file that should be used.
    """
    base_dir = dirname(os.path.abspath(__file__))
    # rw-rw-rw- (0666)
    file_permission = (stat.S_IRUSR | stat.S_IWUSR |
                       stat.S_IRGRP | stat.S_IWGRP |
                       stat.S_IROTH | stat.S_IWOTH)
    user_dict_file = os.path.join(base_dir, "userdict.txt")

    use_user_defined_dict = getDBConfigure(
        "USE_USER_DEFINED_DICT", default=0, type_=lambda x: bool(int(x)))
    if not use_user_defined_dict:
        null_user_dict_file = os.path.join(base_dir, "nulluserdict.txt")
        _ensure_empty_dict_file(null_user_dict_file, file_permission,
                                "created nulluserdict.txt")
        print("USE_USER_DEFINED_DICT=OFF,use null dict instead")
        return null_user_dict_file

    need_rebuild = getDBConfigure(
        "RE_BUILD_USER_DEFINED_DICT", default=1, type_=lambda x: bool(int(x)))
    if not need_rebuild:
        _ensure_empty_dict_file(
            user_dict_file, file_permission,
            "File userdict.txt does not exist,create an empty one")
        print("use the old userdict.txt file")
        return user_dict_file

    # The dictionary has to be rebuilt. The flag is reset first so that
    # subsequent calls reuse the file written below.
    # NOTE(review): if the rebuild fails after this point the flag stays
    # cleared; confirm that is the intended behaviour.
    setDBConfigure("RE_BUILD_USER_DEFINED_DICT", option=0)
    if os.path.exists(user_dict_file):
        os.remove(user_dict_file)

    template_line = u"{word} {frequency} {characteristic}\n"
    step = 300  # rows fetched per query
    # A stable ordering is required: slicing an unordered queryset page by
    # page may return overlapping or missing rows between queries.
    user_words = (Vocabulary.objects.filter(brand="user")
                  .order_by("pk")
                  .values("word", "frequency", "characteristic"))
    # Binary mode because encoded bytes are written; a text-mode file
    # rejects bytes on Python 3.
    with open(user_dict_file, "wb") as f:
        word_count = user_words.count()
        for offset in range(0, word_count, step):
            for word in user_words[offset:offset + step]:
                f.write(template_line.format(**word).encode("utf-8"))
    os.chmod(user_dict_file, file_permission)
    print("rebuilt the userdict.txt")
    return user_dict_file