Skip to content
Newer
Older
100755 234 lines (178 sloc) 7.16 KB
36b055a @epico begin to write tryprune.py
epico authored Jul 24, 2011
1 #!/usr/bin/python3
b3b768c @epico write tryprune.py in progress
epico authored Jul 24, 2011
2 import os
9cca418 @epico write tryprune.py in progress
epico authored Jul 25, 2011
3 import os.path
b3b768c @epico write tryprune.py in progress
epico authored Jul 24, 2011
4 import sys
36b055a @epico begin to write tryprune.py
epico authored Jul 24, 2011
5 from subprocess import Popen, PIPE
6 from argparse import ArgumentParser
b3b768c @epico write tryprune.py in progress
epico authored Jul 24, 2011
7 from myconfig import MyConfig
ee06075 @epico write tryprune.py
epico authored Jul 25, 2011
8 import utils
b3b768c @epico write tryprune.py in progress
epico authored Jul 24, 2011
9
10
config = MyConfig()

# The external tools in this script are addressed relative to the
# 'data' sub-directory of the libpinyin tools directory
# ('../utils/training/...'), so make that the working directory first.
libpinyin_dir = config.getToolsDir()
libpinyin_sub_dir = os.path.join(libpinyin_dir, 'data')
os.chdir(libpinyin_sub_dir)
36b055a @epico begin to write tryprune.py
epico authored Jul 24, 2011
18
2575950 @epico fixes pep8 warnings
epico authored Jul 26, 2011
19
36b055a @epico begin to write tryprune.py
epico authored Jul 24, 2011
def validateModel(modelfile):
    """Run the k mixture model validator on `modelfile`.

    Spawns ``../utils/training/validate_k_mixture_model`` (resolved against
    the current working directory) with `modelfile` as its only argument
    and waits for it to finish.  The script is terminated through
    ``sys.exit`` when the child's wait status is non-zero.
    """
    validator = Popen(
        ['../utils/training/validate_k_mixture_model', modelfile],
        shell=False, close_fds=True)

    # os.waitpid returns (pid, raw wait status); only the status is needed
    wait_status = os.waitpid(validator.pid, 0)[1]
    if wait_status == 0:
        return
    sys.exit('Corrupted model found when validating:' + modelfile)
36b055a @epico begin to write tryprune.py
epico authored Jul 24, 2011
31
2575950 @epico fixes pep8 warnings
epico authored Jul 26, 2011
32
36b055a @epico begin to write tryprune.py
epico authored Jul 24, 2011
def exportModel(modelfile, textmodel):
    """Write the export tool's output for `modelfile` into `textmodel`.

    ``../utils/training/export_k_mixture_model`` is started with
    ``--k-mixture-model-file modelfile``; everything it prints on stdout
    is captured through a pipe and written unchanged, in binary mode, to
    `textmodel`.  The script is terminated through ``sys.exit`` when the
    child's wait status is non-zero.
    """
    exporter = Popen(
        ['../utils/training/export_k_mixture_model',
         '--k-mixture-model-file', modelfile],
        shell=False, stdout=PIPE, close_fds=True)

    with open(textmodel, 'wb') as out:
        dumped = exporter.stdout.readlines()
        out.writelines(dumped)

    # the child is reaped only after its whole output has been consumed
    wait_status = os.waitpid(exporter.pid, 0)[1]
    if wait_status != 0:
        sys.exit('Corrupted model found when exporting:' + modelfile)
36b055a @epico begin to write tryprune.py
epico authored Jul 24, 2011
49
2575950 @epico fixes pep8 warnings
epico authored Jul 26, 2011
50
224b933 @epico write tryprune.py in progress
epico authored Jul 25, 2011
def convertModel(kmm_model, inter_model):
    """Feed `kmm_model` to the interpolation converter, save to `inter_model`.

    ``../utils/training/k_mixture_model_to_interpolation`` reads the model
    from its standard input and its standard output is written unchanged,
    in binary mode, to `inter_model`.

    The input file is opened here and handed to the child as stdin, with
    no shell involved, so file names containing quotes, ``$`` or other
    shell metacharacters are passed through safely.

    The script is terminated through ``sys.exit`` when `kmm_model` cannot
    be opened or when the child's wait status is non-zero.
    """
    failure = 'Corrupted model found when converting:' + kmm_model
    cmdline = ['../utils/training/k_mixture_model_to_interpolation']

    try:
        source = open(kmm_model, 'rb')
    except OSError:
        # An unreadable input used to surface as a failed shell redirect,
        # i.e. a non-zero status; keep reporting it the same way.
        sys.exit(failure)

    with source:
        converter = Popen(cmdline, shell=False, stdin=source,
                          stdout=PIPE, close_fds=True)

        with open(inter_model, 'wb') as f:
            f.writelines(converter.stdout.readlines())
        converter.stdout.close()

        (pid, status) = os.waitpid(converter.pid, 0)

    if status != 0:
        sys.exit(failure)
66
2575950 @epico fixes pep8 warnings
epico authored Jul 26, 2011
67
9cca418 @epico write tryprune.py in progress
epico authored Jul 25, 2011
def mergeOneModel(mergedmodel, onemodel, score):
    """Merge the model file `onemodel` into `mergedmodel`.

    Before merging, the status stored next to the model (`onemodel` plus
    ``config.getStatusPostfix()``) is loaded and checked:

    * ``utils.check_epoch(status, 'Estimate')`` must be truthy, otherwise
      ``utils.Epoch`` is raised;
    * its ``'EstimateScore'`` entry must equal `score`, otherwise
      ``AssertionError`` is raised.

    Then ``../utils/training/merge_k_mixture_model --result-file
    mergedmodel onemodel`` is run; the script is terminated through
    ``sys.exit`` when the child's wait status is non-zero.
    """
    status_path = onemodel + config.getStatusPostfix()
    model_status = utils.load_status(status_path)

    estimated = utils.check_epoch(model_status, 'Estimate')
    if not estimated:
        raise utils.Epoch('Please estimate first.\n')
    if score != model_status['EstimateScore']:
        raise AssertionError('estimate scores mis-match.\n')

    merger = Popen(
        ['../utils/training/merge_k_mixture_model',
         '--result-file', mergedmodel,
         onemodel],
        shell=False, close_fds=True)

    wait_status = os.waitpid(merger.pid, 0)[1]
    if wait_status != 0:
        sys.exit('Corrupted model found when merging:' + onemodel)
36b055a @epico begin to write tryprune.py
epico authored Jul 24, 2011
89
2575950 @epico fixes pep8 warnings
epico authored Jul 26, 2011
90
ee06075 @epico write tryprune.py
epico authored Jul 25, 2011
def mergeSomeModels(mergedmodel, sortedindexname, mergenum):
    """Merge the first `mergenum` models of a sorted index into `mergedmodel`.

    Each line of `sortedindexname` has the form
    ``<subdir>#<modelname>#<score>``.  For each of the first `mergenum`
    lines the model ``config.getModelDir()/<subdir>/<modelname>`` is
    validated with ``validateModel`` and then merged with
    ``mergeOneModel``.

    Raises ``AssertionError`` when the index has fewer than `mergenum`
    lines, or when a score is greater than the previous one (the first
    score is compared against 1.0).

    The index file is closed on every exit path, including when one of
    the checks or a helper raises.
    """
    last_score = 1.
    with open(sortedindexname, 'r') as indexfile:
        for _ in range(mergenum):
            line = indexfile.readline()
            if not line:
                raise AssertionError('No more models.\n')
            line = line.rstrip(os.linesep)
            # maxsplit=2: anything after the second '#' belongs to score
            (subdir, modelname, score) = line.split('#', 2)
            score = float(score)
            if score > last_score:
                raise AssertionError('scores must be descending.\n')

            onemodel = os.path.join(config.getModelDir(), subdir, modelname)

            # validate first
            print('validating')
            validateModel(onemodel)

            mergeOneModel(mergedmodel, onemodel, score)
            last_score = score
115
2575950 @epico fixes pep8 warnings
epico authored Jul 26, 2011
116
ee06075 @epico write tryprune.py
epico authored Jul 25, 2011
def pruneModel(prunedmodel, k, CDF):
    """Run the k mixture model prune tool on `prunedmodel`.

    Executes ``../utils/training/prune_k_mixture_model -k <k> --CDF <CDF>
    prunedmodel`` and waits for it.  `k` and `CDF` are converted with
    ``str`` before being passed on the command line.
    NOTE(review): the tool presumably rewrites `prunedmodel` in place,
    since the caller copies the merged model to that path before pruning.
    Confirm against the tool.

    The script is terminated through ``sys.exit`` when the child's wait
    status is non-zero.
    """
    cmdline = ['../utils/training/prune_k_mixture_model',
               '-k', str(k), '--CDF', str(CDF),
               prunedmodel]

    pruner = Popen(cmdline, shell=False, close_fds=True)

    (pid, status) = os.waitpid(pruner.pid, 0)
    if status != 0:
        # Report the model that was being pruned; the previous code used an
        # undefined name here and raised NameError instead of exiting.
        sys.exit('Corrupted model found when pruning:' + prunedmodel)
36b055a @epico begin to write tryprune.py
epico authored Jul 24, 2011
129
ee06075 @epico write tryprune.py
epico authored Jul 25, 2011
130
36b055a @epico begin to write tryprune.py
epico authored Jul 24, 2011
if __name__ == '__main__':
    # ---- command line ----
    parser = ArgumentParser(description='Try prune models.')
    parser.add_argument('--modeldir',
                        help='model directory',
                        default=config.getModelDir())
    parser.add_argument('--merge', type=int, default=10,
                        help='number of model candidates to be merged')
    parser.add_argument('-k', type=int, default=3,
                        help='k parameter of k mixture model prune')
    parser.add_argument('--CDF', type=float, default=0.99,
                        help='CDF parameter of k mixture model prune')
    parser.add_argument('--fast', action='store_const',
                        const=True, default=False,
                        help='Use in-memory filesystem to speed up prune')
    parser.add_argument('tryname',
                        help='the storage directory')

    args = parser.parse_args()
    print(args)

    # ---- try<name> directory: must not exist yet ----
    tryname = 'try' + args.tryname
    trydir = os.path.join(config.getFinalModelDir(), tryname)
    if os.access(trydir, os.F_OK):
        sys.exit(tryname + ' exists.')
    os.makedirs(trydir)

    # store the prune parameters in the status file before any work starts
    cwdstatuspath = os.path.join(trydir, config.getFinalStatusFileName())
    cwdstatus = {
        'PruneMergeNumber': args.merge,
        'PruneK': args.k,
        'PruneCDF': args.CDF,
    }
    utils.store_status(cwdstatuspath, cwdstatus)

    # ---- merge the model candidates ----
    print('merging')
    mergedmodel = os.path.join(trydir, 'merged.db')
    sortedindexname = os.path.join(args.modeldir,
                                   config.getSortedEstimateIndex())
    mergeSomeModels(mergedmodel, sortedindexname, args.merge)

    print('validating')
    validateModel(mergedmodel)

    # textual dump of the merged model
    print('exporting')
    exportfile = os.path.join(trydir, 'kmm_merged.text')
    exportModel(mergedmodel, exportfile)

    # ---- prune a copy of the merged model ----
    print('pruning')

    prunedmodel = os.path.join(trydir, 'pruned.db')
    if not args.fast:
        # prune a copy so that merged.db itself stays untouched
        utils.copyfile(mergedmodel, prunedmodel)
        pruneModel(prunedmodel, args.k, args.CDF)
    else:
        # prune inside the in-memory filesystem, then copy the result back
        shmmodel = os.path.join(config.getInMemoryFileSystem(), 'pruned.db')
        if os.access(shmmodel, os.F_OK):
            os.unlink(shmmodel)
        utils.copyfile(mergedmodel, shmmodel)
        pruneModel(shmmodel, args.k, args.CDF)
        utils.copyfile(shmmodel, prunedmodel)

    print('validating')
    validateModel(prunedmodel)

    # textual dump of the pruned model
    print('exporting')
    exportfile = os.path.join(trydir, 'kmm_pruned.text')
    exportModel(prunedmodel, exportfile)

    # ---- convert the pruned text model to interpolation ----
    print('converting')
    kmm_model = exportfile
    inter_model = os.path.join(trydir, config.getFinalModelFileName())
    convertModel(kmm_model, inter_model)

    # record the size of the final model
    modelsize = utils.get_file_length(inter_model)
    cwdstatus['PruneModelSize'] = modelsize
    utils.store_status(cwdstatuspath, cwdstatus)

    print('final model size:', modelsize)

    # sign the 'Prune' epoch and store the status one last time
    utils.sign_epoch(cwdstatus, 'Prune')
    utils.store_status(cwdstatuspath, cwdstatus)
    print('done')
Something went wrong with that request. Please try again.