Permalink
Browse files

fixes model size calculation in generate.py

  • Loading branch information...
1 parent b3c2761 commit 5b2b30242c8b07b954e7a0bf7b647c2a6e219c47 @epico epico committed Jul 27, 2011
Showing with 5 additions and 4 deletions.
  1. +5 −4 generate.py
View
@@ -28,7 +28,7 @@ def generateOneText(infile, modelfile, reportfile):
if not utils.check_epoch(infilestatus, 'Segment'):
raise utils.EpochError('Please segment first.\n')
if utils.check_epoch(infilestatus, 'Generate'):
- return
+ return False
#begin processing
cmdline = ['./gen_k_mixture_model', '--maximum-occurs-allowed', \
@@ -55,6 +55,7 @@ def generateOneText(infile, modelfile, reportfile):
utils.sign_epoch(infilestatus, 'Generate')
utils.store_status(infilestatuspath, infilestatus)
+ return True
#Note: should check the corpus file size, and skip text files that are too small.
@@ -77,7 +78,7 @@ def storeModelStatus(modelfile, textnum, nexttextnum):
modelstatus['GenerateStart'] = textnum
modelstatus['GenerateEnd'] = nexttextnum
utils.sign_epoch(modelstatus, 'Generate')
- utils.store_status(modelstatuspath, modelstatus)
+ utils.store_status(modelstatuspath, modelstatus)
print(indexpath, subdir, indexname)
@@ -114,14 +115,14 @@ def storeModelStatus(modelfile, textnum, nexttextnum):
print("Skipping " + title + '#' + textpath)
continue
- aggmodelsize += infilesize
modeldir = os.path.join(config.getModelDir(), subdir, indexname)
os.makedirs(modeldir, exist_ok=True)
modelfile = os.path.join(modeldir, \
config.getCandidateModelName(modelnum))
reportfile = modelfile + config.getReportPostfix()
print("Proccessing " + title + '#' + textpath)
- generateOneText(infile, modelfile, reportfile)
+ if generateOneText(infile, modelfile, reportfile):
+ aggmodelsize += infilesize
print("Processed " + title + '#' + textpath)
if aggmodelsize > config.getCandidateModelSize():
nexttextnum = i + 1

0 comments on commit 5b2b302

Please sign in to comment.