/
mallet_train-classifier.pyw
executable file
·42 lines (33 loc) · 1.14 KB
/
mallet_train-classifier.pyw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env python
import sys, os, logging, traceback, time, subprocess
import mallet
class MalletClassifier(mallet.Mallet):
    """
    Train a classifier

    Runs MALLET's ``cc.mallet.classify.tui.Vectors2Classify`` tool with the
    NaiveBayes trainer and writes the result to ``trained.classifier`` inside
    ``self.mallet_out_dir``.
    """

    def _basic_params(self):
        """Set the fixed options for this processor (name, dry-run and dfr flags)."""
        self.dry_run = False
        self.name = "mallet_train-classifier"
        self.dfr = False

    def process(self):
        """
        Build the MALLET command line, run it, and report the elapsed time.

        The subprocess is skipped when ``self.dry_run`` is set. Both stdout
        and stderr of the subprocess are sent to ``self.progress_file``.
        A non-zero exit status is logged as an error; the completion page is
        still written afterwards so the run remains best-effort.
        """
        # Presumably prepares self.instance_file (non-sequence instances);
        # the helper lives in the base class -- verify there.
        self._setup_mallet_instances(sequence=False)

        self.mallet_output = os.path.join(self.mallet_out_dir, "trained.classifier")

        # self.mallet is concatenated with a list, so it is expected to be the
        # list holding the command prefix that launches MALLET classes.
        process_args = self.mallet + [
            "cc.mallet.classify.tui.Vectors2Classify",
            "--input", self.instance_file,
            "--output-classifier", self.mallet_output,
            "--trainer", "NaiveBayes",
            "--noOverwriteProgressMessages", "true",
        ]

        logging.info("begin training classifier")

        start_time = time.time()
        if not self.dry_run:
            classifier_return = subprocess.call(
                process_args,
                stdout=self.progress_file,
                stderr=self.progress_file,
            )
            # Previously the exit status was ignored, so a failed run was
            # indistinguishable from a successful one in the log.
            if classifier_return != 0:
                logging.error(
                    "classifier training exited with non-zero status %s",
                    classifier_return,
                )

        finished = "Classifier trained in " + str(time.time() - start_time) + " seconds"
        logging.info(finished)

        params = {'DONE': finished}
        self.write_html(params)
if __name__ == "__main__":
    # Script entry point: build the processor and run it, logging any failure
    # with its full traceback instead of letting it escape unreported.
    try:
        processor = MalletClassifier(track_progress=False)
        processor.process()
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit still terminate the script normally.
        logging.error(traceback.format_exc())