Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

changes for topic inference on documents with extended features (mallet workflow only)
  • Loading branch information...
commit 3927cdc086230b23d7dfd2e46c0e76ec3bac1d79 1 parent 85cd680
@sbenthall authored
Showing 2 changed files with 4 additions and 3 deletions.
  1. +2 −1  infertopics.py
  2. +2 −2 settings.py
View
3  infertopics.py
@@ -7,7 +7,8 @@ def import_dir():
cmd = MALLET + "import-dir "
cmd += "--input %s " % (DATA_DIR)
cmd += "--output %s " % (MALLET_INPUT_FILE)
- cmd += "--keep-sequence --remove-stopwords"
+ cmd += "--keep-sequence --remove-stopwords "
+ cmd += "--token-regex [A-Za-z0-9]+"
os.system(cmd)
def train_topics():
View
4 settings.py
@@ -1,7 +1,7 @@
## INFER TOPICS PARAMETERS
-DATA_DIR = "data"
+DATA_DIR = "xdocs"
MALLET_INPUT_FILE = "input.mallet"
INFERENCER_FILE = "inferencer.mallet"
OUTPUT_STATE = "topic-state.gz"
@@ -10,6 +10,6 @@
NUM_TOP_WORDS = 30
-NUM_TOPICS = 25
+NUM_TOPICS = 50
OPTIMIZE_INTERVAL = 200
Please sign in to comment.
Something went wrong with that request. Please try again.