-
Notifications
You must be signed in to change notification settings - Fork 8
/
settings.py
66 lines (60 loc) · 890 Bytes
/
settings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Melanie Tosik
# NLP, Viterbi part-of-speech (POS) tagger
# File locations used by the tagger, grouped by the directory they live in.
# Every path is relative; presumably resolved against the directory the
# scripts are launched from -- TODO confirm against the callers.

# WSJ/ -- corpus files (.pos and .words variants).
TRAIN = "WSJ/WSJ_02-21.pos"
DEV_WORDS = "WSJ/WSJ_24.words"
DEV_POS = "WSJ/WSJ_24.pos"
TEST_WORDS = "WSJ/WSJ_23.words"
# TEST_POS = "WSJ/WSJ_23.pos"  # left disabled, as in the original

# output/ -- one .pos file per WSJ_24 / WSJ_23 split.
DEV_OUT = "output/wsj_24.pos"
TEST_OUT = "output/wsj_23.pos"

# data/ -- HMM model, vocabulary and unknown-token files.
MODEL = "data/hmm_model.txt"
VOCAB = "data/hmm_vocab.txt"
UNK_TOKS = "data/unk_toks.txt"

# docs/ -- confusion matrix in CSV form.
CONFUSION_MATRIX = "docs/confusion_matrix.csv"
# Tag inventory: 45 distinct tag strings, kept in ascending string order
# (symbol tags sort before the alphabetic ones; the backtick tag sorts last).
# Rows are grouped by leading letter purely for readability.
TAGS_WSJ = [
    # Symbols and punctuation
    "#", "$", "''", "(", ")", ",", ".", ":",
    # C - M
    "CC", "CD", "DT", "EX", "FW", "IN",
    "JJ", "JJR", "JJS",
    "LS", "MD",
    # N - P
    "NN", "NNP", "NNPS", "NNS",
    "PDT", "POS", "PRP", "PRP$",
    # R - U
    "RB", "RBR", "RBS", "RP",
    "SYM", "TO", "UH",
    # V
    "VB", "VBD", "VBG", "VBN", "VBP", "VBZ",
    # W
    "WDT", "WP", "WP$", "WRB",
    # Opening quote
    "``",
]