Skip to content

Commit

Permalink
Reload tagger every 10,000 lines to keep memory constrained
Browse files Browse the repository at this point in the history
  • Loading branch information
dbamman committed Jan 13, 2012
1 parent 35bcfa2 commit 7ecc2e7
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/edu/cmu/cs/lti/ark/tweetnlp/RunPOSTagger.java
Expand Up @@ -58,7 +58,17 @@ public static void main(String[] args) throws Exception {
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(Opts.input), "UTF-8"));
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(Opts.output), "UTF-8"));
String line;

int renewEvery=10000;
int lineNumber=0;

while((line = reader.readLine()) != null) {

// Renew the tagger instance every renewEvery lines (10,000) to keep memory in check.
if (++lineNumber % renewEvery == 0) {
TweetTaggerInstance.getInstance().renew();
}

List<String> toks = Twokenize.tokenizeForTagger_J(line);
List<String> tags = doPOSTagging(toks);
if (Opts.format.equals("conll")) {
Expand Down

0 comments on commit 7ecc2e7

Please sign in to comment.