Skip to content
Browse files

publish

  • Loading branch information...
1 parent 9cc0187 commit 4464c83db60ee5fcc852397f6ae5186096e069ed @rjurney committed Sep 30, 2012
Showing with 1 addition and 0 deletions.
  1. +1 −0 tfidf.macro
View
1 tfidf.macro
@@ -1,5 +1,6 @@
DEFINE tf_idf(in_relation, id_field, text_field) RETURNS out_relation {
+ /* Note: we should be using the Lucene tokenizer; splitting on whitespace with TOKENIZE isn't good enough. */
token_records = foreach $in_relation generate $id_field, FLATTEN(TOKENIZE($text_field)) as tokens;
/* Calculate the term count per document */

0 comments on commit 4464c83

Please sign in to comment.
Something went wrong with that request. Please try again.