Permalink
Browse files

initial commit

  • Loading branch information...
ngrunwald committed Aug 5, 2011
0 parents commit c857cdc11ee4923a2890a129c6eb47201fbbf094
Showing with 53 additions and 0 deletions.
  1. +6 −0 .gitignore
  2. +13 −0 README
  3. +3 −0 project.clj
  4. +25 −0 src/clj_treetagger.clj
  5. +6 −0 test/clj_treetagger/test/core.clj
@@ -0,0 +1,6 @@
+pom.xml
+*jar
+/lib/
+/classes/
+.lein-failures
+.lein-deps-sum
13 README
@@ -0,0 +1,13 @@
+# clj-treetagger
+
+FIXME: write description
+
+## Usage
+
+FIXME: write
+
+## License
+
+Copyright (C) 2011 FIXME
+
+Distributed under the Eclipse Public License, the same as Clojure.
@@ -0,0 +1,3 @@
+;; Leiningen project definition for clj-treetagger.
+;; The only declared dependency is tt4j, the Java wrapper around the
+;; TreeTagger executable, which this library exposes to Clojure.
+(defproject clj-treetagger "0.1.01"
+  :description "Functional Clojure wrapper for tt4j and Treetagger"
+  :dependencies [[org.annolab.tt4j "1.0.15"]])
@@ -0,0 +1,25 @@
+(ns clj-treetagger
+ (:import [org.annolab.tt4j TreeTaggerWrapper TokenHandler]))
+
+(defn make-pos-tagger
+  "Creates a tt4j TreeTaggerWrapper and hands model-path to its .setModel.
+
+  Before the wrapper is constructed, the JVM system property
+  `treetagger.home` is set to home-path; this is a process-wide side
+  effect. Returns the wrapper instance."
+  [home-path model-path]
+  (System/setProperty "treetagger.home" home-path)
+  ;; doto returns the freshly constructed wrapper after .setModel has run.
+  (doto (TreeTaggerWrapper.)
+    (.setModel model-path)))
+
+(defn tag-tokens
+  "Processes tokens with tagger and returns a vector of maps, one per
+  handler callback, each holding the keys :token, :pos and :lemma.
+
+  A new TokenHandler is installed on tagger for every call (replacing any
+  handler set previously); it appends each reported result to a local
+  atom, which is dereferenced once .process has returned."
+  [tagger tokens]
+  (let [results (atom [])
+        handler (proxy [TokenHandler] []
+                  (token [tok pos lemma]
+                    (swap! results conj {:token tok :pos pos :lemma lemma})))]
+    (.setHandler tagger handler)
+    (.process tagger tokens)
+    @results))
+
+(defn make-pos-tagger-fn
+  "Returns a one-argument function that tags a collection of tokens.
+
+  The tagger is built once, via make-pos-tagger with home-path and
+  model-path, and is closed over; every call of the returned function
+  passes that same tagger to tag-tokens."
+  [home-path model-path]
+  (let [tt (make-pos-tagger home-path model-path)]
+    #(tag-tokens tt %)))
@@ -0,0 +1,6 @@
+(ns clj-treetagger.test.core
+  ;; The library lives in the single-segment namespace `clj-treetagger`
+  ;; (src/clj_treetagger.clj); no `clj-treetagger.core` namespace exists,
+  ;; so requiring it would make this test namespace fail to load.
+  (:use [clj-treetagger])
+  (:use [clojure.test]))
+
+;; Placeholder that fails on purpose until real tests are written.
+(deftest replace-me ;; FIXME: write
+  (is false "No tests have been written."))

0 comments on commit c857cdc

Please sign in to comment.