Permalink
Browse files

import *.clj files from TinyWordSegmenter

  • Loading branch information...
1 parent 48ce74a commit 4599140eb6e50708180acb0ba0e20d2c4bc7597e @syou6162 committed Feb 27, 2012
View
7 project.clj
@@ -0,0 +1,7 @@
+(defproject fobos_clj "0.0.1" ; Leiningen project definition: name and version
+ :description "Implementation of Forward Backward Splitting"
+ :dependencies [[org.clojure/clojure "1.3.0"]
+ [org.clojure/clojure-contrib "1.2.0"]] ; provides the clojure.contrib.* namespaces required by the sources
+ :dev-dependencies [[swank-clojure "1.3.2"]]
+ :jvm-opts ["-Xmx20g" "-server" "-Dfile.encoding=UTF-8"] ; NOTE(review): asks the JVM for a 20 GB max heap -- confirm this suits the target machine
+ :main fobos_clj.core)
View
44 src/fobos_clj/core.clj
@@ -0,0 +1,44 @@
+(ns fobos_clj.core
+ (:use fobos_clj.util)
+ (:use fobos_clj.fobos)
+ (:use fobos_clj.svm)
+ (:use [clojure.contrib.duck-streams :only (reader read-lines)])
+ (:use [clojure.contrib.command-line :only (with-command-line)]))
+
+(defn- read-examples
+  "Reads `filename` line by line and returns a vector of the values that
+  `parse-line` produced. Lines for which `parse-line` throws an Exception
+  (or yields nil) are left out of the result."
+  [filename]
+  (letfn [(parse-or-nil [line]
+            (try
+              (parse-line line)
+              (catch Exception _ nil)))]
+    (vec (keep parse-or-nil (read-lines filename)))))
+
+(defn run
+  "Trains a weight map on the examples in (opts :train-filename) and returns it.
+  `opts` is a map with the keys :train-filename, :test-filename, :max-iter,
+  :eta and :lambda. One `update-weight` pass (iteration 0) is always performed;
+  further passes run while the iteration counter is below :max-iter. Before
+  each further pass a line `iter : number-of-weights : F-value` is printed,
+  where the F-value is measured on the examples of (opts :test-filename) by
+  predicting 1 when the dot product is positive and -1 otherwise."
+  [opts]
+  (let [train-examples (read-examples (opts :train-filename))
+        test-examples  (read-examples (opts :test-filename))
+        gold           (vec (map first test-examples))
+        max-iter       (opts :max-iter)
+        eta            (opts :eta)
+        lambda         (opts :lambda)
+        predict        (fn [weight [_ fv]]
+                         (if (> (dotproduct weight fv) 0.0) 1 -1))]
+    (loop [iter 1
+           weight (update-weight train-examples {} 0 eta lambda)]
+      ;; `>=` instead of `=`: the counter starts at 1, so an equality test
+      ;; would never succeed (and loop forever) for a :max-iter of 0 or less.
+      (if (>= iter max-iter)
+        weight
+        (do
+          (println iter ":"
+                   (count weight) ":"
+                   (get-f-value gold (map #(predict weight %) test-examples)))
+          (recur (inc iter)
+                 (update-weight train-examples weight iter eta lambda)))))))
+
+(defn -main
+  "Command-line entry point: binds the options declared below via
+  `with-command-line`, converts the numeric ones from strings and hands
+  everything to `run`."
+  [& args]
+  (with-command-line args "lein run --train-filename train.txt --test-filename test.txt --max-iter 10 --eta 1.0 --lambda 1.0"
+    [[train-filename "File name of train"]
+     [test-filename "File name of test"]
+     [max-iter "Maximum number of iteration"]
+     [eta "Update step"]
+     [lambda "Regularization parameter"]
+     rest]
+    (let [parsed-max-iter (Integer/parseInt max-iter)
+          parsed-eta      (Double/parseDouble eta)
+          parsed-lambda   (Double/parseDouble lambda)]
+      (run {:train-filename train-filename
+            :test-filename  test-filename
+            :max-iter       parsed-max-iter
+            :eta            parsed-eta
+            :lambda         parsed-lambda}))))
View
34 src/fobos_clj/fobos.clj
@@ -0,0 +1,34 @@
+(ns fobos_clj.fobos)
+
+(defn clip-by-zero
+  "Moves `a` toward zero by `b`: returns (- a b) when `a` is positive and
+  greater than `b`, (+ a b) when `a` is not positive and less than (- b),
+  and 0.0 in every other case."
+  [a b]
+  (cond
+    (and (> a 0.0) (> a b))            (- a b)
+    (and (not (> a 0.0)) (< a (- b)))  (+ a b)
+    :else                              0.0))
+
+(defn dotproduct
+  "Inner product of the weight map `weight` and the feature vector `fv`,
+  a sequence of [key value] pairs. Keys missing from `weight` count as 0.0;
+  an empty `fv` gives 0.0."
+  [weight fv]
+  (reduce + 0.0
+          (for [[feature x] fv]
+            (* x (get weight feature 0.0)))))
+
+(defn get-eta
+  "Decay factor that shrinks as the iterations progress:
+  1 / (1 + iter / example-size). Returns 1.0 for iteration 0.
+  `example-size` must be non-zero."
+  [iter example-size]
+  (/ 1.0 (+ 1.0 (/ iter example-size))))
+
+(defn l1-regularize
+  "Applies L1 regularization to the weight map `weight` and returns the
+  sparsified result. Every value is moved toward zero by
+  (* (get-eta iter example-size) lambda) using `clip-by-zero`; entries whose
+  clipped value is zero are removed from the map."
+  [weight iter example-size lambda]
+  (let [lambda-hat (* (get-eta iter example-size) lambda)]
+    (reduce (fn [w [k v]]
+              (let [clipped (clip-by-zero v lambda-hat)]
+                ;; Drop every weight that was clipped to zero, including one
+                ;; whose magnitude equals lambda-hat exactly; keeping it as an
+                ;; explicit 0.0 would defeat the sparsity of the map.
+                (if (zero? clipped)
+                  (dissoc w k)
+                  (assoc w k clipped))))
+            weight weight)))
+
+(defn muladd
+  "Returns `weight` with (* y xi scale) added to the entry of every [k xi]
+  pair in the feature vector `fv`. Keys not yet in `weight` start from 0.0."
+  [weight fv y scale]
+  (letfn [(bump [acc [feature x]]
+            (let [current (get acc feature 0.0)
+                  delta   (* y x scale)]
+              (assoc acc feature (+ current delta))))]
+    (reduce bump weight fv)))
View
18 src/fobos_clj/svm.clj
@@ -0,0 +1,18 @@
+(ns fobos_clj.svm
+ (:use [fobos_clj.fobos]))
+
+(defn margin
+  "Signed margin of one example: the dot product of `weight` and the feature
+  vector `fv`, multiplied by the label `y`."
+  [weight fv y]
+  (let [score (dotproduct weight fv)]
+    (* score y)))
+
+(defn update-weight
+  "Runs one pass over `examples` (a sequence of [y fv] pairs) and returns the
+  new weight map. For every example whose margin under the current weights is
+  below 1.0, `muladd` is applied with scale `eta`; after the pass the result
+  is handed to `l1-regularize` together with `iter`, the number of examples
+  and `lambda`."
+  [examples weight iter eta lambda]
+  (letfn [(step [w [y fv]]
+            (if (< (margin w fv y) 1.0)
+              (muladd w fv y eta)
+              w))]
+    (let [example-size (count examples)
+          trained      (reduce step weight examples)]
+      (l1-regularize trained iter example-size lambda))))
+
+(defn classify
+  "True when the margin of (`fv`, `y`) under `weight` is strictly positive."
+  [weight fv y]
+  (pos? (margin weight fv y)))
View
24 src/fobos_clj/util.clj
@@ -0,0 +1,24 @@
+(ns fobos_clj.util
+ (:use [clojure.contrib.string :only (split)]))
+
+(defn parse-line
+  "Parses one line of the form `label key:value key:value ...` into
+  [y fv]: `y` is -1 for the label \"-1\" and 1 for \"+1\" or \"1\"; `fv` is
+  a vector of [key value] pairs with string keys and double values.
+  Throws on a line that does not have this shape."
+  [line]
+  (let [[y fv] (-> (re-seq #"([-+]?1)\s(.*)" line)
+                   (first)
+                   (rest))]
+    ;; The pattern also accepts an unsigned \"1\"; only an explicit \"-1\" is
+    ;; the negative class, so \"1\" is no longer misread as -1.
+    [(if (= y "-1") -1 1)
+     (->> fv
+          (split #"\s")
+          (map #(let [[xi cnt] (split #":" %)]
+                  [xi (Double/parseDouble cnt)]))
+          (vec))]))
+
+(defn get-f-value
+  "F-value (harmonic mean of precision and recall) of `prediction` against
+  `gold`, two equally ordered sequences of 1 / -1 labels where 1 is the
+  positive class. Returns 0.0 when there is no true positive."
+  [gold prediction]
+  (let [freq      (frequencies (map vector gold prediction))
+        tp        (get freq [1 1] 0)
+        fp        (get freq [-1 1] 0)
+        false-neg (get freq [1 -1] 0)]
+    ;; Without a true positive, precision and recall are 0 or undefined and
+    ;; the divisions below would throw \"Divide by zero\"; report 0.0 instead.
+    (if (zero? tp)
+      0.0
+      (let [recall    (/ tp (+ tp false-neg))
+            precision (/ tp (+ tp fp))]
+        (/ (* 2.0 recall precision)
+           (+ recall precision))))))
View
6 test/fobos_clj/test/core.clj
@@ -0,0 +1,6 @@
+(ns fobos_clj.test.core
+ (:use [fobos_clj.core])
+ (:use [clojure.test]))
+
+(deftest replace-me ;; FIXME: write
+ (is false "No tests have been written.")) ; placeholder assertion: always fails until real tests replace it
View
74 test/fobos_clj/test/fobos.clj
@@ -0,0 +1,74 @@
+(ns fobos_clj.test.fobos
+ (:use [fobos_clj.fobos])
+ (:use [clojure.test]))
+
+(deftest test-clip-by-zero
+  ;; Each row: value, threshold, expected clipped value.
+  (are [a b expected] (= (clip-by-zero a b) expected)
+    1.0  0.5 0.5
+    1.0  1.5 0.0
+    -1.0 0.5 -0.5
+    -1.0 1.5 0.0))
+
+(deftest test-dotproduct
+  (let [int-keyed   {0 0.1, 1 0.2, 2 0.5, 3 -0.1}
+        mixed-keyed {0 0.1, "1" 0.2, 2 0.5, "3" -0.1}]
+    ;; empty weight map
+    (is (= 0.0 (dotproduct {} [[1 1] [3 2]])))
+    ;; empty feature vector
+    (is (= 0.0 (dotproduct int-keyed [])))
+    ;; integer keys: 1 * 0.2 + 2 * -0.1
+    (is (= 0.0 (dotproduct int-keyed [[1 1] [3 2]])))
+    ;; string keys
+    (is (= 0.0 (dotproduct mixed-keyed [["1" 1.0] ["3" 2.0]])))
+    (is (= 10.0 (dotproduct (assoc mixed-keyed "100" 10)
+                            [["1" 1.0] ["3" 2.0] ["100" 1.0]])))
+    ;; map keys
+    (is (= 0.1 (dotproduct {{:type 31, :str ""} 0.1,
+                            {:type 32, :str ""} 0.3}
+                           [[{:type 31, :str ""} 1.0]])))))
+
+(deftest test-get-eta
+  ;; Each row: expected factor, iteration, number of examples.
+  (are [expected iter example-size] (= expected (get-eta iter example-size))
+    1.0                       0 100
+    (/ 1.0 (+ 1.0 (/ 3 100))) 3 100))
+
+(deftest test-l1-regularize
+  (let [weight      {0 0.1, 1 0.2, 2 0.5, 3 -0.1}
+        regularized (fn [iter example-size lambda]
+                      (l1-regularize weight iter example-size lambda))]
+    ;; threshold 1.0 exceeds every weight: everything is removed
+    (is (empty? (regularized 0 1000 1.0)))
+    ;; tiny lambda: weights survive
+    (is (seq (regularized 0 100 0.001)))
+    ;; late iteration: the decayed threshold lets weights survive
+    (is (seq (regularized 1000 100 1.0)))))
+
+(deftest test-muladd
+  (let [weight {0 0.1, 1 0.2, 2 0.5, 3 -0.1}
+        fv     [[1 1] [2 1.0] [100 1]]]
+    ;; Each row: label y, expected weights after (muladd weight fv y 1.0).
+    (are [y expected] (= (muladd weight fv y 1.0) expected)
+      1  {0 0.1, 1 1.2, 2 1.5, 3 -0.1, 100 1.0}
+      -1 {0 0.1, 1 -0.8, 2 -0.5, 3 -0.1, 100 -1.0})))
View
24 test/fobos_clj/test/svm.clj
@@ -0,0 +1,24 @@
+(ns fobos_clj.test.svm
+ (:use [fobos_clj.svm])
+ (:use [clojure.test]))
+
+(deftest test-margin
+  (let [weight {0 0.1, 1 0.2, 2 0.5, 3 -0.1}
+        fv     [[1 1] [2 1]]]
+    ;; Each row: label y, expected margin.
+    (are [y expected] (= (margin weight fv y) expected)
+      1  0.7
+      -1 -0.7)))
+
+(deftest test-update-weight
+  (let [examples    [[1 [[1 1] [2 1]]]
+                     [-1 [[1 2] [2 0]]]]
+        init-weight {0 0.1, 1 0.2, 2 0.5, 3 -0.1}
+        iter        10
+        eta         0.1
+        lambda      0.1
+        result      (update-weight examples init-weight iter eta lambda)
+        ;; Both examples have a margin below 1.0, so both are applied:
+        ;; key 1 goes 0.2 -> 0.3 -> 0.1 and key 2 goes 0.5 -> 0.6 -> 0.6.
+        ;; Each weight is then moved toward zero by 0.1 / (1 + 10/2).
+        expected    {0 0.0833333, 1 0.0833333, 2 0.5833333, 3 -0.0833333}]
+    (is (= (set (keys expected)) (set (keys result))))
+    ;; Compare with a tolerance: the values come from floating-point sums.
+    (doseq [[k v] expected]
+      (is (< (Math/abs (- v (get result k 0.0))) 1e-6)))))
+
+(deftest test-classify
+  ;; dot product 0.2 + 0.5 is positive, so label 1 is classified correctly
+  (is (classify {0 0.1, 1 0.2, 2 0.5, 3 -0.1}
+                [[1 1] [2 1]]
+                1)))
View
28 test/fobos_clj/test/util.clj
@@ -0,0 +1,28 @@
+(ns fobos_clj.test.util
+ (:use [fobos_clj.util])
+ (:use [clojure.test]))
+
+(deftest test-parse-line
+  ;; Each row: input line, expected [label feature-vector].
+  (are [line expected] (= (parse-line line) expected)
+    "+1 1:2 2:3"
+    [1 [["1" 2.0] ["2" 3.0]]]
+    "-1 1:2.05 2:0.1"
+    [-1 [["1" 2.05] ["2" 0.1]]]
+    "+1 40:0.039590 75:0.039590 89:0.039590 92:0.039590"
+    [1 [["40" 0.039590] ["75" 0.039590]
+        ["89" 0.039590] ["92" 0.039590]]]
+    "-1 40:0.039590 75:0.039590 89:0.039590 92:0.039590"
+    [-1 [["40" 0.039590] ["75" 0.039590]
+         ["89" 0.039590] ["92" 0.039590]]]))
+
+(deftest test-get-f-value
+  (let [gold      [-1 -1 1 -1 1 -1 1 1]
+        predict   [1 -1 1 -1 1 -1 1 -1]
+        ;; counts read off the two vectors above
+        true-pos  3
+        false-pos 1
+        false-neg 1
+        precision (/ true-pos (+ true-pos false-pos))
+        recall    (/ true-pos (+ true-pos false-neg))
+        expected  (/ (* 2.0 recall precision)
+                     (+ recall precision))]
+    (is (= expected (get-f-value gold predict)))))

0 comments on commit 4599140

Please sign in to comment.