Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Integration of Berkeley LM.

  • Loading branch information...
commit 090487b7e85b83056dde92416ab7e0c05951b847 1 parent 01058e4
@julosaure authored
View
7 project.clj
@@ -1,8 +1,13 @@
(defproject decloder "1.0.0-SNAPSHOT"
:description "FIXME: write description"
+ ; Extra repository named "local": the file: URI of the relative directory mvn_repo.
+ :repositories {"local" ~(str (.toURI (java.io.File. "mvn_repo")))}
+
+ ; Each dependency is [maven-groupId/artifactId "version"].
:dependencies [[org.clojure/clojure "1.3.0"]
[org.clojure/data.priority-map "0.0.2"]
+ [berkeleylm/berkeleylm "1.1.2"] ; presumably supplies the edu.berkeley.nlp.lm classes imported by decloder.blm
]
-
+
:jvm-opts ["-Xmx10242m"] ; -Xmx sets the maximum JVM heap size (10242 MB here)
:main decloder.core)
View
49 src/decloder/blm.clj
@@ -0,0 +1,49 @@
+(ns decloder.blm
+ (:require clojure.java.shell)
+ (:require clojure.string)
+ (:import [java.util Arrays])
+ (:import [edu.berkeley.nlp.lm.io LmReaders])
+ (:import [edu.berkeley.nlp.lm NgramLanguageModel])
+ )
+
+;; GLOBALS
+;; Shell command prefix naming BerkeleyLM's ComputeLogProbabilityOfTextStream class; no function in this file uses it.
+(def LM_QUERY "java -ea -mx1000m -server -cp ../src edu.berkeley.nlp.lm.io.ComputeLogProbabilityOfTextStream ")
+
+;(def LM_BIN "/Users/julien/workspaces/clojure/berkeleylm-1.1.2/examples/big_test.binary")
+;; Absolute path of the binary language-model file that load-lm prints and reads.
+(def LM_BIN "/Users/julien/workspaces/xp/europarl/europarl-v7.fr-en.fr.tok.low.se.berk_lmbin")
+;; Initialised to nil; no live code in this file reads or rebinds it.
+(def LM nil)
+
+;; UTILS
+
+
+;; FUNCTIONS
+
+(defn call-lm [lm list-ngrams]
+  ;; Returns the negation of whatever (.getLogProb lm list-ngrams) yields.
+  ;; list-ngrams is passed through as-is; it is not wrapped via Arrays/asList.
+  (let [log-prob (.getLogProb lm list-ngrams)]
+    (- log-prob)))
+
+
+
+(defn load-lm []
+  ;; Prints the path held in LM_BIN to stdout, then hands that same path to
+  ;; LmReaders/readLmBinary and returns the result.
+  (let [lm-path LM_BIN]
+    (println "Loading LM " lm-path)
+    (LmReaders/readLmBinary lm-path)))
+
+(defn score-ngrams [lm n-grams]
+  ;; Tokenises n-grams on whitespace, scores the tokens with lm through call-lm,
+  ;; and returns call-lm's result (the negated .getLogProb value).
+  ;;   lm      - object forwarded to call-lm, which invokes .getLogProb on it
+  ;;   n-grams - non-blank string of whitespace-separated tokens
+  ;; Throws AssertionError when a :pre or :post condition fails.
+  {:pre [(string? n-grams)
+         ;; a whitespace-only string would tokenise to nothing, so reject it up front
+         (not (clojure.string/blank? n-grams))]
+   ;; zero is a valid score: a log-probability of 0 negates to 0
+   :post [(>= % 0)]}
+  ;; Trim, then split on runs of whitespace, so leading, trailing or doubled
+  ;; spaces never put empty-string tokens into the list handed to the LM.
+  (let [tokens (clojure.string/split (clojure.string/trim n-grams) #"\s+")]
+    (call-lm lm tokens)))
View
8 src/decloder/model.clj
@@ -3,6 +3,7 @@
(:require clojure.java.io)
(:import [java.io BufferedReader FileReader])
(:import [java.lang Math])
+ (:require decloder.blm)
)
@@ -12,7 +13,7 @@
(def VOC_TRG "/Users/julien/workspaces/clojure/decloder/data/sentfr/fr-en.trn.trg.vcb")
-(def LEX_PROB "/Users/julien/workspaces/clojure/decloder/data/sentfr/fr-en.t3.final.small")
+(def LEX_PROB "/Users/julien/workspaces/clojure/decloder/data/sentfr/fr-en.t3.final")
;; UTILS
@@ -89,8 +90,9 @@
(let [[voc-src-id voc-id-src] (read-voc VOC_SRC)
[voc-trg-id voc-id-trg] (read-voc VOC_TRG)
- lex-prob (read-lex-prob LEX_PROB)]
+ lex-prob (read-lex-prob LEX_PROB)
+ lm (decloder.blm/load-lm)]
;(println (sort (filter #(.startsWith (key %) "ann") voc-src)))
- {:voc-src-id voc-src-id, :voc-id-src voc-id-src, :voc-trg-id voc-trg-id, :voc-id-trg voc-id-trg, :lex-prob lex-prob}
+ {:voc-src-id voc-src-id, :voc-id-src voc-id-src, :voc-trg-id voc-trg-id, :voc-id-trg voc-id-trg, :lex-prob lex-prob, :lm lm}
))
View
29 src/decloder/translator.clj
@@ -3,7 +3,7 @@
(:require clojure.java.io)
(:import [java.io BufferedReader FileReader])
(:require clojure.data.priority-map)
- (:require decloder.lm)
+ (:require decloder.blm)
)
@@ -18,7 +18,7 @@
;; FUNCTIONS
-(defn score-hypothesis [trg-token lex-prob pred-hypo]
+(defn score-hypothesis [model trg-token lex-prob pred-hypo]
{:pre [(string? trg-token)
(= java.lang.Double (type lex-prob))
(>= lex-prob 0)
@@ -27,9 +27,10 @@
(if (nil? pred-hypo)
lex-prob
- (let [n-grams (str trg-token " " (:token pred-hypo))]
- (println "n-gram to score: " n-grams)
- (+ lex-prob (:score pred-hypo) (decloder.lm/score-ngrams n-grams))
+ (let [n-grams (str trg-token " " (:token pred-hypo))
+ lm-score (decloder.blm/score-ngrams (model :lm) n-grams)]
+ ;(println "n-gram to score: " n-grams " -> " lm-score)
+ (+ lex-prob (:score pred-hypo) lm-score)
)
)
)
@@ -44,7 +45,7 @@
trg-token ((model :voc-id-trg) trg-token-id)
lexical-prob (val lex-prob)
pred (Hypothesis. nil 0 nil)
- score (score-hypothesis trg-token lexical-prob pred)]
+ score (score-hypothesis model trg-token lexical-prob pred)]
(assoc stack (Hypothesis. trg-token score pred) score)
)
)
@@ -60,15 +61,15 @@
(let [src-token-id ((model :voc-src-id) src-token)]
(loop [stack_ stack
lex-probs ((model :lex-prob) src-token-id)
- tata (println "count lex-probs" (count lex-probs))]
+ ];tata (println "count lex-probs" (count lex-probs))]
(if (empty? lex-probs)
stack_
(let [lex-prob (first lex-probs)
trg-token-id (key lex-prob)
lexical-prob (val lex-prob)
trg-token ((model :voc-id-trg) trg-token-id)
- score (score-hypothesis trg-token lexical-prob top-hypo)]
- (recur (assoc stack_ (Hypothesis. trg-token score top-hypo) score) (rest lex-probs) "dd")
+ score (score-hypothesis model trg-token lexical-prob top-hypo)]
+ (recur (assoc stack_ (Hypothesis. trg-token score top-hypo) score) (rest lex-probs))
)
)
)
@@ -125,17 +126,17 @@
(loop [stacks_ stacks
cur-stack (stacks_ pos)
- titi (println "count cur-stack " (count cur-stack))
+ ;titi (println "count cur-stack " (count cur-stack))
prev-stack-pos 1
prev-stack (stacks_ (- pos prev-stack-pos))
- titi (println "count prev-stack " (count prev-stack))]
+ ];titi (println "count prev-stack " (count prev-stack))]
(if (and (not (nil? prev-stack)) (= 0 (count prev-stack)))
(let [prev-stack-pos_ (+ 1 prev-stack-pos)
prev-stack_ (stacks_ (- pos prev-stack-pos_))
];tit (println "count prev-stack " (count prev-stack_))
;toto (println "recur prev-stack 0")]
- (recur stacks_ cur-stack "dd" prev-stack-pos_ prev-stack_ "dd")
+ (recur stacks_ cur-stack prev-stack-pos_ prev-stack_)
)
(if (< (count cur-stack) MAX_HYPO_PER_STACK)
@@ -144,9 +145,9 @@
cur-stack_ (extend-hypo model cur-stack top-hypo src-token)
tata (println "count3 " (count cur-stack_))]
(recur (assoc stacks_ pos cur-stack_)
- cur-stack_ "dddd"
+ cur-stack_
prev-stack-pos
- (rest prev-stack) "dd")
+ (rest prev-stack))
)
stacks_
)
Please sign in to comment.
Something went wrong with that request. Please try again.