Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base fork: julosaure/decloder
base: 7104764974
...
head fork: julosaure/decloder
compare: 09c57c16f5
Checking mergeability… Don't worry, you can still create the pull request.
  • 3 commits
  • 4 files changed
  • 0 commit comments
  • 1 contributor
View
4 project.clj
@@ -1,6 +1,8 @@
(defproject decloder "1.0.0-SNAPSHOT"
:description "FIXME: write description"
- :dependencies [[org.clojure/clojure "1.3.0"]]
+ :dependencies [[org.clojure/clojure "1.3.0"]
+ [org.clojure/data.priority-map "0.0.2"]
+ ]
:jvm-opts ["-Xmx512m"]
:main decloder.core)
View
15 src/decloder/core.clj
@@ -3,7 +3,8 @@
(:require clojure.java.io)
(:import [java.io BufferedReader FileReader])
- (:require decloder.model))
+ (:require decloder.model)
+ (:require decloder.translator))
;; GLOBALS
@@ -13,10 +14,14 @@
;; FUNCTIONS
+(defn read-sentences [] ; Stub input source: takes no arguments and returns one hard-coded French sentence whose tokens are separated by single spaces.
+ "Il y a quelques années , l' astronome français Alfred Vidal-Madjar donnait à l' un de ses ouvrages le joli titre Il pleut des planètes ." ; This string comes after the parameter vector, so it is the function body (the return value), not a docstring.
+ )
(defn -main []
- (let [model (decloder.model/init-engine)]
- ;(read-sentences)
- ;(translate-sentences)
+ (let [model (decloder.model/init-engine)
+ sentence (read-sentences)]
+ (decloder.translator/translate-sentence model sentence)
)
- )
+ )
+
View
19 src/decloder/model.clj
@@ -6,9 +6,9 @@
;; GLOBALS
-(def VOC_SRC "/Users/julien/workspaces/clojure/decloder/data/sentfr/fr-en.trn.src.vcb")
+(def VOC_SRC "/Users/julien/workspaces/clojure/decloder/data/sentfr/fr-en.trn.trg.vcb")
-(def VOC_TRG "/Users/julien/workspaces/clojure/decloder/data/sentfr/fr-en.trn.trg.vcb")
+(def VOC_TRG "/Users/julien/workspaces/clojure/decloder/data/sentfr/fr-en.trn.src.vcb")
(def LEX_PROB "/Users/julien/workspaces/clojure/decloder/data/sentfr/fr-en.t3.final.small")
@@ -58,11 +58,24 @@
)
)
+(defn startswith
+  "Returns true when the sequence `pat` is a prefix of the sequence `str`
+  (both may be strings or any seqable collection), false otherwise.
+  An empty or nil `pat` is a prefix of everything."
+  [str pat]
+  ;; `seq`/`next` yield nil once a sequence is exhausted, so the end of the
+  ;; pattern can be detected reliably. (`rest` returns an empty seq, which is
+  ;; truthy, so testing its result directly never signals exhaustion.)
+  (loop [str_ (seq str)
+         pat_ (seq pat)]
+    (cond
+      ;; whole pattern consumed without a mismatch: it is a prefix
+      (nil? pat_) true
+      ;; pattern still has elements but the input ran out: not a prefix
+      (nil? str_) false
+      (= (first pat_) (first str_)) (recur (next str_) (next pat_))
+      :else false)))
+
(defn init-engine []
(let [voc-src (read-voc VOC_SRC)
voc-trg (read-voc VOC_TRG)
lex-prob (read-lex-prob LEX_PROB)]
- ;lex-prob
+ ;(println (sort (filter #(.startsWith (key %) "ann") voc-src)))
{:voc-src voc-src, :voc-trg voc-trg, :lex-prob lex-prob}
))
View
117 src/decloder/translator.clj
@@ -0,0 +1,117 @@
+(ns decloder.translator
+ (:require clojure.string)
+ (:require clojure.java.io)
+ (:import [java.io BufferedReader FileReader])
+ (:require clojure.data.priority-map)
+ )
+
+
+
+;; GLOBALS
+
+(def MAX_HYPO_PER_STACK 100) ; Size cap for one stack: `search` keeps extending the current stack only while it holds fewer hypotheses than this.
+
+;; UTILS
+
+(defrecord Hypothesis [token, score, pred]) ; One decoding hypothesis: `token` is the emitted target token, `score` its accumulated score, `pred` the predecessor Hypothesis it extends (the initial predecessor is built as (Hypothesis. nil 0 nil)).
+
+;; FUNCTIONS
+
+(defn score-hypothesis
+  "Score of a hypothesis that extends `pred-hypo`: the lexical probability
+  `lex-prob` added to the predecessor's :score.
+  `trg-token` is accepted but not used in the computation."
+  [trg-token lex-prob pred-hypo]
+  (let [pred-score (:score pred-hypo)]
+    (+ lex-prob pred-score)))
+
+(defn new-hypo
+  "Reducing function: adds to `stack` (a map from Hypothesis to score) a fresh
+  hypothesis built from one lexical-probability map entry `lex-prob`.
+  The new hypothesis hangs off an empty root hypothesis with score 0.
+  Returns the updated stack."
+  [stack lex-prob]
+  ;; `lex-prob` is a map entry [key probability]. Calling `second` on the entry
+  ;; itself yields the probability, not a token, so the target token must be
+  ;; read from the key.
+  ;; NOTE(review): assumes the key is a [src-token trg-token] pair, as implied
+  ;; by the callers filtering on (first (key %)) -- confirm against the
+  ;; lexical-probability reader in decloder.model.
+  (let [trg-token (second (key lex-prob))
+        lexical-prob (val lex-prob)
+        pred (Hypothesis. nil 0 nil)
+        score (score-hypothesis trg-token lexical-prob pred)]
+    (assoc stack (Hypothesis. trg-token score pred) score)))
+
+(defn extend-hypo
+  "Extends hypothesis `hypo` with every lexical-probability entry of
+  (model :lex-prob) whose key starts with `src-token`, adding one new
+  Hypothesis (with `hypo` as predecessor) per entry to `stack`.
+  Returns the updated stack; returns `stack` unchanged when no entry matches."
+  [model stack hypo src-token]
+  (reduce
+    (fn [stack_ lex-prob]
+      ;; `lex-prob` is a map entry [key probability]; the target token lives in
+      ;; the key, while `second` on the entry itself would give the probability.
+      ;; NOTE(review): assumes the key is a [src-token trg-token] pair, as
+      ;; implied by the filter below -- confirm against the lexical-probability
+      ;; reader in decloder.model.
+      (let [trg-token (second (key lex-prob))
+            lexical-prob (val lex-prob)
+            score (score-hypothesis trg-token lexical-prob hypo)]
+        (assoc stack_ (Hypothesis. trg-token score hypo) score)))
+    ;; The fold result is the accumulated stack, so the additions are kept
+    ;; (returning the untouched input stack would discard every extension).
+    stack
+    (filter #(= (first (key %)) src-token) (model :lex-prob))))
+
+(defn search
+  "Monotone stack search over `src-sentence` (a sequence of source token ids).
+  Builds one stack per source position; each stack is a priority map from
+  Hypothesis to score. Stack 0 is seeded with `new-hypo` from the entries of
+  (model :lex-prob) matching the first token; each later stack is filled by
+  extending hypotheses of the previous stack with `extend-hypo`.
+  Returns a map {position -> stack}; an empty sentence yields {}."
+  [model src-sentence]
+  (loop [remaining (seq src-sentence)
+         pos 0
+         stacks {}]
+    ;; Stop when the input is exhausted. (Comparing `pos` with the length of
+    ;; the shrinking remainder would stop roughly halfway through.)
+    (if (nil? remaining)
+      stacks
+      (let [src-token (first remaining)
+            empty-stack (clojure.data.priority-map/priority-map)]
+        (println "Main loop, pos " pos ", token " src-token)
+        (let [cur-stack
+              (if (= pos 0)
+                (reduce new-hypo empty-stack
+                        (filter #(= (first (key %)) src-token) (model :lex-prob)))
+                ;; Walk the previous stack in priority order, extending one
+                ;; hypothesis at a time, until the current stack reaches the
+                ;; size cap or the previous stack has no hypotheses left.
+                ;; NOTE(review): a priority map iterates from the lowest
+                ;; priority upward; if a higher score means a better
+                ;; hypothesis, iterate with `rseq` instead -- confirm.
+                (loop [cur-stack empty-stack
+                       prev-entries (seq (stacks (- pos 1)))]
+                  (if (and prev-entries
+                           (< (count cur-stack) MAX_HYPO_PER_STACK))
+                    ;; entries are [hypothesis score]; the key is the hypothesis
+                    (recur (extend-hypo model cur-stack
+                                        (key (first prev-entries)) src-token)
+                           (next prev-entries))
+                    cur-stack)))]
+          ;; Store the finished stack under its position so that earlier
+          ;; stacks stay available in the result.
+          (recur (next remaining) (+ pos 1) (assoc stacks pos cur-stack)))))))
+
+
+
+(defn tokenize-sentence
+  "Splits the string `s` on single spaces and lower-cases every piece.
+  Returns a lazy sequence of token strings."
+  [s]
+  (->> (clojure.string/split s #" ")
+       (map clojure.string/lower-case)))
+
+(defn tokens-to-ids
+  "Maps every token of the sequence `s` to its id by looking it up in the
+  source vocabulary stored under :voc-src in `model`.
+  Tokens missing from the vocabulary map to nil. Returns a lazy sequence."
+  [model s]
+  (let [vocab (model :voc-src)]
+    (map (fn [token] (vocab token)) s)))
+
+
+(defn translate-sentence
+  "Runs the translation pipeline on one raw `sentence` string: tokenizes it,
+  converts the tokens to source-vocabulary ids, prints each intermediate
+  result, and returns whatever `search` returns for the id sequence."
+  [model sentence]
+  (println "Translating: " sentence)
+  (let [words (tokenize-sentence sentence)
+        word-ids (tokens-to-ids model words)]
+    (println "Tokenized: " words)
+    (println "Ids: " word-ids)
+    (search model word-ids)))

No commit comments for this range

Something went wrong with that request. Please try again.