more refactoring from NR

ndaniels · Mar 11, 2012 · 51e97a9 · 51e97a9
1 parent ce4fda1
commit 51e97a9
Show file tree

Hide file tree

Showing 3 changed files with 81 additions and 69 deletions.
diff --git a/SearchModel.hs b/SearchModel.hs
@@ -3,7 +3,6 @@ where
 
 type Score = Double
 
-data Scored a = Scored a Score
 infix /+/
 (/+/) :: Score -> Scored a -> Scored a
 x /+/ Scored a y = Scored a (x + y)
@@ -21,29 +20,34 @@ scoreOf (Scored _ x) = x
 --------------------------------------------------------
 data Placement = Placement String
 
+-- @ start scoredecl.tex
+data Scored a = Scored a Score
 type Scorer = Placement -> Scored Placement
+-- @ end scoredecl.tex
+-- @ start strategy.tex
 type Age  = Int -- number of generations explored
 type Seed = Int
 data SearchStrategy = 
  SS { gen0    :: Seed -> [Placement]
     , nextGen :: Seed -> Scorer 
               -> [Scored Placement] -> [Scored Placement]
-    , accept  :: Seed -> [Scored Age] -> Age -> Bool
-    , quit    ::         [Scored Age] -> Age -> Bool
+    , accept  :: Seed -> [Score] -> Age -> Bool
+    , quit    ::         [Score] -> Age -> Bool
     }
+-- @ end strategy.tex
 
 --------------------------------------------------------
+-- @ start search.tex
 search :: SearchStrategy -> Scorer -> [Seed]
-       -> (Scored Placement, [Scored Age])
-search strat score (s:seeds) = runFrom seeds firstGen [] 0
+       -> (Scored Placement, [Score])
+search strat score (s0:seeds) = runFrom seeds firstGen [] 0
  where
-  firstGen = map score $ gen0 strat s
-  runFrom :: [Seed] -> [Scored Placement] -> [Scored Age]
-          -> Age -> (Scored Placement, [Scored Age])
+  firstGen = map score $ gen0 strat s0
+  runFrom :: [Seed] -> [Scored Placement] -> [Score]
+          -> Age -> (Scored Placement, [Score])
   runFrom (s1:s2:seeds) oldPop oldHist age =
     let trialPop  = nextGen strat s1 score oldPop
-        trialHist =
-          Scored age (scoreOf $ minimum newPop) : oldHist
+        trialHist = (scoreOf $ minimum newPop) : oldHist
         (newPop, newHist) =
           if accept strat s2 newHist age then
             (trialPop, trialHist)
@@ -53,3 +57,11 @@ search strat score (s:seeds) = runFrom seeds firstGen [] 0
           (minimum newPop, newHist)
         else
           runFrom seeds newPop newHist (age + 1)
+-- @ end search.tex
+
+-- the following HOF can used to adapt strategy functions that use the old interface
+adapt :: ([Scored Age] -> Age -> a) -> ([Score] -> Age -> a)
+adapt f scores n = f (zipscore (iterate (flip (-) 1) n) scores) n
+  where zipscore (n:ns) (s:ss) = Scored n s : zipscore ns ss
+        zipscore _ _ = []
+
diff --git a/TODO b/TODO
@@ -1,3 +1,20 @@
+1. rename SearchGuess to Placement
+2. pull [BetaStrand] out of SearchStrategy and capture it in a closure
+
+
+try
+
+  data Scored a = Scored a Score
+  (/+/) :: Score -> Scored a -> Scored a
+  x /+/ Scored a y = Scored a (x + y)
+
+  instance Functor Scored where
+     fmap f (Scored a x) = Scored (f a) x
+
+  scoremin :: [Scored a] -> Scored a
+  scoremax :: [Scored a] -> Scored a
+
+
 Use fake inheritance in the search strategy modules
 
 Memoize viterbi at top level

diff --git a/Viterbi.hs b/Viterbi.hs
@@ -28,6 +28,9 @@ type StatePath = [ HMMState ]
 type ScorePathCons a = a -> [a] -> [a]
 type Result = (Score, StatePath)
 
+unscore :: Scored a -> (Score, a)
+unscore (Scored a x) = (x, a)
+
 consPath :: ScorePathCons a
 consPath x xs = x:xs
 
@@ -73,70 +76,53 @@ viterbi pathCons (hasStart, hasEnd) alpha query hmm =
         edge Del Mat = d_m
         edge _   _   = error "unimplemnted or disallowed HMM edge"
 
-        infix /+/
-
-        (/+/) :: Score -> Result -> Result
-        snoc  :: Result -> HMMState -> Result
-
-        score' /+/ (score, path) = (score' + score, path)
-        snoc (score, path) state = (score, state `pathCons` path)
+        insertProb j i = emissionProb (insertionEmissions $ hmm ! j) (res i)
+        matchProb  j i = emissionProb (matchEmisssios     $ hmm ! j) (res i)
 
-        -- @ start viterbi.tex -8
-        vpaper Mat j i = (eProb j i /+/ DL.minimum [ from Mat
-                                                , from Ins
-                                                , from Del
-                                                ]) `snoc` Mat
-          where from prev = tProb (edge prev Mat) (j-1) /+/
-                              viterbi' prev (j - 1) (i - 1)
-        -- @ end viterbi.tex
+        disallowed = Scored [] maxProb
 
         --------------------------------------------------------
-        vpaper' Mat j i =
-          (eProb j i /+/ DL.minimum [from Mat, from Ins, from Del]) 
-                                                       `snoc` Mat
+        -- @ start viterbi.tex -8
+        vpaper' Mat j i = fmap (pathCons Mat)
+          (eProb j i /+/ minimum [from Mat, from Ins, from Del]) 
          where from prev = tProb (edge prev Mat) (j-1) /+/
-                               viterbi' prev (j - 1) (i - 1)
+                                       viterbi' prev (j-1) (i-1)
+        -- @ end viterbi.tex
 
         -- node 1 and zeroth observation
-        viterbi'' Mat 1 0 = ( transProb hmm 0 m_m +
-                              emissionProb (matchEmissions $ hmm ! 1) (res 0)
-                            , [Mat]
-                            ) -- we came from 'begin'
-        viterbi'' Ins 1 0 = (maxProb, []) -- not allowed
-        viterbi'' Del 1 0 = (maxProb, []) -- not allowed
+        viterbi'' Mat 1 0 = Scored [Mat] (tProb m_m 0 + matchProb 1 0)
+                            --           ^^^^^^^^^^^^^ is this right?  ---NR
+                            -- we came from 'begin'
+        viterbi'' Ins 1 0 = disallowed
+        viterbi'' Del 1 0 = disallowed
 
         -- node 0 and zeroth observation, base of self-insert
-        viterbi'' Mat 0 0 = (maxProb, []) -- not allowed
-        viterbi'' Ins 0 0 = ( transProb hmm 0 m_i +
-                              emissionProb (insertionEmissions $ hmm ! 0) (res 0)
-                            , [Ins]
-                            )
-        viterbi'' Del 0 0 = (maxProb, []) -- not allowed
+        viterbi'' Mat 0 0 = disallowed
+        viterbi'' Ins 0 0 = Scored [Ins] (tProb m_i 0 + insertProb 0 0)
+        viterbi'' Del 0 0 = disallowed
 
         -- node 0 and no observations
-        viterbi'' Mat 0 (-1) = (transProb hmm 0 m_m, [])
-        viterbi'' Ins 0 (-1) = (maxProb, [])
-        viterbi'' Del 0 (-1) = (maxProb, [])
+        viterbi'' Mat 0 (-1) = Scored [] (tProb m_m 0)
+        viterbi'' Ins 0 (-1) = disallowed
+        viterbi'' Del 0 (-1) = disallowed
 
         -- node 0 but not zeroth observation
-        viterbi'' Mat 0 i = (maxProb,[]) -- not allowed
-        viterbi'' Ins 0 i = ( transProb hmm 0 i_i +
-                              emissionProb (insertionEmissions $ hmm ! 0) (res i) +
-                              score
-                            , pathCons Ins path
-                            ) -- possible self-insert cycle
-          where (score, path) = viterbi' Ins 0 (i - 1)
-        viterbi'' Del 0 i = (maxProb, []) -- not allowed
-        viterbi'' End 0 i = (transProb hmm 0 m_e, [Mat])
+        viterbi'' Mat 0 i = disallowed
+        viterbi'' Ins 0 i = fmap (pathCons Ins) $
+                            (tProb i_i 0 + insertProb 0 i) /+/ viterbi' Ins 0 (i-1)
+                            -- possible self-insert cycle
+
+        viterbi'' Del 0 i = disallowed
+        viterbi'' End 0 i = Scored [Mat] (tProb m_e 0)
 
         -- node 1 and no more observations (came from begin)
-        viterbi'' Mat 1 (-1) = (maxProb, []) -- not allowed
-        viterbi'' Ins 1 (-1) = (maxProb, []) -- not allowed
-        viterbi'' Del 1 (-1) = (transProb hmm 0 m_d, [Del]) -- came from begin
+        viterbi'' Mat 1 (-1) = disallowed
+        viterbi'' Ins 1 (-1) = disallowed
+        viterbi'' Del 1 (-1) = Scored [Del] (transProb hmm 0 m_d) -- came from begin
 
         -- not node 1 yet, but not more observations (came from delete)
-        viterbi'' Mat j (-1) = (maxProb, []) -- not allowed
-        viterbi'' Ins j (-1) = (maxProb, []) -- not allowed
+        viterbi'' Mat j (-1) = disallowed
+        viterbi'' Ins j (-1) = disallowed
         viterbi'' Del j (-1) = ( transProb hmm (j - 1) d_d + score
                                , pathCons Del path
                                ) -- came from delete
@@ -145,17 +131,7 @@ viterbi pathCons (hasStart, hasEnd) alpha query hmm =
         -- consume an observation AND a node
         -- I think only this equation will change when
         -- we incorporate the begin-to-match code
-        viterbi'' Mat j i = vpaper Mat j i
-
-        viterbi'' Mat j i = DL.minimum $ [ trans m_m Mat -- match came from match
-                                         , trans i_m Ins -- match came from insert
-                                         , trans d_m Del -- match came from delete
-                                         ]
-          where eProb = emissionProb (matchEmissions $ hmm ! j) (res i)
-                tProb = transProb hmm (j - 1)
-                trans transFn prevstate =
-                  let (score, path) = viterbi' prevstate (j - 1) (i - 1)
-                  in  (score + tProb transFn + eProb, pathCons Mat path)
+        viterbi'' Mat j i = vpaper' Mat j i
 
         -- match came from start                                            
         -- consume an observation but not a node
@@ -198,3 +174,10 @@ transProb :: HMM -> Int -> StateAcc -> Double
 transProb hmm nodenum state = case logProbability $ state (transitions (hmm ! nodenum)) of
                                    NonZero p -> p
                                    LogZero -> maxProb
+
+-- @ start vscore.tex
+data Scored a = Scored a Score
+(/+/) :: Score -> Scored a -> Scored a
+-- @ end vscore.tex
+infix /+/
+x /+/ Scored a y = Scored a (x + y)