Permalink
Browse files

Added loading and saving HMMs, more performance tests, new default HM…

…Ms for convergance to better numbers, and probably more; I forgot to commit in a while.
  • Loading branch information...
1 parent 03cb288 commit 3fe845a89e6b9e4899badd870dd6c725fea4448e @mikeizbicki committed Mar 17, 2012
Showing with 296 additions and 48 deletions.
  1. +109 −13 BioHMM.hs
  2. +75 −27 HMM.hs
  3. +94 −0 HMMFile.hs
  4. +18 −8 HMMPerf.hs
View
@@ -1,21 +1,117 @@
+{-# LANGUAGE IncoherentInstances #-}
+
module HMM.BioHMM
where
import HMM
+-- import HMMPerf
+import HMMFile
- -- | constant variables
-
-bpList = "AGCT"
+import Control.Monad
+import Data.Array
+import Debug.Trace
+import System.IO
-DNA = "ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCTGCCCTGCCCCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGCCTCCTGACTTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCCCTCATAGGAGAGGAAGCTCGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCCCTGCAGGAACTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAGTTTAATTACAGACCTGAA"
+-- applyTFsItr hmm count
+-- | count==0 = return hmm
+-- | otherwise = do
+-- newhmm <- applyTFs hmm
+-- applyTFsItr newhmm (count-1)
+--
+-- applyTFs hmm = do
+-- tfL <- loadTF
+-- applyLoop hmm tfL
+-- where applyLoop hmm' tfL
+-- | tfL == [] = return hmm'
+-- | otherwise = do
+--
+-- -- next <- x
+-- putStrLn ""
+-- putStrLn ("TF: "++show (head tfL))
+-- let nexthmm = baumWelch hmm' (listArray (1,length $ head tfL) $ head tfL) 1
+-- -- return nexthmm
+-- putStrLn $ show hmm'
+-- applyLoop nexthmm $ tail tfL
- -- | enumerates the cross product of the language order number of times. This is used to initialize the states for our HMM
-
--- enumerateStates :: a -> Int -> [[a]]
-enumerateStates :: String -> Int -> [String]
-enumerateStates language order = enumerateStates' order [[]]
+createTFhmm file hmm = do
+ x <- strTF
+ let hmm' = baumWelch hmm (listArray (1,length x) x) 10
+-- hmmIO <- hmm
+ putStrLn $ show hmm'
+ saveHMM file hmm'
+ return hmm'
+-- seq hmm' $ putStrLn $ show hmm'
+
+-- createAllDNAhmm = do
+-- len <- [1000,10000,20000,30000]
+-- order <- [1,2,3]
+-- -- trace (show len ++ "-" ++ show order) $ return 1
+-- let file = "hmm/autowinegrape-"++show len++"-"++show order++".hmm"
+-- return $ createDNAhmm file len $ simpleMM "AGCT" order
+createAllDNAhmm = createAllDNAhmm' [(len,order) | len <- [1000,10000,20000,30000], order <- [1,2,3] ]
+ where createAllDNAhmm' (x:xs) = do
+ createDNAitr (fst x) (snd x)
+ createAllDNAhmm' xs
+
+createDNAitr len order = createDNAhmm ("hmm/autowinegrape-"++show len++"-"++show order++".hmm") (len) $ simpleMM "AGCT" (order)
+
+createDNAhmm file len hmm = do
+ dna <- loadDNAArray len
+ let hmm' = baumWelch hmm dna 10
+ putStrLn $ show hmm'
+ saveHMM file hmm'
+ return hmm'
+
+loadDNAArray len = do
+ dna <- readFile "dna/winegrape-chromosone2"
+ let dnaArray = listArray (1,len) $ filter isBP dna
+ return dnaArray
where
- enumerateStates' :: Int -> [String] -> [String]
- enumerateStates' order list
- | order == 0 = list
- | otherwise = enumerateStates' (order-1) [symbol:l | l <- list, symbol <- language]
+ isBP x = if x `elem` "AGCT"
+ then True
+ else False
+
+
+strTF = liftM (concat . map ((++) "")) loadTF
+loadTF = liftM (filter isValidStr) $ (liftM lines) $ readFile "TFBindingSites"
+
+isValidStr str = (length str > 0) && (not $ elemChecker "#(/)[]|N" str)
+
+elemChecker :: (Eq a) => [a] -> [a] -> Bool
+elemChecker elemList list
+ | elemList == [] = False
+ | otherwise = if (head elemList) `elem` list
+ then True
+ else elemChecker (tail elemList) list
+
+newHMM = HMM { states=[1,2]
+ , events=['A','G','C','T']
+ , initProbs = ipTest
+ , transMatrix = tmTest
+ , outMatrix = omTest
+ }
+
+ipTest s
+ | s == 1 = 0.1
+ | s == 2 = 0.9
+
+tmTest s1 s2
+ | s1==1 && s2==1 = 0.9
+ | s1==1 && s2==2 = 0.1
+ | s1==2 && s2==1 = 0.5
+ | s1==2 && s2==2 = 0.5
+
+omTest s e
+ | s==1 && e=='A' = 0.4
+ | s==1 && e=='G' = 0.1
+ | s==1 && e=='C' = 0.1
+ | s==1 && e=='T' = 0.4
+ | s==2 && e=='A' = 0.1
+ | s==2 && e=='G' = 0.4
+ | s==2 && e=='C' = 0.4
+ | s==2 && e=='T' = 0.1
+
+
+bwTest = do
+ hmm <- loadHMM "hmm/test" ::IO (HMM String Char)
+ return $ baumWelch hmm (listArray (1,10) "AAAAAAGTGC") 10
View
@@ -1,9 +1,10 @@
module HMM
- ( HMM(..), rnf
+ ( HMM(..), Prob, rnf
, forward
, backward
- , baumWelch, baumWelchItr, baumWelchIO
+ , baumWelch, baumWelchItr --, baumWelchIO
, alpha, beta
+ , simpleMM, simpleMM2
)
where
@@ -23,20 +24,29 @@ type Prob = LogFloat
data -- (Eq eventType, Eq stateType, Show eventType, Show stateType) =>
HMM stateType eventType = HMM { states :: [stateType]
, events :: [eventType]
- , initProbs :: !(stateType -> Prob)
- , transMatrix :: !(stateType -> stateType -> Prob)
- , outMatrix :: !(stateType -> eventType -> Prob)
+ , initProbs :: (stateType -> Prob)
+ , transMatrix :: (stateType -> stateType -> Prob)
+ , outMatrix :: (stateType -> eventType -> Prob)
}
+-- deriving (Show, Read)
-instance NFData (HMM stateType eventType) where
- rnf a = a `seq` ()
+instance (Show stateType, Show eventType) => Show (HMM stateType eventType) where
+ show hmm = hmm2str hmm
-instance (Show state, Show observation) => Show (HMM state observation) where
- show hmm = "HMM" ++ " states=" ++ (show $ states hmm)
- ++ " events=" ++ (show $ events hmm)
- ++ " initProbs=" ++ (show [(s,initProbs hmm s) | s <- states hmm])
- ++ " transMatrix=" ++ (show [(s1,s2,transMatrix hmm s1 s2) | s1 <- states hmm, s2 <- states hmm])
- ++ " outMatrix=" ++ (show [(s,e,outMatrix hmm s e) | s <- states hmm, e <- events hmm])
+-- instance (Eq stateType, Eq eventType, Show stateType, Show eventType, Read stateType, Read eventType) => Read (HMM stateType eventType) where
+-- readsPrec _ str = [(array2hmm ((read str) :: HMMArray stateType eventType),"")]
+
+
+hmm2str hmm = "HMM" ++ "{ states=" ++ (show $ states hmm)
+ ++ ", events=" ++ (show $ events hmm)
+ ++ ", initProbs=" ++ (show [(s,initProbs hmm s) | s <- states hmm])
+ ++ ", transMatrix=" ++ (show [(s1,s2,transMatrix hmm s1 s2) | s1 <- states hmm, s2 <- states hmm])
+ ++ ", outMatrix=" ++ (show [(s,e,outMatrix hmm s e) | s <- states hmm, e <- events hmm])
+ ++ "}"
+
+elemIndex2 e list = case elemIndex e list of
+ Nothing -> seq (error "stateIndex: Index "++show e++" not in HMM "++show list) 0
+ Just x -> x
stateIndex :: (Show stateType, Show eventType, Eq stateType) => HMM stateType eventType -> stateType -> Int
stateIndex hmm state = case elemIndex state $ states hmm of
@@ -45,9 +55,55 @@ stateIndex hmm state = case elemIndex state $ states hmm of
eventIndex :: (Show stateType, Show eventType, Eq eventType) => HMM stateType eventType -> eventType -> Int
eventIndex hmm event = case elemIndex event $ events hmm of
- Nothing -> seq (error "stateIndex: Index "++show event++" not in HMM "++show hmm) 0
+ Nothing -> seq (error ("eventIndex: Index "++show event++" not in HMM "++show hmm)) 0
Just x -> x
+
+-- standardHMM :: [stateType] -> [eventType] -> HMM stateType eventType
+-- standardHMM sL eL = HMM { states=sL
+-- , events=eL
+-- , initProbs = ip
+-- , transMatrix = tm
+-- , outMatrix = om
+-- }
+-- where
+-- ip s = 1.0 / (logFloat $ length sL)
+-- tm s1 s2 = 1.0 / (logFloat $ length sL)
+-- om s e = 1.0 / (logFloat $ length eL)
+
+simpleMM2 eL order = HMM { states = sL
+ , events = eL
+ , initProbs = \s -> 1.0 / (logFloat $ length sL)
+ , transMatrix = \s1 -> \s2 -> if isPrefixOf (tail s1) s2
+ then 1.0 / (logFloat $ length eL)
+ else 0.0
+ , outMatrix = \s -> \e -> if (last s) == e
+ then 1.0
+ else 0.0
+ }
+ where sL = enumerateStates order [[]]
+ enumerateStates order' list
+ | order' == 0 = list
+ | otherwise = enumerateStates (order'-1) [symbol:l | l <- list, symbol <- eL]
+
+
+simpleMM eL order = HMM { states = sL
+ , events = eL
+ , initProbs = \s -> skewedDist s
+ , transMatrix = \s1 -> \s2 -> if (length s1==0) || (isPrefixOf (tail s1) s2)
+ then skewedDist s2 -- 1.0 / (logFloat $ length sL)
+ else 0.0
+ , outMatrix = \s -> \e -> 1.0/(logFloat $ length eL)
+ }
+ where evenDist = 1.0 / sLlen
+ skewedDist s = (logFloat $ 1+elemIndex2 s sL) / ( (sLlen * (sLlen+ (logFloat (1.0 :: Double))))/2.0)
+ sLlen = logFloat $ length sL
+ sL = enumerateStates (order-1) [[]]
+ enumerateStates order' list
+ | order' == 0 = list
+ | otherwise = enumerateStates (order'-1) [symbol:l | l <- list, symbol <- eL]
+
+
-- | forward algorithm
forward :: (Eq eventType, Eq stateType, Show eventType, Show stateType) => HMM stateType eventType -> [eventType] -> Prob
@@ -130,27 +186,18 @@ beta hmm obs = memo_beta
baumWelch :: (Eq eventType, Eq stateType, Show eventType, Show stateType) => HMM stateType eventType -> Array Int eventType -> Int -> HMM stateType eventType
baumWelch hmm obs count
| count == 0 = hmm
- | otherwise = baumWelch itr obs (count-1)
+ | otherwise = -- trace ("baumWelch iterations left: "++(show count)) $
+ trace (show itr) $
+ baumWelch itr obs (count-1)
where itr = baumWelchItr hmm obs
-
--- baumWelchIO hmm obs count = do
--- -- | count == 0 = putStrLn "done"
--- -- | otherwise = do
--- putStrLn ("Iterations left: "++(show count))
--- let newHMM = baumWelchItr hmm obs
--- -- seq newHMM $ putStrLn "test"
--- putStrLn $ show newHMM
--- if count==1
--- then return newHMM
--- else baumWelchIO newHMM obs (count-1)
baumWelchItr :: (Eq eventType, Eq stateType, Show eventType, Show stateType) => HMM stateType eventType -> Array Int eventType -> HMM stateType eventType
baumWelchItr hmm obs = --par newInitProbs $ par newTransMatrix $ par newOutMatrix
--trace "baumWelchItr " $
HMM { states = states hmm
, events = events hmm
, initProbs = memo_newInitProbs
- , transMatrix = {-transMatrix hmm-} memo_newTransMatrix
+ , transMatrix = {-newTransMatrix-} memo_newTransMatrix
, outMatrix = {-outMatrix hmm-} memo_newOutMatrix
}
where bT = snd $ bounds obs
@@ -206,5 +253,6 @@ baumWelchItr hmm obs = --par newInitProbs $ par newTransMatrix $ par newOutMatri
| otherwise = (outMatrix hmm (states hmm !! state') $ obs!t')*(sum [(memo_alpha (t'-1) state2)*(transMatrix hmm state2 (states hmm !! state')) | state2 <- states hmm])
+
-- debug utils
hmmid hmm = show $ initProbs hmm $ (states hmm) !! 1
View
@@ -0,0 +1,94 @@
+module HMMFile
+ ( loadHMM
+ , saveHMM
+ )
+ where
+
+import HMM
+
+import Debug.Trace
+import Data.Array
+import Data.List
+import Data.Number.LogFloat
+import qualified Data.MemoCombinators as Memo
+import Control.DeepSeq
+import Control.Parallel
+import System.IO
+import Text.ParserCombinators.Parsec
+
+data -- (Eq eventType, Eq stateType, Show eventType, Show stateType) =>
+ HMMArray stateType eventType = HMMArray
+ { statesA :: [stateType]
+ , eventsA :: [eventType]
+ , initProbsA :: Array Int Prob
+ , transMatrixA :: Array Int (Array Int Prob) -- (stateType -> stateType -> Prob)
+ , outMatrixA :: Array Int (Array Int Prob) -- (stateType -> eventType -> Prob)
+ }
+ deriving (Show,Read)
+
+instance Read LogFloat where
+ readsPrec a str = do
+ dbl <- readsPrec a (drop 8 str) :: [(Double,String)]
+-- trace ("LogFloat -> "++show str) $ [(logFloat ((read (drop 8 str)) :: Double), "")]
+ return (logFloat $ fst dbl, snd dbl)
+
+hmm2Array :: (Show stateType, Show eventType) => (HMM stateType eventType) -> (HMMArray stateType eventType)
+hmm2Array hmm = HMMArray { statesA = states hmm
+ , eventsA = events hmm
+ , initProbsA = listArray (1,length $ states hmm) [initProbs hmm state | state <- states hmm]
+ , transMatrixA = listArray (1,length $ states hmm) [
+ listArray (1,length $ states hmm) [transMatrix hmm s1 s2 | s1 <- states hmm]
+ | s2 <- states hmm]
+ , outMatrixA = listArray (1,length $ states hmm) [
+ listArray (1,length $ events hmm) [outMatrix hmm s e | e <- events hmm]
+ | s <- states hmm]
+ }
+
+array2hmm :: (Show stateType, Show eventType, Eq stateType, Eq eventType) => (HMMArray stateType eventType) -> (HMM stateType eventType)
+array2hmm hmmA = HMM { states = statesA hmmA
+ , events = eventsA hmmA
+ , initProbs = \s -> (initProbsA hmmA) ! (stateAIndex hmmA s)
+ , transMatrix = \s1 -> \s2 -> transMatrixA hmmA ! (stateAIndex hmmA s1) ! (stateAIndex hmmA s2)
+ , outMatrix = \s -> \e -> outMatrixA hmmA ! (stateAIndex hmmA s) ! (eventAIndex hmmA e)
+ }
+
+saveHMM :: (Show stateType, Show eventType) => String -> HMM stateType eventType -> IO ()
+saveHMM file hmm = do
+ outh <- openFile file WriteMode
+ hPutStrLn outh $ show $ hmm2Array hmm
+ hClose outh
+
+-- loadHMM :: (Read stateType, Read eventType) => String -> IO (HMM stateType eventType)
+loadHMM file = do
+ inh <- openFile file ReadMode
+ hmmstr <- hGetLine inh
+ let hmm = read hmmstr -- :: HMMArray stateType eventType
+ return (array2hmm hmm)
+
+
+stateAIndex :: (Show stateType, Show eventType, Eq stateType) => HMMArray stateType eventType -> stateType -> Int
+stateAIndex hmm state = case elemIndex state $ statesA hmm of
+ Nothing -> seq (error "stateIndex: Index "++show state++" not in HMM "++show hmm) 0
+ Just x -> x+1
+
+eventAIndex :: (Show stateType, Show eventType, Eq eventType) => HMMArray stateType eventType -> eventType -> Int
+eventAIndex hmm event = case elemIndex event $ eventsA hmm of
+ Nothing -> seq (error ("eventIndex: Index "++show event++" not in HMM "++show hmm)) 0
+ Just x -> x+1
+
+
+------------
+
+hmmParse :: {-(Read stateType, Read eventType) =>-} String -> Either ParseError (HMM String Char)
+hmmParse str = do
+ parse hmmParser str str
+
+hmmParser :: (Read stateType, Read eventType) => GenParser Char st (HMM stateType eventType)
+hmmParser = do
+ let hmm = HMM { states = []
+ , events = []
+ , initProbs = (\x -> 0)
+ , transMatrix = (\x -> \y -> 0)
+ , outMatrix = (\x -> \y -> 0)
+ }
+ return hmm
Oops, something went wrong.

0 comments on commit 3fe845a

Please sign in to comment.