Skip to content

Commit

Permalink
Merge branch 'master' of github.com:ndaniels/MRFy
Browse files Browse the repository at this point in the history
  • Loading branch information
nrnrnr committed Jul 8, 2012
2 parents faf374b + ffb07c4 commit 4483f3f
Show file tree
Hide file tree
Showing 11 changed files with 1,032 additions and 14 deletions.
3 changes: 2 additions & 1 deletion Beta.hs
Expand Up @@ -117,7 +117,8 @@ addPairings strands = map (map (\n -> foldl annotate n strands)) strands
mkBetaResidues :: [StrandPair] -> [[BetaResidue]]
mkBetaResidues [] = []
mkBetaResidues (sp:sps) = residues1 : residues2 : mkBetaResidues sps
where (residues1, residues2) = mkResidues (zip3 (exposure sp) [s1..] secondIndexing) [] []
where (residues1, residues2) =
mkResidues (zip3 (unElist $ exposure sp) [s1..] secondIndexing) [] []

secondIndexing = case parallel sp of Parallel -> [s2..]
Antiparallel -> [s2, s2-1..]
Expand Down
16 changes: 16 additions & 0 deletions BetaProps.hs
@@ -0,0 +1,16 @@
module BetaProps
where

import Beta
import MRFTypes

import Test.QuickCheck

betaProps :: [(String, Property)]
betaProps = [ ("genStrands", property genStrands)
]

genStrands :: [StrandPair] -> Bool
genStrands sps = len >= 0
where len = length $ getBetaStrands sps

2 changes: 1 addition & 1 deletion HMMArby.hs
Expand Up @@ -45,7 +45,7 @@ prob :: Gen Double
prob = choose (0.0, 1.0)

instance Arbitrary (V.Vector HMMNode) where
shrink = (map V.fromList) . (shrink . V.toList)
shrink = (map V.fromList) . filter (\xs -> length xs >= 2) . (shrink . V.toList)
arbitrary = do
-- A random length is used here to essentially guarantee
-- that we get a list of HMMNodes with at least length 2.
Expand Down
9 changes: 3 additions & 6 deletions HMMProps.hs
Expand Up @@ -148,12 +148,9 @@ scoreHMM nss qss hss = scoreHMM' (reverse $ V.toList nss)
scoreHMM' (n:[]) [] [] t = aScore n Mat t
scoreHMM' (n:[]) (q:[]) (Mat:[]) _ = negLogZero
scoreHMM' (n:[]) (q:[]) (Del:[]) _ = negLogZero
scoreHMM' (n:[]) (q:[]) (Ins:[]) _ = eScore n q Ins +
aScore n Mat Ins
-- -- TODO check all these base cases vs Viterbi
-- scoreHMM' (n:[]) (q:[]) (Ins:[]) = eScore n q Ins +
-- aScore n (head hs) Ins +
-- scoreHMM' (n:ns) (q:qs) hs
scoreHMM' (n:[]) (q:[]) (Ins:[]) t = eScore n q Ins +
aScore n Ins t +
scoreHMM' [n] [] [] Ins
scoreHMM' (n:ns) qs (Del:hs) t = aScore n Del t +
scoreHMM' ns qs hs Del
scoreHMM' (n:ns) (q:qs) (Mat:hs) t = eScore n q Mat +
Expand Down
28 changes: 25 additions & 3 deletions MRFTypes.hs
Expand Up @@ -6,7 +6,7 @@ module MRFTypes
, matchEmissions, insertionEmissions
, StrandPair(..)
, Helix(..)
, Exposure(..), mkExposure
, Exposure(..), mkExposure, unElist
, Direction(..), mkDirection
, BetaStrand(..), BetaPosition, BetaResidue(..), BetaPair(..)
, TProb(..), TProbs, m_m, m_i, m_d, i_m, i_i, d_m, d_d, b_m, m_e
Expand All @@ -16,11 +16,13 @@ module MRFTypes
)
where

import Control.Applicative
import Data.Function
import Data.Ix
import Data.List -- (intercalate)
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as U
import Test.QuickCheck
import Text.Printf

import Score
Expand All @@ -39,7 +41,7 @@ data HMMNode = HMMNode { nodeNum :: Int
}
-- @ end hmmnode.tex
instance Show HMMNode where
show node = "(#" ++ show (nodeNum node) ++ ": " ++ showTx (transitions node)
show node = "\n(#" ++ show (nodeNum node) ++ ": " ++ showTx (transitions node)
++ "; " ++ showEmit "M" (matEmissions node)
++ "; " ++ showEmit "I" (insEmissions node)
++ ")"
Expand Down Expand Up @@ -116,6 +118,13 @@ data StrandPair = StrandPair { firstStart :: Int
}
deriving (Show)

instance Arbitrary StrandPair where
arbitrary = StrandPair <$> first <*> second <*> len <*> d <*> e
where (d, e) = (arbitrary, arbitrary)
(first, second) = (choose (1, max), choose (1, max))
len = choose (1, max)
max = 1000

data Helix = Helix { startRes :: Int
, helixLength :: Int
}
Expand All @@ -134,12 +143,22 @@ data HMMHeader = HMMHeader { betas :: [BetaStrand] -- TODO add alphas (sst
data MRF = MRF { hmmHeader :: HMMHeader, hmm :: HMM }
deriving (Show)

type ExposureList = [ Exposure ]
newtype ExposureList = EList [ Exposure ] deriving Show

unElist :: ExposureList -> [Exposure]
unElist (EList exposures) = exposures

instance Arbitrary ExposureList where
arbitrary = fmap EList $ listOf (arbitrary :: Gen Exposure)


data Exposure = Buried -- 'i'
| Exposed -- 'o'
deriving (Show)

instance Arbitrary Exposure where
arbitrary = elements [Buried, Exposed]

mkExposure :: Char -> Exposure
mkExposure c
| c == 'i' = Buried
Expand All @@ -150,6 +169,9 @@ data Direction = Parallel -- "1"
| Antiparallel -- "-1"
deriving (Show)

instance Arbitrary Direction where
arbitrary = elements [Parallel, Antiparallel]

mkDirection :: String -> Direction
mkDirection s
| s == "1" = Parallel
Expand Down
7 changes: 6 additions & 1 deletion Perturb.hs
Expand Up @@ -8,6 +8,7 @@ module Perturb
, consistentScoring
, scoreableMetrics
, viterbiIsAwesome
, approxEq
)
where

Expand Down Expand Up @@ -300,9 +301,13 @@ noP7Dups :: Metrics -> Bool
noP7Dups ms = length (nub ss) == length ss
where ss = allp7 ms

approxEq :: Score -> Score -> Bool
approxEq (Score x) (Score x') = abs (x - x') < epsilon
where epsilon = 0.0000001

consistentScoring :: HMM -> QuerySequence -> Property
consistentScoring model query = printTestCase msg $
scoreOf vscored == hmmScore
scoreOf vscored `approxEq` hmmScore
where vscored = viterbi (:) HasNoEnd query model
hmmScore = scoreHMM model query (unScored vscored)
msg = unlines [ "Viterbi score " ++ show (scoreOf vscored)
Expand Down
2 changes: 1 addition & 1 deletion SearchStrategy.hs
Expand Up @@ -25,7 +25,7 @@ import Debug.Trace (trace)
-- | @isTick N t k@ is a clock that ticks @t@ times as
-- @k@ ranges over the interval [1..N]. It's used for
-- emitting diagnostic output at regular intervals of a search
isTick denom numIntervals num =
isTick denom numIntervals num = denom > 0 &&
num * numIntervals `div` denom > (num - 1) * numIntervals `div` denom

tickProp (Positive n') (Positive t) =
Expand Down
6 changes: 5 additions & 1 deletion Viterbi.hs
Expand Up @@ -49,11 +49,15 @@ viterbi pathCons right query hmm =
if numNodes == 0 then
Scored [] negLogOne
else
minimum $ [vee'' s (numNodes - 1) (seqlen - 1) | s <- [Mat, Ins, Del]] ++
minimum $ [transN s $
vee'' s (numNodes - 1) (seqlen - 1) | s <- [Mat, Ins, Del]] ++
case right of { HasEnd -> [bestEnd]; HasNoEnd -> [] }

-- trace (show state DL.++ " " DL.++ show node DL.++ " " DL.++ show obs) $
where
transN :: StateLabel -> Scored StatePath -> Scored StatePath
transN state sp = Scored (Prelude.reverse $ unScored sp)
((aScore state Mat (numNodes - 1)) + (scoreOf sp))
-- @ start memo.tex -8
vee'' = Memo.memo3 (Memo.arrayRange (Mat, End))
(Memo.arrayRange (0, numNodes))
Expand Down
3 changes: 3 additions & 0 deletions mkfile
Expand Up @@ -40,5 +40,8 @@ test:V: $TGT
fast-test:V: $TGT
./$prereq -test mini-strings

test-sandwich:V: $TGT
./$TGT testing/sandwich.hmm+ testing/sandwich.fasta

# note: to build profile: ghc --make Main.hs -O3 -rtsopts -o $TGT
# then, ghc --make Main.hs -O3 -prof -auto-all -caf-all -rtsopts -osuf p_o -o $TGT
5 changes: 5 additions & 0 deletions testing/sandwich.fasta
@@ -0,0 +1,5 @@
> d2sbaa_ b.29.1.1 (A:) Legume lectin {Soybean (Glycine max) [TaxId: 3847]}
AETVSFSWNKFVPKQPNMILQGDAIVTSSGKLQLNKVDENGTPKPSSLGRALYSTPIHIW
DKETGSVASFAASFNFTFYAPDTKRLADGLAFFLAPIDTKPQTHAGYLGLFNENESGDQV
VAVEFDTFRNSWDPPNPHIGINVNSIRSIKTTSWDLANNKVAKVLITYDASTSLLVASLV
YPSQRTSNILSDVVDLKTSLPEWVRIGFSAATGLDIPGESHDVLSWSFASNLPHA

0 comments on commit 4483f3f

Please sign in to comment.