-
Notifications
You must be signed in to change notification settings - Fork 1
/
SearchStrategy.hs
163 lines (134 loc) · 6.35 KB
/
SearchStrategy.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
module SearchStrategy where
import Control.Monad.LazyRandom
import qualified Data.List as DL
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as U
import System.Random (mkStdGen, Random, random, randomR, randoms, StdGen)
import Test.QuickCheck
import Beta
import HMMPlus
import qualified HyperTriangles as HT
import LazySearchModel
import MRFTypes
import NonUniform
import PsiPred
import Score
import StochasticSearch
import Viterbi
import Debug.Trace (trace)
-- | @isTick N t k@ is a clock that ticks @t@ times as
-- @k@ ranges over the interval [1..N]. It's used for
-- emitting diagnostic output at regular intervals of a search
isTick denom numIntervals num = denom > 0 &&
num * numIntervals `div` denom > (num - 1) * numIntervals `div` denom
tickProp (Positive n') (Positive t) =
(length $ filter id $ map (isTick n t) [1..n]) == t
where n = n' + t
-- | @takeNGenerations n generations@ returns the history of the
-- useful states found in @take n generations@. It would be nice
-- to use @take@ and @catMaybes@ here, but the function has to
-- make noise, too.
takeNGenerations :: Int -> [AUS a] -> History a
takeNGenerations n = take emptyHistory
where take (!older) (gen : younger) =
if showMe (ccost < n) then
take (gen `extendUsefulHistory` older) younger
else
older
where ccost = ccostOf gen
showMe = if isTick ccost 10 n then
trace (show ccost ++ " generations complete")
else
id
-- | @takeByCCostGap p pops@ accumulates from @pops@
-- as long as @p older younger@ is @True@, where
-- @older@ is the age of most recent *accepted* population
-- and @younger@ is the age of the *current* population
takeByCCostGap :: (CCost -> CCost -> Bool) -> [AUS a] -> History a
takeByCCostGap continue = take emptyHistory
where take (older @ (History (pop : _))) (gen : younger)
| not (continue (ccostOf pop) (ccostOf gen)) = older
take (!older) (gen : younger) = take (gen `extendUsefulHistory` older) younger
-- | @acceptableCCostGap searchP@ returns a function that tells if
-- an age gap is acceptable according to the search parameters.
-- If no maximum gap is specified, then *every* gap is acceptable.
acceptableCCostGap :: SearchParameters -> CCost -> CCost -> Bool
acceptableCCostGap searchP a age = maybe True (age - a <=) (convergenceAge searchP)
&& age < generations searchP
type InitialGuesser r
= HMM -> [SSPrediction] -> QuerySequence -> [BetaStrand] -> Rand r Placement
initialGuess :: forall r . RandomGen r => InitialGuesser r
initialGuess _ _ qs betas = initialGuess' betas 0
where initialGuess' :: [BetaStrand] -> Int -> Rand r Placement
initialGuess' [] _ = return []
initialGuess' (b:bs) lastGuess = do
pos <- getRandomR (lastGuess, betaSum)
initialGuess' bs (pos + len b) >>= return . (pos :)
where betaSum = U.length qs - sum (map len (b:bs))
{-
geoInitialGuess :: InitialGuesser r
geoInitialGuess _ _ seed qs betas = initialGuess' betas 0
where initialGuess' :: [BetaStrand] -> Int -> StdGen -> Placement
initialGuess' [] _ _ = []
initialGuess' (b:bs) lastGuess gen = pos : initialGuess' bs (pos + len b) gen'
where pos = head rand
betaSum = U.length qs - (foldr (+) 0 $ map len (b:bs))
(seed, gen') = random gen
rand = (randomsDist (mkStdGen seed)
$ zip [lastGuess..]
$ map ((/) 1.0)
$ take (betaSum - lastGuess)
$ map (** 1) ([1.0, 2.0..] :: [Double])) :: [Int]
-}
{-
predInitialGuess :: InitialGuesser
predInitialGuess _ preds seed qs betas = initialGuess' $ randoms $ mkStdGen seed
where initialGuess' :: [Int] -> Placement
initialGuess' [] = error "IMPOSSIBLE!"
initialGuess' (r:rs) = trace ((show guess) ++ " --- " ++ (show $ checkGuess betas guess)) $ if checkGuess betas guess then guess else initialGuess' rs
where guess = genGuess r (map (\p -> (residueNum p, beta_score p)) preds) $ le
ngth betas
genGuess :: (Random r, Fractional r, Ord r) => Seed -> [(Int, r)] -> Int -> Placement
genGuess seed dist n = DL.sort $ take n $ randomsDist (mkStdGen seed) dist
-}
projInitialGuess :: RandomGen r => InitialGuesser r
projInitialGuess hmm _ qs betas = return $ initialGuess' betas 0 0
where initialGuess' [] _ _ = []
-- trace ("g: " ++ show g ++ " f: " ++ show f ++ " pos: " ++ show pos ++ " lastB: " ++ show lastB ++ " lastP: " ++ show lastP) $
initialGuess' (b:bs) lastP lastB = g : initialGuess' bs g pos
where g = lastP + (floor $ (fromIntegral $ pos - lastB) * f)
f = (fromIntegral $ U.length qs) / (fromIntegral $ V.length hmm)
pos = resPosition $ head $ residues b
-- XXX name? contract?
checkGuess :: [BetaStrand] -> Placement -> Bool
checkGuess [] [] = True
checkGuess [] [g] = False
checkGuess [b] [] = False
checkGuess (b:bs) (g:gs) = noClash && next
where next = if noClash then checkGuess bs gs else False
noClash = case gs of
[] -> True
(g':gs') -> g' > g + len b
-- Notes for new initial guess approach:
-- Greedily consume area under curve proportional to beta strand fraction of total beta length
-- If the number of residues inadequate, expand area until we have enough residues
-- generate placement of beta strand start in range from beginning of partition to (end - len beta)
-- Move start of next position left to end of this beta
-- This is generative because we can violate constraints.
-- other approaches:
-- projection
-- projection with mutation
-- distribution of gaps
--------------------------
-- | Chooses every possible placement with equal probability
equalPlacement :: RandomGen r => QuerySequence -> [BetaStrand] -> Rand r Placement
equalPlacement query betas = do
gap : gaps <- HT.pointInTri (HT.D nGaps) (HT.L width)
return $ place gap betas gaps
where nGaps = length betas + 1
width = U.length query - sum (map len betas)
place i (beta:betas) (gapAfter:gaps) =
i : place (i+len beta+gapAfter) betas gaps
place i [] [] = if i == U.length query then []
else error "gaps in equalPlacement don't add up"