/
Test.hs
328 lines (284 loc) · 12.7 KB
/
Test.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
{- |
Module : Test.hs
Description : Testing the Data.IntDisjointSet module
Copyright : (c) Myles C. Maxfield
License : BSD3
Maintainer : litherum@gmail.com
Stability : provisional
Portability : portable
This file has a main function, and is meant to be run from the command-line.
The return code of the program determines the success of the tests.
-}
import Control.Category
import Control.Arrow hiding (left, right)
import Control.Monad (guard)
import Data.IntDisjointSet
import qualified Data.List as L
import Data.Maybe
import qualified Data.Set as S
import Prelude hiding ((.), id, lookup, elem)
import Test.HUnit
import Test.QuickCheck
{-|
This is a reference implementation of the IntDisjointSet, implemented as a set of sets.
The 'disjoint' property is enforced by the insert function.
-}
type SlowIntDisjointSet = S.Set (S.Set Int)
empty' :: SlowIntDisjointSet
empty' = S.empty
{-|
This function is never needed, so to keep -Wall happy, it's commented out.
-}
{-
singleton' :: Int -> SlowIntDisjointSet
singleton' x = insert' x $ empty'
-}
{-|
Try to find the set. If the set is empty, that means that the set doesn't exist, in
which case, we're all set to insert it. Inserting it just creates a set with 1 element
and adds it to the SlowIntDisjointSet. Otherwise, it does nothing.
-}
insert' :: Int -> SlowIntDisjointSet -> SlowIntDisjointSet
insert' x sids
| S.null $ findSet x sids = S.insert (S.singleton x) sids
| otherwise = sids
{-|
Folding insert' over a list
-}
insertList' :: [Int] -> SlowIntDisjointSet -> SlowIntDisjointSet
insertList' l sids = foldl (flip insert') sids l
{-|
Find the set that x is in, and the set that y is in. Delete those two sets from the
SlowIntDisjointSet. If x or y isn't found, its containing set will be empty, and delete
does nothing if the given element isn't in the set. Once you've deleted the two sets
from the SlowIntDisjointSet, insert the union of the two back in.
-}
union' :: Int -> Int -> SlowIntDisjointSet -> SlowIntDisjointSet
union' x y sids = S.insert (S.union xset yset) $ S.delete xset $ S.delete yset sids
where xset = findSet x sids
yset = findSet y sids
{-|
Iterate through all the constituent sets. If the given element is a member of the
curent set, set the accumulator equal to the current set. Otherwise, keep the
accumulator empty.
-}
findSet :: Int -> SlowIntDisjointSet -> S.Set Int
findSet a sids = S.fold f S.empty sids
where f e old
| S.member a e = e
| otherwise = old
{-|
The union of all the constituent sets
-}
getElements :: SlowIntDisjointSet -> S.Set Int
getElements sids = S.fold S.union S.empty sids
{-|
This is the same as insertList', but operates on the real IntDisjointSet. It's just a
convenience function.
-}
insertList :: [Int] -> IntDisjointSet -> IntDisjointSet
insertList xs set = foldr insert set xs
---------------------------------------------------------
{-|
This type is used to join an instance of the real IntDisjointSet with an instance of the
reference implementation, so we can compare the instances.
-}
newtype IntDisjointSets = IntDisjointSets (IntDisjointSet, SlowIntDisjointSet)
deriving (Show)
{-|
Convenience function.
applyNTimes 4 f = f . f . f . f
applyNTimes 5 f = f . f . f . f . f
-}
applyNTimes :: Category cat => Int -> cat b b -> cat b b
applyNTimes n f = (foldr (.) id (replicate n f))
{-|
Create random IntDisjointSets by following a procedure of random inserts and random
unions. Each of these operations is performed on both the IntDisjointSet and the
reference implementation. This way, we get mirror images of each other, so our tests
can compare properties of the two.
-}
instance Arbitrary IntDisjointSets where
arbitrary = sized f
where f items = runKleisli (applyNTimes n $ Kleisli oneunion) starter
-- First, insert 'items' elements into both *IntDisjointSets
where starter = IntDisjointSets (insertList [1..items] empty,
insertList' [1..items] empty')
-- This is the number of random unions to perform
n = items - (floor $ sqrt (fromIntegral items :: Double))
-- Perform a single random union. We can use the 'Gen' monad to get
-- randomness. Because the applyNTimes function works on any
-- category, we can use it on this with the Kleisli operator.
oneunion :: IntDisjointSets -> Gen IntDisjointSets
oneunion (IntDisjointSets (ids, sids)) = do
x <- choose (1, items)
y <- choose (1, items)
return $ IntDisjointSets (union x y ids, union' x y sids)
---------------------------------------------------------
testRedundantInsert :: Test
testRedundantInsert = TestCase $ assertEqual "Inserting an element that's already inserted doesn't cause the number of elements to grow" 1 $
disjointSetSize $ insert elem $ singleton elem
where elem = 0
testRedundantInsertSize :: Test
testRedundantInsertSize = TestCase $ assertEqual "Inserting an element that's already inserted doesn't cause the number of sets to grow" 1 $
size $ insert elem $ singleton elem
where elem = 0
testInsertGrowsNumberOfSets :: Test
testInsertGrowsNumberOfSets = TestCase $ assertEqual "Inserting an element that hasn't already been inserted grows the number of sets by one" 2 $
disjointSetSize $ insert 1 $ singleton 2
testInsertGrowsTotalSize :: Test
testInsertGrowsTotalSize = TestCase $ assertEqual "Inserting an element that hasn't already been inserted grows the number of elements by one" 2 $
size $ insert 1 $ singleton 2
{-|
Inserting n elements makes the IntDisjointSet have n constituent elements
-}
testInsertListSize :: [Int] -> Bool
testInsertListSize l = size (insertList l empty) == length (L.nub l)
{-|
Inserting n elements makes the IntDisjointSet have n constituent sets
-}
testInsertListDisjointSetSize :: [Int] -> Bool
testInsertListDisjointSetSize l = disjointSetSize (insertList l empty) ==
length (L.nub l)
----------------------------------------------------------
{-|
For every item in the reference implementation, ask the IntDisjointSet for its
representative. Make sure that the item and the representative are in the same set in
the reference implementation
-}
testInsertConsistent :: IntDisjointSets -> Bool
testInsertConsistent (IntDisjointSets (ids, sids)) = all check $ S.toList $
getElements sids
where check e = findSet e sids == findSet rep sids
where rep = fromJust $ fst $ lookup e ids
{-|
Do the following for each set in the reference implementation:
For each item in the set, ask the IntDisjointSet for its representative. Make sure all
the representatives in this set are the same
-}
testMembersGoToSameItem :: IntDisjointSets -> Bool
testMembersGoToSameItem (IntDisjointSets (ids, sids)) = all f $ S.toList sids
where allTheSame [] = True
allTheSame l = all (== head l) $ tail l
f :: S.Set Int -> Bool
f s = allTheSame $ L.map (fst . ((flip lookup) ids)) $ S.toList s
{-|
The number of elements should be the same in the two implementations
-}
testSizesAreSame :: IntDisjointSets -> Bool
testSizesAreSame (IntDisjointSets (ids, sids)) = size ids == S.size (getElements sids)
{-|
The number of sets should be the same in the two implementations
-}
testNumberOfSetsAreSame :: IntDisjointSets -> Bool
testNumberOfSetsAreSame (IntDisjointSets (ids, sids)) = disjointSetSize ids ==
S.size sids
insertAndTest :: Int -> (Bool, IntDisjointSet) -> (Bool, IntDisjointSet)
insertAndTest x (_, set) =
let set' = insert x set
in case lookup x set of
-- x was already in set. The two sets should be unchanged.
(Just _, _) -> (L.sort (fst (toList set)) == L.sort (fst (toList set')), set')
-- x is new to the set. The sets should be unchanged except for entry x.
(Nothing, _) -> let xs = fst $ toList set'
in ((x,x) `L.elem` xs &&
L.sort (L.delete (x,x) xs) == L.sort (fst (toList set)), set')
testInsertAndTest :: [Int] -> Bool
testInsertAndTest l = all fst $ scanl (flip insertAndTest) (True, empty) l
unionAndTest :: (Int, Int) -> (Bool, IntDisjointSet) -> (Bool, IntDisjointSet)
unionAndTest (x', y') (_, set) = case (lookup x' set, lookup y' set) of
-- Both elements are present, but they may be in the same set
((Just x, _), (Just y, _)) -> (if x == y then L.sort (fst (toList set)) ==
L.sort (fst (toList unioned))
else size unioned == size set &&
disjointSetSize unioned == disjointSetSize set - 1,
unioned)
_ -> (True, set)
where unioned = union x' y' set
testUnionAndTest :: (IntDisjointSets, [(Int, Int)]) -> Bool
testUnionAndTest (IntDisjointSets (ids, _), l) = all fst $ scanl (flip unionAndTest) (True, ids) l
------------------------------------------------------------
{-|
These functions shouldn't change the IntDisjointSet in a user-visible way. The idea is
to run the previous test suite on the output of these functions, and they should still
all pass. The idea is to show that these operations are transparent.
-}
{-|
Call lookup on an element with the IntDisjointSet. Disregard the element's
representative, but keep track of the partially-optimized IntDisjointSet. Fold this
across all the elements in the reference implementation.
-}
optimizeByLookup :: IntDisjointSets -> IntDisjointSets
optimizeByLookup (IntDisjointSets (ids, sids)) = IntDisjointSets (optimized, sids)
where optimized = foldl (\ ids' e -> snd $ lookup e ids') ids $
S.toList $ getElements sids
{-|
Map (+1) across the reference implementation and the IntDisjointSet.
-}
optimizeByMap :: IntDisjointSets -> IntDisjointSets
optimizeByMap (IntDisjointSets (ids, sids)) = IntDisjointSets
(Data.IntDisjointSet.map (+ 1) ids, S.map (S.map (+ 1)) sids)
{-|
Disregard the input IntDisjointSet. Take the reference implementation and split it down
the middle into two groups of sets. Recreate these two groups of sets into two new
IntDisjointSets by inserting each element and then unioning the relevant sets together.
Then, merge the two IntDisjointSets together.
-}
optimizeByMerge :: IntDisjointSets -> IntDisjointSets
optimizeByMerge untouched@(IntDisjointSets (_, sids))
| S.size sids == 1 = untouched
| otherwise = IntDisjointSets (unsafeMerge left right, sids)
where sets = S.toList $ sids
(mergel, merger) = splitAt ((L.length sets) `div` 2) sets
(left, right) = (f mergel, f merger)
where f l = foldl g starter l
where starter = insertList (S.toList $ getElements $ S.fromList l)
empty
g shallow s = foldl (\ accum e -> union (head l') e accum)
shallow l'
where l' = S.toList s
--------------------------------------------------------------
{-|
All the HUnit-style functions that relate to the insert function
-}
testInsert :: Test
testInsert = TestList [ testRedundantInsert
, testRedundantInsertSize
, testInsertGrowsNumberOfSets
, testInsertGrowsTotalSize
]
{-|
Run quickCheck, and make sure the result is acceptable. mzero for IO is to throw an
exception, which is unchecked and will cause the program to terminate with an error
code, which is what we want. There's probably a better way to do this, but I can't find
it.
-}
runQuickCheckAndGuard :: Test.QuickCheck.Testable prop => prop -> IO ()
runQuickCheckAndGuard f = do
results <- quickCheckResult f
guard $ check results
where check (Success _ _ _) = True
check (GaveUp _ _ _) = False
check (Failure _ _ _ _ _ _ _) = False
check (NoExpectedFailure _ _ _) = False
{-|
Run all the tests
-}
main :: IO ()
main = do
results <- runTestTT $ TestList [testInsert]
guard (errors results == 0 && failures results == 0)
runQuickCheckAndGuard testInsertListSize
runQuickCheckAndGuard testInsertListDisjointSetSize
sequence_ [runQuickCheckAndGuard (x . y) | x <- [ testInsertConsistent
, testMembersGoToSameItem
, testSizesAreSame
, testNumberOfSetsAreSame
],
y <- [ id
, optimizeByLookup
, optimizeByMap
, optimizeByMerge
]]
runQuickCheckAndGuard testInsertAndTest
runQuickCheckAndGuard testUnionAndTest