Test.hs

{- |
Module      :  Test.hs
Description :  Testing the Data.IntDisjointSet module

Copyright   :  (c) Myles C. Maxfield
License     :  BSD3

Maintainer  :  litherum@gmail.com
Stability   :  provisional
Portability :  portable

This file has a main function, and is meant to be run from the command-line.
The return code of the program determines the success of the tests.
-}

import Control.Category
import Control.Arrow hiding (left, right)
import Control.Monad (guard)
import Data.IntDisjointSet
import qualified Data.List as L
import Data.Maybe
import qualified Data.Set as S
import Prelude hiding ((.), id, lookup, elem)
import Test.HUnit
import Test.QuickCheck

{-|
This is a reference implementation of the IntDisjointSet, implemented as a set of sets.
The 'disjoint' property is enforced by the insert function.
-}
type SlowIntDisjointSet = S.Set (S.Set Int)

empty' :: SlowIntDisjointSet
empty' = S.empty

{-|
This function is never needed, so to keep -Wall happy, it's commented out.
-}
{-
singleton' :: Int -> SlowIntDisjointSet
singleton' x = insert' x $ empty'
-}

{-|
Try to find the set. If the set is empty, that means that the set doesn't exist, in
which case, we're all set to insert it. Inserting it just creates a set with 1 element
and adds it to the SlowIntDisjointSet. Otherwise, it does nothing.
-}
insert' :: Int -> SlowIntDisjointSet -> SlowIntDisjointSet
insert' x sids
  | S.null $ findSet x sids = S.insert (S.singleton x) sids
  | otherwise = sids

{-|
Folding insert' over a list
-}
insertList' :: [Int] -> SlowIntDisjointSet -> SlowIntDisjointSet
insertList' l sids = foldl (flip insert') sids l

{-|
Find the set that x is in, and the set that y is in. Delete those two sets from the
SlowIntDisjointSet. If x or y isn't found, its containing set will be empty, and delete
does nothing if the given element isn't in the set. Once you've deleted the two sets
from the SlowIntDisjointSet, insert the union of the two back in.
-}
union' :: Int -> Int -> SlowIntDisjointSet -> SlowIntDisjointSet
union' x y sids = S.insert (S.union xset yset) $ S.delete xset $ S.delete yset sids
  where xset = findSet x sids
        yset = findSet y sids

{-|
Iterate through all the constituent sets. If the given element is a member of the
curent set, set the accumulator equal to the current set. Otherwise, keep the
accumulator empty.
-}
findSet :: Int -> SlowIntDisjointSet -> S.Set Int
findSet a sids = S.fold f S.empty sids
  where f e old
          | S.member a e = e
          | otherwise = old

{-|
The union of all the constituent sets
-}
getElements :: SlowIntDisjointSet -> S.Set Int
getElements sids = S.fold S.union S.empty sids

{-|
This is the same as insertList', but operates on the real IntDisjointSet. It's just a
convenience function.
-}
insertList :: [Int] -> IntDisjointSet -> IntDisjointSet
insertList xs set = foldr insert set xs

---------------------------------------------------------

{-|
This type is used to join an instance of the real IntDisjointSet with an instance of the
reference implementation, so we can compare the instances.
-}
newtype IntDisjointSets = IntDisjointSets (IntDisjointSet, SlowIntDisjointSet)
  deriving (Show)

{-|
Convenience function.
applyNTimes 4 f = f . f . f . f
applyNTimes 5 f = f . f . f . f . f
-}
applyNTimes :: Category cat => Int -> cat b b -> cat b b
applyNTimes n f = (foldr (.) id (replicate n f))

{-|
Create random IntDisjointSets by following a procedure of random inserts and random
unions. Each of these operations is performed on both the IntDisjointSet and the
reference implementation. This way, we get mirror images of each other, so our tests
can compare properties of the two.
-}
instance Arbitrary IntDisjointSets where
  arbitrary = sized f
    where f items = runKleisli (applyNTimes n $ Kleisli oneunion) starter
                    -- First, insert 'items' elements into both *IntDisjointSets
              where starter = IntDisjointSets (insertList [1..items] empty,
                                               insertList' [1..items] empty')
                    -- This is the number of random unions to perform
                    n = items - (floor $ sqrt (fromIntegral items :: Double))
                    -- Perform a single random union. We can use the 'Gen' monad to get
                    -- randomness. Because the applyNTimes function works on any
                    -- category, we can use it on this with the Kleisli operator.
                    oneunion :: IntDisjointSets -> Gen IntDisjointSets
                    oneunion (IntDisjointSets (ids, sids)) = do
                      x <- choose (1, items)
                      y <- choose (1, items)
                      return $ IntDisjointSets (union x y ids, union' x y sids)

---------------------------------------------------------

testRedundantInsert :: Test
testRedundantInsert = TestCase $ assertEqual "Inserting an element that's already inserted doesn't cause the number of elements to grow" 1 $
  disjointSetSize $ insert elem $ singleton elem
  where elem = 0

testRedundantInsertSize :: Test
testRedundantInsertSize = TestCase $ assertEqual "Inserting an element that's already inserted doesn't cause the number of sets to grow" 1 $
  size $ insert elem $ singleton elem
  where elem = 0

testInsertGrowsNumberOfSets :: Test
testInsertGrowsNumberOfSets = TestCase $ assertEqual "Inserting an element that hasn't already been inserted grows the number of sets by one" 2 $
  disjointSetSize $ insert 1 $ singleton 2

testInsertGrowsTotalSize :: Test
testInsertGrowsTotalSize = TestCase $ assertEqual "Inserting an element that hasn't already been inserted grows the number of elements by one" 2 $
  size $ insert 1 $ singleton 2

{-|
Inserting n elements makes the IntDisjointSet have n constituent elements
-}
testInsertListSize :: [Int] -> Bool
testInsertListSize l = size (insertList l empty) == length (L.nub l)

{-|
Inserting n elements makes the IntDisjointSet have n constituent sets
-}
testInsertListDisjointSetSize :: [Int] -> Bool
testInsertListDisjointSetSize l = disjointSetSize (insertList l empty) ==
  length (L.nub l)

----------------------------------------------------------

{-|
For every item in the reference implementation, ask the IntDisjointSet for its
representative. Make sure that the item and the representative are in the same set in
the reference implementation
-}
testInsertConsistent :: IntDisjointSets -> Bool
testInsertConsistent (IntDisjointSets (ids, sids)) = all check $ S.toList $
  getElements sids
  where check e = findSet e sids == findSet rep sids
          where rep = fromJust $ fst $ lookup e ids

{-|
Do the following for each set in the reference implementation:
For each item in the set, ask the IntDisjointSet for its representative. Make sure all
the representatives in this set are the same
-}
testMembersGoToSameItem :: IntDisjointSets -> Bool
testMembersGoToSameItem (IntDisjointSets (ids, sids)) = all f $ S.toList sids
  where allTheSame [] = True
        allTheSame l = all (== head l) $ tail l
        f :: S.Set Int -> Bool
        f s = allTheSame $ L.map (fst . ((flip lookup) ids)) $ S.toList s

{-|
The number of elements should be the same in the two implementations
-}
testSizesAreSame :: IntDisjointSets -> Bool
testSizesAreSame (IntDisjointSets (ids, sids)) = size ids == S.size (getElements sids)

{-|
The number of sets should be the same in the two implementations
-}
testNumberOfSetsAreSame :: IntDisjointSets -> Bool
testNumberOfSetsAreSame (IntDisjointSets (ids, sids)) = disjointSetSize ids ==
  S.size sids

insertAndTest :: Int -> (Bool, IntDisjointSet) -> (Bool, IntDisjointSet)
insertAndTest x (_, set) =
  let set' = insert x set
  in  case lookup x set of
        -- x was already in set. The two sets should be unchanged.
        (Just _, _) -> (L.sort (fst (toList set)) == L.sort (fst (toList set')), set')
        -- x is new to the set. The sets should be unchanged except for entry x.
        (Nothing, _) -> let xs = fst $ toList set'
                   in  ((x,x) `L.elem` xs &&
                        L.sort (L.delete (x,x) xs) == L.sort (fst (toList set)), set')

testInsertAndTest :: [Int] -> Bool
testInsertAndTest l = all fst $ scanl (flip insertAndTest) (True, empty) l

unionAndTest :: (Int, Int) -> (Bool, IntDisjointSet) -> (Bool, IntDisjointSet)
unionAndTest (x', y') (_, set) = case (lookup x' set, lookup y' set) of
  -- Both elements are present, but they may be in the same set
  ((Just x, _), (Just y, _)) -> (if x == y then L.sort (fst (toList set)) ==
                                                L.sort (fst (toList unioned))
                                   else size unioned == size set &&
                                     disjointSetSize unioned == disjointSetSize set - 1,
                                 unioned)
  _ -> (True, set)
  where unioned = union x' y' set
  
testUnionAndTest :: (IntDisjointSets, [(Int, Int)]) -> Bool
testUnionAndTest (IntDisjointSets (ids, _), l) = all fst $ scanl (flip unionAndTest) (True, ids) l

------------------------------------------------------------

{-|
These functions shouldn't change the IntDisjointSet in a user-visible way. The idea is
to run the previous test suite on the output of these functions, and they should still
all pass. The idea is to show that these operations are transparent.
-}

{-|
Call lookup on an element with the IntDisjointSet. Disregard the element's
representative, but keep track of the partially-optimized IntDisjointSet. Fold this
across all the elements in the reference implementation.
-}
optimizeByLookup :: IntDisjointSets -> IntDisjointSets
optimizeByLookup (IntDisjointSets (ids, sids)) = IntDisjointSets (optimized, sids)
  where optimized = foldl (\ ids' e -> snd $ lookup e ids') ids $
          S.toList $ getElements sids

{-|
Map (+1) across the reference implementation and the IntDisjointSet.
-}
optimizeByMap :: IntDisjointSets -> IntDisjointSets
optimizeByMap (IntDisjointSets (ids, sids)) = IntDisjointSets
  (Data.IntDisjointSet.map (+ 1) ids, S.map (S.map (+ 1)) sids)

{-|
Disregard the input IntDisjointSet. Take the reference implementation and split it down
the middle into two groups of sets. Recreate these two groups of sets into two new
IntDisjointSets by inserting each element and then unioning the relevant sets together.
Then, merge the two IntDisjointSets together.
-}
optimizeByMerge :: IntDisjointSets -> IntDisjointSets
optimizeByMerge untouched@(IntDisjointSets (_, sids))
  | S.size sids == 1 = untouched
  | otherwise = IntDisjointSets (unsafeMerge left right, sids)
  where sets = S.toList $ sids
        (mergel, merger) = splitAt ((L.length sets) `div` 2) sets
        (left, right) = (f mergel, f merger)
          where f l = foldl g starter l
                  where starter = insertList (S.toList $ getElements $ S.fromList l)
                          empty
                        g shallow s = foldl (\ accum e -> union (head l') e accum)
                          shallow l'
                          where l' = S.toList s

--------------------------------------------------------------

{-|
All the HUnit-style functions that relate to the insert function
-}
testInsert :: Test
testInsert = TestList [ testRedundantInsert
                      , testRedundantInsertSize
                      , testInsertGrowsNumberOfSets
                      , testInsertGrowsTotalSize
                      ]

{-|
Run quickCheck, and make sure the result is acceptable. mzero for IO is to throw an
exception, which is unchecked and will cause the program to terminate with an error
code, which is what we want. There's probably a better way to do this, but I can't find
it.
-}
runQuickCheckAndGuard :: Test.QuickCheck.Testable prop => prop -> IO ()
runQuickCheckAndGuard f = do
  results <- quickCheckResult f
  guard $ check results
  where check (Success _ _ _) = True
        check (GaveUp _ _ _) = False
        check (Failure _ _ _ _ _ _ _) = False
        check (NoExpectedFailure _ _ _) = False

{-|
Run all the tests
-}
main :: IO ()
main = do
  results <- runTestTT $ TestList [testInsert]
  guard (errors results == 0 && failures results == 0)
  runQuickCheckAndGuard testInsertListSize
  runQuickCheckAndGuard testInsertListDisjointSetSize

  sequence_ [runQuickCheckAndGuard (x . y) | x <- [ testInsertConsistent
                                                  , testMembersGoToSameItem
                                                  , testSizesAreSame
                                                  , testNumberOfSetsAreSame
                                                  ],
                                             y <- [ id
                                                  , optimizeByLookup
                                                  , optimizeByMap
                                                  , optimizeByMerge
                                                  ]]

  runQuickCheckAndGuard testInsertAndTest
  runQuickCheckAndGuard testUnionAndTest