Skip to content

Commit

Permalink
added original cover tree insertion method for tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mikeizbicki committed Jan 7, 2015
1 parent 8b72600 commit 3df1af9
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 41 deletions.
2 changes: 1 addition & 1 deletion hlearn.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ executable hlearn-allknn
src/exec

ghc-options:
-- -prof
-prof
-- -Wall
-threaded
-rtsopts
Expand Down
7 changes: 4 additions & 3 deletions scripts/allknn-treetypes/runtest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,18 @@ echo "--------------------------------------------------------------------------
hlearn_neighbors="./neighbors_hlearn.csv"
hlearn_distances="./distances_hlearn.csv"

#verbose="--verbose"
verbose="--verbose"
optimization="--varshift"
#method="--train-method=traininsert"

methodlist="trainmonoid traininsert_nosort traininsert_sort traininsert_parent traininsert_ancestor"
#methodlist="trainmonoid traininsert_nosort traininsert_sort traininsert_parent traininsert_ancestor traininsert_orig"
methodlist="traininsert_orig"

touch results

for method in $methodlist; do
echo -e "---\n$method\n---\n"
"$curdir/hlearn-allknn" --train-method="$method" -k $K -r "$curdir/$1" $2 $3 $4 $5 $optimization $verbose +RTS -K1000M -N2 -p -s 2>&1 | tee out.$method
"hlearn-allknn" --train-method="$method" -k $K -r "$curdir/$1" $2 $3 $4 $5 $optimization $verbose +RTS -K1000M -p -s 2>&1 | tee out.$method

mv ./hlearn-allknn.prof ./prof.$method

Expand Down
2 changes: 1 addition & 1 deletion src/HLearn/Data/Graph.hs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ instance KernelSpace v => KernelSpace (SelfKernel v) where
selfKernel (SelfKernel k _) = k

instance KernelSpace v => Normed (SelfKernel v) where
abs = kernelNorm
size = kernelNorm

instance KernelSpace v => MetricSpace (SelfKernel v) where
distance = kernelDistance
Expand Down
4 changes: 2 additions & 2 deletions src/HLearn/Data/LoadData.hs
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,8 @@ loadCSV filepath = do
Left str -> error $ "failed to parse CSV file " ++ filepath ++ ": " ++ take 1000 str

putStrLn " dataset info:"
putStrLn $ " num dp: " ++ show (abs rs)
-- putStrLn $ " size dp: " ++ show (abs $ head rs)
putStrLn $ " num dp: " ++ show (size rs)
-- putStrLn $ " size dp: " ++ show (size $ head rs)
putStrLn ""

return rs
Expand Down
127 changes: 95 additions & 32 deletions src/HLearn/Data/SpaceTree/CoverTree.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import Control.Parallel.Strategies


{-# NOINLINE expratIORef #-}
expratIORef = unsafePerformIO $ newIORef (2::Rational)
expratIORef = unsafePerformIO $ newIORef (1.3::Rational)

setexpratIORef :: Rational -> P.IO ()
setexpratIORef r = writeIORef expratIORef r
Expand Down Expand Up @@ -585,26 +585,96 @@ instance
-------------------------------------------------------------------------------
-- construction

{-# INLINE trainInsertParent #-}
trainInsertParent ::
{-# INLINABLE singletonCT #-}
singletonCT :: ValidCT exprat childC leafC dp => dp -> CoverTree_ exprat childC leafC dp
singletonCT dp = Node
{ nodedp = dp
, nodeWeight = 1
, level = minBound
, numdp = 1
, children = empty
, leaves = empty
, maxDescendentDistance = 0
}

{-# INLINABLE trainMonoid #-}
trainMonoid ::
( ValidCT exprat childC leafC (Elem xs)
, Foldable xs
) => xs
-> Maybe' (CoverTree_ exprat childC leafC (Elem xs))
trainInsertParent xs = {-# SCC trainInsertParent #-} case unCons xs of
Nothing -> Nothing'
Just (dp,dps) -> Just' $ foldr' (insertCT addChild_parent) (singletonCT dp) $ toList dps
trainMonoid xs = {-# SCC trainMonoid #-} foldtree1 $ map (Just' . singletonCT) $ toList xs

{-# INLINABLE trainInsert #-}
trainInsert ::
----------------------------------------

{-# INLINABLE trainInsertOrig #-}
trainInsertOrig ::
( ValidCT exprat childC leafC (Elem xs)
, Foldable xs
) => AddChildMethod exprat childC leafC (Elem xs)
-> xs
) => xs
-> Maybe' (CoverTree_ exprat childC leafC (Elem xs))
trainInsert addChild xs = {-# SCC trainInsert #-} case unCons xs of
trainInsertOrig xs = {-# SCC trainInsertOrig #-}case unCons xs of
Nothing -> Nothing'
Just (dp,dps) -> Just' $ foldr' (insertCT addChild) (singletonCT dp) $ toList dps
Just (dp,dps) -> Just' $ foldr' insertCTOrig (singletonCT dp) dps

{-# INLINABLE insertCTOrig #-}
insertCTOrig ::
( ValidCT exprat childC leafC dp
) => dp
-> CoverTree_ exprat childC leafC dp
-> CoverTree_ exprat childC leafC dp
insertCTOrig dp ct = insertCTOrig_ dp ct (distance dp (nodedp ct))

{-# INLINABLE insertCTOrig_ #-}
insertCTOrig_ :: forall exprat childC leafC dp.
( ValidCT exprat childC leafC dp
) => dp
-> CoverTree_ exprat childC leafC dp
-> Scalar dp
-> CoverTree_ exprat childC leafC dp
insertCTOrig_ dp ct dist =
if dist > coverdist ct

-- | ct can't cover dp, so create a new node at dp that covers ct
then Node
{ nodedp = dp
, nodeWeight = 1
, level = dist2level_up (Proxy::Proxy exprat) dist
, numdp = numdp ct+1
, maxDescendentDistance = dist+maxDescendentDistance ct
, children = singleton
$ raiseRootLevel (dist2level_down (Proxy::Proxy exprat) dist)
$ ct
, leaves = empty
}

-- | insert dp underneath ct
else ct
{ numdp = numdp ct+1
, maxDescendentDistance = max dist (maxDescendentDistance ct)
, children = fromList $ go [] $ toList $ children ct
}

where
go !acc (x:xs) = if isFartherThan (nodedp x) dp (sepdist ct)
then go (x:acc) xs
else acc+((insertCTOrig dp x):xs)

go !acc [] = if dist > sepdist ct

-- far from root, so just insert the node
then ((singletonCT dp) { level = level ct-1 }):acc

-- close to root, so add new lower root level
else insertCTOrig_ dp ct' dist:acc
where
ct' = (singletonCT (nodedp ct))
{ level = level ct-1
, numdp = 0
, nodeWeight = 0
}

----------------------------------------

{-# INLINABLE trainInsertNoSort #-}
trainInsertNoSort ::
Expand All @@ -616,26 +686,6 @@ trainInsertNoSort xs = {-# SCC trainInsertNoSort #-}case unCons xs of
Nothing -> Nothing'
Just (dp,dps) -> Just' $ foldr' insertCTNoSort (singletonCT dp) dps

{-# INLINABLE trainMonoid #-}
trainMonoid ::
( ValidCT exprat childC leafC (Elem xs)
, Foldable xs
) => xs
-> Maybe' (CoverTree_ exprat childC leafC (Elem xs))
trainMonoid xs = {-# SCC trainMonoid #-} foldtree1 $ map (Just' . singletonCT) $ toList xs

{-# INLINABLE singletonCT #-}
singletonCT :: ValidCT exprat childC leafC dp => dp -> CoverTree_ exprat childC leafC dp
singletonCT dp = Node
{ nodedp = dp
, nodeWeight = 1
, level = minBound
, numdp = 1
, children = empty
, leaves = empty
, maxDescendentDistance = 0
}

{-# INLINABLE insertCTNoSort #-}
insertCTNoSort :: forall exprat childC leafC dp.
( ValidCT exprat childC leafC dp
Expand Down Expand Up @@ -673,6 +723,19 @@ insertCTNoSort dp ct = {-# SCC insertCTNoSort #-}
then go (x:acc) xs
else acc+((insertCTNoSort dp x):xs)

----------------------------------------

{-# INLINABLE trainInsert #-}
trainInsert ::
( ValidCT exprat childC leafC (Elem xs)
, Foldable xs
) => AddChildMethod exprat childC leafC (Elem xs)
-> xs
-> Maybe' (CoverTree_ exprat childC leafC (Elem xs))
trainInsert addChild xs = {-# SCC trainInsert #-} case unCons xs of
Nothing -> Nothing'
Just (dp,dps) -> Just' $ foldr' (insertCT addChild) (singletonCT dp) $ toList dps

-- | Insert a single data point into the cover tree.
{-# INLINABLE insertCT #-}
insertCT :: forall exprat childC leafC dp.
Expand Down
2 changes: 1 addition & 1 deletion src/HLearn/Models/Classifiers/Common.hs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ instance
type instance Scalar (MaybeLabeled label attr) = Scalar attr

instance Normed attr => Normed (MaybeLabeled label attr) where
abs (MaybeLabeled _ a) = abs a
size (MaybeLabeled _ a) = size a

-- FIXME: add faster functions
instance MetricSpace attr => MetricSpace (MaybeLabeled label attr) where
Expand Down
4 changes: 3 additions & 1 deletion src/exec/hlearn-allknn.hs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ data Params = Params
deriving (Show, Data, Typeable)

data TrainMethod
= TrainInsert_NoSort
= TrainInsert_Orig
| TrainInsert_NoSort
| TrainInsert_Sort
| TrainInsert_Parent
| TrainInsert_Ancestor
Expand Down Expand Up @@ -418,6 +419,7 @@ buildTree params xs = do
setexpratIORef $ P.toRational $ expansionRatio params

let trainmethod = case train_method params of
TrainInsert_Orig -> trainInsertOrig
TrainInsert_NoSort -> trainInsertNoSort
TrainInsert_Sort -> trainInsert addChild_nothing
TrainInsert_Parent -> trainInsert addChild_parent
Expand Down

0 comments on commit 3df1af9

Please sign in to comment.