forked from elginer/shpider
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Links.hs
63 lines (56 loc) · 1.98 KB
/
Links.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
{-
-
- Copyright (c) 2009-2010 Johnny Morrice
-
- Permission is hereby granted, free of charge, to any person
- obtaining a copy of this software and associated documentation
- files (the "Software"), to deal in the Software without
- restriction, including without limitation the rights to use, copy,
- modify, merge, publish, distribute, sublicense, and/or sell copies
- of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-
-}
module Network.Shpider.Links
( Link (..)
, gatherLinks
, allLinks
)
where
import Data.Maybe
import Text.HTML.TagSoup.Parsec
import Network.Shpider.TextUtils
-- | Collect every 'Link' from a list of tags by handing the 'allLinks'
-- parser to 'tParse'.
gatherLinks :: [ Tag String ] -> [ Link ]
gatherLinks tags = tParse allLinks tags
-- | Tag parser that gathers every whole anchor (@a@) tag with
-- 'allWholeTags' and turns the results into 'Link' values.
allLinks :: TagParser String [ Link ]
allLinks = fmap toLinks ( allWholeTags "a" )
-- | Convert a list of whole tags into 'Link's, silently dropping every
-- tag for which 'toLink' produces 'Nothing'.
toLinks :: [ ( Tag String , [ Tag String ] , Tag String ) ] -> [ Link ]
toLinks = mapMaybe toLink
-- | Try to build a 'Link' from a whole tag, given as a triple: the first
-- component supplies the attributes, the second the inner tags, and the
-- third is ignored.
--
-- The result is 'Nothing' when 'attrLookup' finds no @href@ among the
-- attributes, or when the first component is not a 'TagOpen'.
toLink :: ( Tag String , [ Tag String ] , Tag String ) -> Maybe Link
toLink ( TagOpen _ attrs , innerTags , _ ) =
  fmap mkLink ( attrLookup "href" attrs )
  where
    -- The link text is whatever 'innerText' extracts from the inner tags.
    mkLink address =
      Link { linkAddress = address
           , linkText = innerText innerTags
           }
-- Keep the function total: a triple that does not start with an opening
-- tag is skipped instead of raising a pattern-match failure.
toLink _ = Nothing
-- | A hyperlink: an address, corresponding to the @href@ attribute, and
-- some inner text.
data Link = Link
  { linkAddress :: String -- ^ Value of the @href@ attribute.
  , linkText :: String -- ^ Text found inside the link.
  }
  deriving ( Show , Eq )