/
Attoparsec.hs
148 lines (129 loc) · 5.61 KB
/
Attoparsec.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-|
Module : network-attoparsec
Description : Utility functions for running a parser against a socket
Copyright : (c) Leon Mergen, 2015
License : MIT
Maintainer : leon@solatis.com
Stability : experimental
Utility functions for running a parser against a socket, without the need of a
bigger framework such as Pipes or Conduit.
-}
module Network.Attoparsec (ParseC, parseMany, parseOne) where
import Control.Monad.Catch
import Control.Monad.IO.Class
import Control.Exception.Enclosed (tryAny)
import qualified Data.Attoparsec.ByteString as Atto
import qualified Data.ByteString as BS
import qualified Network.Socket as NS
import qualified Network.Socket.ByteString as NSB
-- | The parsing continuation form of a "Data.Attoparsec" parser. This is
-- typically created by running the attoparsec "parse" function:
--
-- > createParser = AttoParsec.parse myParser
type ParseC a = BS.ByteString -> Atto.Result a
-- | The type of parsing to perform, greedy or non-greedy
data ParseMode = Single | Many
-- | Consumes input from socket and attempts to parse as many objects from the
-- socket as possible. Use this function only when you expect more than one
-- parse operation to succeed.
--
-- The function is continuation based, so you must provide the next parser
-- state in successing calls as follows:
--
-- > doParse sock = do
-- > (p1, xs1) <- parseMany sock (AttoParsec.parse myParser) (AttoParsec.parse myParser)
-- > (_, xs2) <- parseMany sock (AttoParsec.parse myParser) p1
-- > return (xs1 ++ xs2)
--
-- For more usage examples, see the test directory.
parseMany :: ( MonadIO m
, MonadMask m
, Show a)
=> NS.Socket -- ^ Socket to read data from
-> ParseC a -- ^ Initial parser state
-> ParseC a -- ^ Continuation parser state
-> m (ParseC a, [a]) -- ^ Next parser state with parsed values
parseMany s p0 pCur = do
buf <- readAvailable s Nothing
(p1, xs) <- parseBuffer p0 Many buf pCur
return (p1, xs)
-- | Similar to parseMany, but assumes that there will only be enough data for a
-- single succesful parse on the socket, and guarantees that exactly one item
-- will be parsed.
--
-- __Warning:__ In order to make this function work stable with pipelined data,
-- we read in data one byte at a time, which causes many context
-- switches and kernel syscalls, and furthermore causes a lot of
-- separate calls to attoparsec. So only use if performance is not
-- a consideration.
--
-- The is typically used as follows:
--
-- > doParse sock = parseOne sock (AttoParsec.parse myParser)
parseOne :: ( MonadIO m
, MonadMask m
, Show a)
=> NS.Socket -- ^ Socket to read data from
-> ParseC a -- ^ Initial parser state
-> m a -- ^ Parsed value
parseOne s p0 = do
buf <- readAvailable s (Just 1)
(p1, value) <- parseBuffer p0 Single buf p0
case value of
-- We do not yet have enough data for a single item, let's request more
[] -> parseOne s p1
[x] -> return x
-- This is an internal error, since it means our single-object parser
-- returned multiple objects.
_ -> error "More than one element parsed"
parseBuffer :: ( MonadIO m
, MonadMask m
, Show a)
=> ParseC a -- ^ Initial parser state
-> ParseMode -- ^ Whether to perform greedy or non-greedy parsing
-> BS.ByteString -- ^ Unconsumed buffer from previous run
-> ParseC a -- ^ Current parser state
-> m (ParseC a, [a]) -- ^ Next parser state with parsed values
parseBuffer p0 mode =
let next bCur pCur =
case pCur bCur of
-- On error, throw error through MonadError
Atto.Fail err _ _ -> fail ("An error occurred while parsing: " ++ show err)
Atto.Partial p1 -> return (p1, [])
Atto.Done b1 v ->
if BS.null b1
-- This means a "perfect parse" occured: exactly enough data was on
-- the socket to complete one parse round.
then return (p0, [v])
else case mode of
-- We are in single-object parsing mode, have parsed one object,
-- but still have data left on the buffer: at this point, we can
-- either discard the data on the buffer, or throw an error.
--
-- We throw an error, since within "single-object parsing mode"
-- we assume only perfect parses happen.
Single -> fail ("Unconsumed data left on socket: " ++ show b1)
-- Multi-object parsing mode, in which case we will enter
-- recursion.
Many -> do
(p1, xs) <- next b1 p0
return (p1, v : xs)
in next
readAvailable :: ( MonadIO m
, MonadMask m)
=> NS.Socket
-> Maybe Int
-> m BS.ByteString
readAvailable s Nothing = readAvailable s (Just 2048)
readAvailable s (Just bytes) =
let buf :: IO (Maybe BS.ByteString)
buf = do
-- For some reason, Windows seems to be generating an exception sometimes
-- when the remote has closed the connection
result <- tryAny $ NSB.recv s bytes
case result of
Left _ -> return Nothing
Right v -> return (Just v)
in maybe (return BS.empty) return =<< liftIO buf