Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

196 lines (168 sloc) 6.787 kb
{-# LANGUAGE BangPatterns, CPP, DeriveFunctor #-}
-- | This module allows for streaming decoding of CSV data. This is
-- useful if you need to parse large amounts of input in constant
-- space. The API also allows you to ignore type conversion errors on
-- a per-record basis.
module Data.Csv.Streaming
(
-- * Usage example
-- $example
-- * Stream representation
-- $stream-representation
Records(..)
-- * Decoding records
-- $typeconversion
-- ** Index-based record conversion
-- $indexbased
, decode
, decodeWith
-- ** Name-based record conversion
-- $namebased
, decodeByName
, decodeByNameWith
) where
import Control.Applicative ((<$>), (<*>), pure)
import Control.DeepSeq (NFData(rnf))
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BL8
import Data.Foldable (Foldable(..))
import Data.Traversable (Traversable(..))
import Prelude hiding (foldr)
import Data.Csv.Conversion
import Data.Csv.Incremental hiding (decode, decodeByName, decodeByNameWith,
decodeWith)
import qualified Data.Csv.Incremental as I
import Data.Csv.Parser
import Data.Csv.Types
#if !MIN_VERSION_bytestring(0,10,0)
import qualified Data.ByteString.Lazy.Internal as BL -- for constructors
#endif
-- $example
--
-- A short usage example:
--
-- > for_ (decode False "John,27\r\nJane,28\r\n") $ \ (name, age :: Int) ->
-- > putStrLn $ name ++ " is " ++ show age ++ " years old"
--
-- N.B. The 'Foldable' instance, which is used above, skips records
-- that failed to convert. If you don't want this behavior, work
-- directly with the 'Cons' and 'Nil' constructors.
-- $stream-representation
--
-- A stream of records is represented as a (lazy) list that may
-- contain errors.
-- $typeconversion
--
-- Just like in the case of non-streaming decoding, there are two ways
-- to convert CSV records to and from and user-defined data types:
-- index-based conversion and name-based conversion.
-- $indexbased
--
-- See documentation on index-based conversion in "Data.Csv" for more
-- information.
-- $namebased
--
-- See documentation on name-based conversion in "Data.Csv" for more
-- information.
-- | A stream of parsed records. If type conversion failed for the
-- record, the error is returned as @'Left' errMsg@.
data Records a
= -- | A record or an error message, followed by more records.
Cons (Either String a) (Records a)
-- | End of stream, potentially due to a parse error. If a parse
-- error occured, the first field contains the error message.
-- The second field contains any unconsumed input.
| Nil (Maybe String) BL.ByteString
deriving (Eq, Functor, Show)
-- | Skips records that failed to convert.
instance Foldable Records where
foldr = foldrRecords
#if MIN_VERSION_base(4,6,0)
foldl' = foldlRecords'
#endif
foldrRecords :: (a -> b -> b) -> b -> Records a -> b
foldrRecords f = go
where
go z (Cons (Right x) rs) = f x (go z rs)
go z _ = z
{-# INLINE foldrRecords #-}
#if MIN_VERSION_base(4,6,0)
foldlRecords' :: (a -> b -> a) -> a -> Records b -> a
foldlRecords' f = go
where
go z (Cons (Right x) rs) = let z' = f z x in z' `seq` go z' rs
go z _ = z
{-# INLINE foldlRecords' #-}
#endif
instance Traversable Records where
traverse _ (Nil merr rest) = pure $ Nil merr rest
traverse f (Cons x xs) = Cons <$> traverseElem x <*> traverse f xs
where
traverseElem (Left err) = pure $ Left err
traverseElem (Right y) = Right <$> f y
instance NFData a => NFData (Records a) where
rnf (Cons r rs) = rnf r `seq` rnf rs
#if MIN_VERSION_bytestring(0,10,0)
rnf (Nil errMsg rest) = rnf errMsg `seq` rnf rest
#else
rnf (Nil errMsg rest) = rnf errMsg `seq` rnfLazyByteString rest
rnfLazyByteString :: BL.ByteString -> ()
rnfLazyByteString BL.Empty = ()
rnfLazyByteString (BL.Chunk _ b) = rnfLazyByteString b
#endif
-- | Efficiently deserialize CSV records in a streaming fashion.
-- Equivalent to @'decodeWith' 'defaultDecodeOptions'@.
decode :: FromRecord a
=> Bool -- ^ Data contains header that should be
-- skipped
-> BL.ByteString -- ^ CSV data
-> Records a
decode = decodeWith defaultDecodeOptions
-- | Like 'decode', but lets you customize how the CSV data is parsed.
decodeWith :: FromRecord a
=> DecodeOptions -- ^ Decoding options
-> Bool -- ^ Data contains header that should be
-- skipped
-> BL.ByteString -- ^ CSV data
-> Records a
decodeWith !opts skipHeader s0 = case BL.toChunks s0 of
[] -> go [] (feedEndOfInput $ I.decodeWith opts skipHeader)
(s:ss) -> go ss (I.decodeWith opts skipHeader `feedChunk` s)
where
go ss (Done xs) = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs
go ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss))
go [] (Partial k) = go [] (k B.empty)
go (s:ss) (Partial k) = go ss (k s)
go [] (Some xs k) = foldr Cons (go [] (k B.empty)) xs
go (s:ss) (Some xs k) = foldr Cons (go ss (k s)) xs
-- | Efficiently deserialize CSV in a streaming fashion. The data is
-- assumed to be preceeded by a header. Returns @'Left' errMsg@ if
-- parsing the header fails. Equivalent to @'decodeByNameWith'
-- 'defaultDecodeOptions'@.
decodeByName :: FromNamedRecord a
=> BL.ByteString -- ^ CSV data
-> Either String (Header, Records a)
decodeByName = decodeByNameWith defaultDecodeOptions
-- TODO: Include something more in error messages?
-- | Like 'decodeByName', but lets you customize how the CSV data is
-- parsed.
decodeByNameWith :: FromNamedRecord a
=> DecodeOptions -- ^ Decoding options
-> BL.ByteString -- ^ CSV data
-> Either String (Header, Records a)
decodeByNameWith !opts s0 = case BL.toChunks s0 of
[] -> go [] (feedEndOfInputH $ I.decodeByNameWith opts)
(s:ss) -> go ss (I.decodeByNameWith opts `feedChunkH` s)
where
go ss (DoneH hdr p) = Right (hdr, go2 ss p)
go ss (FailH rest err) = Left $ err ++ " at " ++
show (BL8.unpack . BL.fromChunks $ rest : ss)
go [] (PartialH k) = go [] (k B.empty)
go (s:ss) (PartialH k) = go ss (k s)
go2 ss (Done xs) = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs
go2 ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss))
go2 [] (Partial k) = go2 [] (k B.empty)
go2 (s:ss) (Partial k) = go2 ss (k s)
go2 [] (Some xs k) = foldr Cons (go2 [] (k B.empty)) xs
go2 (s:ss) (Some xs k) = foldr Cons (go2 ss (k s)) xs
Jump to Line
Something went wrong with that request. Please try again.