Skip to content
Browse files

CSV ops based on Text!

  • Loading branch information...
1 parent b6b83c2 commit 90bd0af397a3f1ebed65e757e8f856fbf660a338 @ozataman committed Apr 7, 2012
Showing with 94 additions and 52 deletions.
  1. +1 −0 csv-conduit.cabal
  2. +93 −52 src/Data/CSV/Conduit.hs
View
1 csv-conduit.cabal
@@ -69,6 +69,7 @@ Library
, containers >= 0.3
, directory
, bytestring
+ , text
, transformers >= 0.2
, safe
, unix-compat >= 0.2.1.1
View
145 src/Data/CSV/Conduit.hs
@@ -1,6 +1,8 @@
{-# LANGUAGE OverloadedStrings, BangPatterns #-}
{-# LANGUAGE PackageImports #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
module Data.CSV.Conduit
@@ -24,38 +26,47 @@ import Data.Conduit as C
import Data.Conduit.Attoparsec
import Data.Conduit.Binary
import qualified Data.Map as M
+import Data.String
+import Data.Text (Text)
+import qualified Data.Text as T
import Data.Word (Word8)
import Safe (headMay)
import System.Directory
import System.IO
import System.PosixCompat.Files (getFileStatus, fileSize)
-------------------------------------------------------------------------------
import qualified Data.CSV.Conduit.Parser.ByteString as BSP
+import qualified Data.CSV.Conduit.Parser.Text as TP
import Data.CSV.Conduit.Types
-------------------------------------------------------------------------------
-class CSVeable r where
+-------------------------------------------------------------------------------
+-- | Represents types 'r' that can be converted from an underlying
+-- stream of type 's'.
+class CSVeable s r where
-----------------------------------------------------------------------------
-- | Convert a CSV row into strict ByteString equivalent.
- rowToStr :: CSVSettings -> r -> B.ByteString
+ rowToStr :: CSVSettings -> r -> s
-----------------------------------------------------------------------------
-- | Possibly return headers.
- fileHeaders :: r -> Maybe (Row ByteString)
+ fileHeaders :: r -> Maybe (Row s)
fileHeaders = const Nothing
-----------------------------------------------------------------------------
- intoCSV :: MonadResource m => CSVSettings -> Conduit B.ByteString m r
+ -- | Turn a stream of 's' into a stream of CSV row type
+ intoCSV :: MonadResource m => CSVSettings -> Conduit s m r
-----------------------------------------------------------------------------
- fromCSV :: MonadResource m => CSVSettings -> Conduit r m B.ByteString
+ -- | Turn a stream of CSV row type back into a stream of 's'
+ fromCSV :: MonadResource m => CSVSettings -> Conduit r m s
------------------------------------------------------------------------------
--- | 'Row' instance
-instance CSVeable (Row ByteString) where
+-- | 'Row' instance using 'ByteString'
+instance CSVeable ByteString (Row ByteString) where
rowToStr s !r =
let
sep = B.pack [c2w (csvOutputColSep s)]
@@ -65,55 +76,85 @@ instance CSVeable (Row ByteString) where
escape c str = B8.intercalate (B8.pack [c,c]) $ B8.split c str
in B.intercalate sep . map wrapField $ r
- intoCSV set = parser =$= puller
- where
- parser = sequenceSink () seqSink
- seqSink _ = do
- p <- sinkParser (BSP.row set)
- return $ Emit () [p]
- puller = do
- inc <- await
- case inc of
- Nothing -> return ()
- Just i ->
- case i of
- Just i' -> yield i' >> puller
- Nothing -> puller
-
- fromCSV set = conduitState init push close
- where
- init = ()
- push st r = return $ StateProducing st [B.concat [rowToStr set r, "\n"]]
- close _ = return []
-
-
+ intoCSV set = intoCSVRow (BSP.row set)
+ fromCSV set = fromCSVRow set
+
+
+------------------------------------------------------------------------------
+-- | 'Row' instance using 'Text'
+instance CSVeable Text (Row Text) where
+ rowToStr s !r =
+ let
+ sep = T.pack [(csvOutputColSep s)]
+ wrapField !f = case (csvOutputQuoteChar s) of
+ Just !x -> x `T.cons` escape x f `T.snoc` x
+ otherwise -> f
+ escape c str = T.intercalate (T.pack [c,c]) $ T.split (== c) str
+ in T.intercalate sep . map wrapField $ r
+
+ intoCSV set = intoCSVRow (TP.row set)
+ fromCSV set = fromCSVRow set
+
-------------------------------------------------------------------------------
--- | 'MapRow' Instance
-instance CSVeable (MapRow ByteString) where
- rowToStr s r = rowToStr s . M.elems $ r
+fromCSVRow set = conduitState init push close
+ where
+ init = ()
+ push st r = return $ StateProducing st [rowToStr set r, "\n"]
+ close _ = return []
+
+
+-------------------------------------------------------------------------------
+intoCSVRow p = parser =$= puller
+ where
+ parser = sequenceSink () seqSink
+ seqSink _ = do
+ p <- sinkParser p
+ return $ Emit () [p]
+ puller = do
+ inc <- await
+ case inc of
+ Nothing -> return ()
+ Just i ->
+ case i of
+ Just i' -> yield i' >> puller
+ Nothing -> puller
+
+
+-------------------------------------------------------------------------------
+-- | Generic 'MapRow' instance; any stream type with a 'Row' instance
+-- automatically gets a 'MapRow' instance.
+instance (CSVeable s (Row s), Ord s, IsString s) => CSVeable s (MapRow s) where
+ rowToStr s r = rowToStr s . M.elems $ r
fileHeaders r = Just $ M.keys r
-
- intoCSV set = intoCSV set =$= converter
- where
- converter = conduitState Nothing push close
- where
- push Nothing row =
- case row of
- [] -> return $ StateProducing Nothing []
- xs -> return $ StateProducing (Just xs) []
- push st@(Just hs) row = return $ StateProducing st [toMapCSV hs row]
- toMapCSV !headers !fs = M.fromList $ zip headers fs
- close _ = return []
- fromCSV set = conduitState False push close
- where
- push False r = return $ StateProducing True
- [B.concat [rowToStr set (M.keys r), "\n", rowToStr set (M.elems r), "\n"]]
- push True r = return $ StateProducing True
- [B.concat [rowToStr set (M.elems r), "\n"]]
- close _ = return []
-
+ intoCSV set = intoCSVMap set
+ fromCSV set = fromCSVMap set
+
+
+-------------------------------------------------------------------------------
+intoCSVMap set = intoCSV set =$= converter
+ where
+ converter = conduitState Nothing push close
+ where
+ push Nothing row =
+ case row of
+ [] -> return $ StateProducing Nothing []
+ xs -> return $ StateProducing (Just xs) []
+ push st@(Just hs) row = return $ StateProducing st [toMapCSV hs row]
+ toMapCSV !headers !fs = M.fromList $ zip headers fs
+ close _ = return []
+
+
+-------------------------------------------------------------------------------
+fromCSVMap set = conduitState False push close
+ where
+ push False r = return $ StateProducing True
+ [rowToStr set (M.keys r), "\n", rowToStr set (M.elems r), "\n"]
+ push True r = return $ StateProducing True
+ [rowToStr set (M.elems r), "\n"]
+ close _ = return []
+

0 comments on commit 90bd0af

Please sign in to comment.
Something went wrong with that request. Please try again.