Permalink
Browse files

Add support for type conversion based on named records

  • Loading branch information...
tibbe committed Apr 23, 2012
1 parent 1240f12 commit ca7760576c434eafbd0250ac62b6cecc249edc7e
Showing with 193 additions and 8 deletions.
  1. +41 −2 Data/Ceason.hs
  2. +2 −2 Data/Ceason/Parser/Internal.hs
  3. +9 −0 Data/Ceason/Types.hs
  4. +133 −4 Data/Ceason/Types/Class.hs
  5. +6 −0 Data/Ceason/Types/Internal.hs
  6. +2 −0 ceason.cabal
View
@@ -10,27 +10,66 @@ module Data.Ceason
-- * Core CSV types
, Csv
, Record
+ , NamedRecord
, Field
-- * Type conversion
+ -- $typeconversion
+
+ -- ** Index-based conversion
+ -- $indexbased
, Only(..)
, FromRecord(..)
- , FromField(..)
, ToRecord(..)
+
+ -- ** Name-based conversion
+ -- $namebased
+ , BSMap(..)
+ , BSHashMap(..)
+ , FromNamedRecord(..)
+ , ToNamedRecord(..)
+
+ -- ** Individual field conversion
+ , FromField(..)
, ToField(..)
-- * Accessors
, (.!)
+ , (.:)
+ , (.=)
+ , record
+ , namedRecord
) where
import qualified Data.ByteString.Lazy as L
import Data.Traversable
import Data.Vector (Vector)
import Data.Ceason.Encode
-import Data.Ceason.Parser.Internal
+import Data.Ceason.Parser.Internal hiding (record)
import Data.Ceason.Types
+-- $typeconversion
+--
+-- There are two ways to convert between CSV records and data types:
+-- index based and name based.
+
+-- $indexbased
+--
+-- Index-based conversion lets you convert CSV records to data types
+-- by referring to a field's position (its index) in the file. The
+-- first column in a CSV file is given index 0, the second index 1,
+-- and so on.
+
+-- $namebased
+--
+-- Name-based conversion lets you convert CSV records to data types by
+-- referring to a field's name. The names of the fields are defined
+-- by the first line in the file, also known as the header.
+-- Name-based conversion is more robust to changes in the file
+-- structure, e.g. to reordering or addition of columns, but can be a
+-- bit slower.
+
-- | Efficiently deserialize CSV records from a lazy
-- 'L.ByteString'. If this fails due to incomplete or invalid input,
-- 'Nothing' is returned.
@@ -19,7 +19,7 @@ import Data.Monoid
import qualified Data.Vector as V
import Data.Word
-import Data.Ceason.Types
+import Data.Ceason.Types hiding (record)
csv :: AL.Parser Csv
csv = do
@@ -34,7 +34,7 @@ record = V.fromList <$> field `sepBy1` comma
field :: AL.Parser Field
field = do
- mb <- A.peek
+ mb <- A.peekWord8
-- We purposely don't use <|> as we want to commit to the first
-- choice if we see a double quote.
case mb of
View
@@ -3,6 +3,7 @@ module Data.Ceason.Types
-- * Core CSV types
Csv
, Record
+ , NamedRecord
, Field
, Parser
, Result(..)
@@ -13,12 +14,20 @@ module Data.Ceason.Types
-- * Type conversion
, Only(..)
, FromRecord(..)
+ , BSMap(..)
+ , BSHashMap(..)
+ , FromNamedRecord(..)
+ , ToNamedRecord(..)
, FromField(..)
, ToRecord(..)
, ToField(..)
-- * Accessors
, (.!)
+ , (.:)
+ , (.=)
+ , record
+ , namedRecord
) where
import Data.Ceason.Types.Class
View
@@ -1,22 +1,33 @@
+{-# LANGUAGE OverloadedStrings #-}
module Data.Ceason.Types.Class
- (
+ (
-- * Type conversion
Only(..)
, FromRecord(..)
+ , BSMap(..)
+ , BSHashMap(..)
+ , FromNamedRecord(..)
+ , ToNamedRecord(..)
, FromField(..)
, ToRecord(..)
, ToField(..)
-- * Accessors
, (.!)
+ , (.:)
+ , (.=)
+ , record
+ , namedRecord
) where
import Control.Applicative
import Data.Attoparsec.Char8 (double, number, parseOnly)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L
+import qualified Data.HashMap.Lazy as HM
import Data.Int
+import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as LT
@@ -34,6 +45,9 @@ import Data.Ceason.Types.Internal
------------------------------------------------------------------------
-- Type conversion
+------------------------------------------------------------------------
+-- Index-based conversion
+
-- | A type that can be converted from a single CSV record, with the
-- possibility of failure.
--
@@ -67,12 +81,24 @@ newtype Only a = Only {
fromOnly :: a
} deriving (Eq, Ord, Read, Show)
+-- | A type that can be converted to a single CSV record.
+--
+-- An example type and instance:
+--
+-- @data Person = Person { name :: Text, age :: Int }
+--
+-- instance ToRecord Person where
+-- toRecord (Person name age) = record [
+-- toField name, toField age]
+-- @
+--
+-- Outputs data in this form:
+--
+-- > John,56
+-- > Jane,55
class ToRecord a where
toRecord :: a -> Record
-class ToField a where
- toField :: a -> Field
-
instance FromField a => FromRecord (Only a) where
parseRecord v
| n == 1 = Only <$> parseField (V.unsafeIndex v 0)
@@ -199,9 +225,85 @@ instance ToField a => ToRecord [a] where
instance FromField a => FromRecord (V.Vector a) where
parseRecord = traverse parseField
+-- TODO: Check if this can give rise to overlapping instances.
instance ToField a => ToRecord (Vector a) where
toRecord = V.map toField
+------------------------------------------------------------------------
+-- Name-based conversion
+
+-- | A 'M.Map' keyed by 'B.ByteString' keys.
+--
+-- The primary use case of 'BSMap' is to decode a CSV file into a
+-- @'BSMap' 'B.ByteString'@, which lets you process the CSV data
+-- without converting it to a more specific type.
+newtype BSMap a = BSMap {
+ fromBSMap :: M.Map B.ByteString a
+ }
+
+-- | A 'HM.HashMap' keyed by 'B.ByteString' keys.
+--
+-- The primary use case of 'BSHashMap' is to decode a CSV file into a
+-- @'BSHashMap' 'B.ByteString'@, which lets you process the CSV data
+-- without converting it to a more specific type.
+newtype BSHashMap a = BSHashMap {
+ fromBSHashMap :: HM.HashMap B.ByteString a
+ }
+
+-- | A type that can be converted from a single CSV record, with the
+-- possibility of failure.
+--
+-- When writing an instance, use 'empty', 'mzero', or 'fail' to make a
+-- conversion fail, e.g. if a 'NamedRecord' is missing a required
+-- field.
+--
+-- Given this example data:
+--
+-- > name,age
+-- > John,56
+-- > Jane,55
+--
+-- here's an example type and instance:
+--
+-- @{-\# LANGUAGE OverloadedStrings \#-}
+--
+-- data Person = Person { name :: Text, age :: Int }
+--
+-- instance FromNamedRecord Person where
+-- parseNamedRecord m = Person '<$>'
+-- m '.:' \"name\" '<*>'
+-- m '.:' \"age\"
+-- @
+--
+-- Note the use of the @OverloadedStrings@ language extension which
+-- enables 'B8.ByteString' values to be written as string literals.
+class FromNamedRecord a where
+ parseNamedRecord :: NamedRecord -> Parser a
+
+-- | A type that can be converted to a single CSV record.
+--
+-- An example type and instance:
+--
+-- @data Person = Person { name :: Text, age :: Int }
+--
+-- instance ToNamedRecord Person where
+-- toNamedRecord (Person name age) = 'namedRecord' [
+-- \"name\" '.=' name, \"age\" '.=' age]
+-- @
+class ToNamedRecord a where
+ toNamedRecord :: a -> NamedRecord
+
+-- Converts every field value with 'parseField' and collects the
+-- results into a 'M.Map' keyed by the field names. The conversion as
+-- a whole fails if any single field fails to parse.
+instance FromField a => FromNamedRecord (BSMap a) where
+    parseNamedRecord m = BSMap . M.fromList <$> traverse parseSnd (HM.toList m)
+      where
+        -- Keep the field name as is; only the value goes through the parser.
+        parseSnd (name, s) = (,) name <$> parseField s
+
+-- Converts every field value with 'parseField', keeping the field
+-- names as the keys of the resulting 'HM.HashMap'.
+instance FromField a => FromNamedRecord (BSHashMap a) where
+    parseNamedRecord m = BSHashMap <$> traverse parseField m
+
+------------------------------------------------------------------------
+-- Individual field conversion
+
-- | A type that can be converted from a single CSV field, with the
-- possibility of failure.
--
@@ -225,6 +327,9 @@ instance ToField a => ToRecord (Vector a) where
class FromField a where
parseField :: Field -> Parser a
+class ToField a where
+ toField :: a -> Field
+
instance FromField Char where
parseField s
| T.compareLength t 1 == EQ = pure (T.head t)
@@ -407,3 +512,27 @@ parseIntegral s = case parseOnly number s of
(.!) :: FromField a => Record -> Int -> Parser a
v .! idx = parseField (v ! idx)
{-# INLINE (.!) #-}
+
+-- | Retrieve a field in the given record by name. The conversion
+-- fails, with a message naming the field, if the field is missing.
+-- It also fails if 'parseField' cannot convert the value to the
+-- desired type.
+(.:) :: FromField a => NamedRecord -> B.ByteString -> Parser a
+m .: name = maybe missing parseField (HM.lookup name m)
+  where
+    -- Reported via 'fail' so the error carries the offending name.
+    missing = fail ("No field named " ++ B8.unpack name)
+{-# INLINE (.:) #-}
+
+-- | Pair a field name with a value encoded via 'toField'. The
+-- resulting pairs are meant to be passed to 'namedRecord'.
+(.=) :: ToField a => B.ByteString -> a -> (B.ByteString, B.ByteString)
+key .= x = (key, encoded)
+  where encoded = toField x
+{-# INLINE (.=) #-}
+
+-- | Build a 'Record' from a list of already-encoded fields. Use
+-- 'toField' to turn values into 'B.ByteString's before handing them
+-- to 'record'.
+record :: [B.ByteString] -> Record
+record fields = V.fromList fields
+
+-- | Build a 'NamedRecord' from a list of name-value 'B.ByteString'
+-- pairs. Each pair can be produced from a name and a value with '.='.
+namedRecord :: [(B.ByteString, B.ByteString)] -> NamedRecord
+namedRecord pairs = HM.fromList pairs
@@ -5,6 +5,7 @@ module Data.Ceason.Types.Internal
-- * Core CSV types
Csv
, Record
+ , NamedRecord
, Field
, Parser
, Result(..)
@@ -16,6 +17,7 @@ module Data.Ceason.Types.Internal
import Control.Applicative
import Control.Monad
import qualified Data.ByteString as S
+import qualified Data.HashMap.Strict as HM
import Data.Monoid
import Data.Vector (Vector)
@@ -26,6 +28,10 @@ type Csv = Vector Record
-- | A record corresponds to a single line in a CSV file.
type Record = Vector Field
+-- | A record corresponds to a single line in a CSV file, indexed by
+-- the column name rather than the column index.
+type NamedRecord = HM.HashMap S.ByteString S.ByteString
+
-- | A single field within a record.
type Field = S.ByteString
View
@@ -32,7 +32,9 @@ Library
base,
blaze-builder,
bytestring,
+ containers,
text,
+ unordered-containers,
vector
ghc-options: -Wall -O2

0 comments on commit ca77605

Please sign in to comment.