diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs
index f62a02c9990..1885e8e89e9 100644
--- a/hledger-lib/Hledger/Read/CsvReader.hs
+++ b/hledger-lib/Hledger/Read/CsvReader.hs
@@ -140,7 +140,7 @@ readJournalFromCsv separator mrulesfile csvfile csvdata =
-- parsec seems to fail if you pass it "-" here TODO: try again with megaparsec
let parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
records <- (either throwerr id .
- dbg2 "validateCsv" . validateCsv skiplines .
+ dbg2 "validateCsv" . validateCsv rules skiplines .
dbg2 "parseCsv")
`fmap` parseCsv separator parsecfilename csvdata
dbg1IO "first 3 csv records" $ take 3 records
@@ -216,11 +216,22 @@ printCSV records = unlined (printRecord `map` records)
unlined = concat . intersperse "\n"
-- | Return the cleaned up and validated CSV data (can be empty), or an error.
-validateCsv :: Int -> Either String CSV -> Either String [CsvRecord]
-validateCsv _ (Left err) = Left err
-validateCsv numhdrlines (Right rs) = validate $ drop numhdrlines $ filternulls rs
+validateCsv :: CsvRules -> Int -> Either String CSV -> Either String [CsvRecord]
+validateCsv _ _ (Left err) = Left err
+validateCsv rules numhdrlines (Right rs) = validate $ applyConditionalSkips $ drop numhdrlines $ filternulls rs
where
filternulls = filter (/=[""])
+ skipCount r =
+ case (getEffectiveAssignment rules r "end", getEffectiveAssignment rules r "skip") of
+ (Nothing, Nothing) -> Nothing
+ (Just _, _) -> Just maxBound
+ (Nothing, Just "") -> Just 1
+ (Nothing, Just x) -> Just (read x)
+ applyConditionalSkips [] = []
+ applyConditionalSkips (r:rest) =
+ case skipCount r of
+ Nothing -> r:(applyConditionalSkips rest)
+ Just cnt -> applyConditionalSkips (drop (cnt-1) rest)
validate [] = Right []
validate rs@(_first:_)
| isJust lessthan2 = let r = fromJust lessthan2 in
@@ -266,7 +277,7 @@ defaultRulesText csvfile = T.pack $ unlines
,""
,"account1 assets:bank:checking"
,""
- ,"fields date, description, amount"
+ ,"fields date, description, amount1"
,""
,"#skip 1"
,"#newest-first"
@@ -454,20 +465,8 @@ parseCsvRules rulesfile s =
validateRules :: CsvRules -> Either String CsvRules
validateRules rules = do
unless (isAssigned "date") $ Left "Please specify (at top level) the date field. Eg: date %1\n"
- unless ((amount && not (amountin || amountout)) ||
- (not amount && (amountin && amountout)) ||
- balance)
- $ Left $ unlines [
- "Please specify (as a top level CSV rule) either the amount field,"
- ,"both the amount-in and amount-out fields, or the balance field. Eg:"
- ,"amount %2\n"
- ]
Right rules
where
- amount = isAssigned "amount"
- amountin = isAssigned "amount-in"
- amountout = isAssigned "amount-out"
- balance = isAssigned "balance" || isAssigned "balance1" || isAssigned "balance2"
isAssigned f = isJust $ getEffectiveAssignment rules [] f
-- parsers
@@ -553,8 +552,9 @@ fieldassignmentp :: CsvRulesParser (JournalFieldName, FieldTemplate)
fieldassignmentp = do
lift $ dbgparse 3 "trying fieldassignmentp"
f <- journalfieldnamep
- assignmentseparatorp
- v <- fieldvalp
+ v <- choiceInState [ assignmentseparatorp >> fieldvalp
+ , lift eolof >> return ""
+ ]
return (f,v)
> "field assignment"
@@ -566,14 +566,19 @@ journalfieldnamep = do
-- Transaction fields and pseudo fields for CSV conversion.
-- Names must precede any other name they contain, for the parser
-- (amount-in before amount; date2 before date). TODO: fix
-journalfieldnames = [
- "account1"
- ,"account2"
- ,"amount-in"
+journalfieldnames =
+ concat [[ "account" ++ i
+ ,"amount" ++ i ++ "-in"
+ ,"amount" ++ i ++ "-out"
+ ,"amount" ++ i
+ ,"balance" ++ i
+ ,"comment" ++ i
+ ,"currency" ++ i
+ ] | x <- [1..9], let i = show x]
+ ++
+ ["amount-in"
,"amount-out"
,"amount"
- ,"balance1"
- ,"balance2"
,"balance"
,"code"
,"comment"
@@ -582,17 +587,16 @@ journalfieldnames = [
,"date"
,"description"
,"status"
+ ,"skip" -- skip and end are not really fields, but we list it here to allow conditional rules that skip records
+ ,"end"
]
assignmentseparatorp :: CsvRulesParser ()
assignmentseparatorp = do
lift $ dbgparse 3 "trying assignmentseparatorp"
- choice [
- -- try (lift (skipMany spacenonewline) >> oneOf ":="),
- try (lift (skipMany spacenonewline) >> char ':'),
- spaceChar
- ]
- _ <- lift (skipMany spacenonewline)
+ _ <- choiceInState [ lift (skipMany spacenonewline) >> char ':' >> lift (skipMany spacenonewline)
+ , lift (skipSome spacenonewline)
+ ]
return ()
fieldvalp :: CsvRulesParser String
@@ -662,8 +666,9 @@ regexp = do
type CsvRecord = [String]
--- Convert a CSV record to a transaction using the rules, or raise an
--- error if the data can not be parsed.
+showRules rules record =
+ unlines $ catMaybes [ (("the "++fld++" rule is: ")++) <$> getEffectiveAssignment rules record fld | fld <- journalfieldnames]
+
transactionFromCsvRecord :: SourcePos -> CsvRules -> CsvRecord -> Transaction
transactionFromCsvRecord sourcepos rules record = t
where
@@ -679,11 +684,11 @@ transactionFromCsvRecord sourcepos rules record = t
mdateformat = mdirective "date-format"
date = render $ fromMaybe "" $ mfieldtemplate "date"
date' = fromMaybe (error' $ dateerror "date" date mdateformat) $ mparsedate date
- mdate2 = maybe Nothing (Just . render) $ mfieldtemplate "date2"
+ mdate2 = render <$> mfieldtemplate "date2"
mdate2' = maybe Nothing (maybe (error' $ dateerror "date2" (fromMaybe "" mdate2) mdateformat) Just . mparsedate) mdate2
dateerror datefield value mdateformat = unlines
["error: could not parse \""++value++"\" as a date using date format "++maybe "\"YYYY/M/D\", \"YYYY-M-D\" or \"YYYY.M.D\"" show mdateformat
- ,"the CSV record is: "++showRecord record
+ , showRecord record
,"the "++datefield++" rule is: "++(fromMaybe "required, but missing" $ mfieldtemplate datefield)
,"the date-format is: "++fromMaybe "unspecified" mdateformat
,"you may need to "
@@ -703,58 +708,110 @@ transactionFromCsvRecord sourcepos rules record = t
["error: could not parse \""++str++"\" as a cleared status (should be *, ! or empty)"
,"the parse error is: "++customErrorBundlePretty err
]
- code = maybe "" render $ mfieldtemplate "code"
- description = maybe "" render $ mfieldtemplate "description"
- comment = maybe "" render $ mfieldtemplate "comment"
- precomment = maybe "" render $ mfieldtemplate "precomment"
- currency = maybe (fromMaybe "" mdefaultcurrency) render $ mfieldtemplate "currency"
- amountstr = (currency++) <$> simplifySign <$> getAmountStr rules record
- maybeamount = either amounterror (Mixed . (:[])) <$> runParser (evalStateT (amountp <* eof) mempty) "" <$> T.pack <$> amountstr
- amounterror err = error' $ unlines
- ["error: could not parse \""++fromJust amountstr++"\" as an amount"
- ,showRecord record
- ,"the amount rule is: "++(fromMaybe "" $ mfieldtemplate "amount")
- ,"the currency rule is: "++(fromMaybe "unspecified" $ mfieldtemplate "currency")
- ,"the default-currency is: "++fromMaybe "unspecified" mdefaultcurrency
- ,"the parse error is: "++customErrorBundlePretty err
- ,"you may need to "
- ++"change your amount or currency rules, "
- ++"or "++maybe "add a" (const "change your") mskip++" skip rule"
- ]
- amount1 = case maybeamount of
- Just a -> a
- Nothing | balance1 /= Nothing || balance2 /= Nothing -> nullmixedamt
- Nothing -> error' $ "amount and balance have no value\n"++showRecord record
- -- convert balancing amount to cost like hledger print, so eg if
- -- amount1 is "10 GBP @@ 15 USD", amount2 will be "-15 USD".
- amount2 = costOfMixedAmount (-amount1)
+ code = singleline $ maybe "" render $ mfieldtemplate "code"
+ description = singleline $ maybe "" render $ mfieldtemplate "description"
+ comment = singleline $ maybe "" render $ mfieldtemplate "comment"
+ precomment = singleline $ maybe "" render $ mfieldtemplate "precomment"
+
s `or` def = if null s then def else s
- defaccount1 = fromMaybe "unknown" $ mdirective "default-account1"
- defaccount2 = case isNegativeMixedAmount amount2 of
- Just True -> "income:unknown"
- _ -> "expenses:unknown"
- account1 = T.pack $ maybe "" render (mfieldtemplate "account1") `or` defaccount1
- account2 = T.pack $ maybe "" render (mfieldtemplate "account2") `or` defaccount2
- balance1template =
- case (mfieldtemplate "balance", mfieldtemplate "balance1") of
- (Nothing, Nothing) -> Nothing
- (balance, Nothing) -> balance
- (Nothing, balance1) -> balance1
- (Just _, Just _) -> error' "Please use either balance or balance1, but not both"
- balance1 = maybe Nothing (parsebalance "1".render) $ balance1template
- balance2 = maybe Nothing (parsebalance "2".render) $ mfieldtemplate "balance2"
- parsebalance n str
+ parsebalance currency n str
| all isSpace str = Nothing
| otherwise = Just $ (either (balanceerror n str) id $ runParser (evalStateT (amountp <* eof) mempty) "" $ T.pack $ (currency++) $ simplifySign str, nullsourcepos)
balanceerror n str err = error' $ unlines
["error: could not parse \""++str++"\" as balance"++n++" amount"
,showRecord record
- ,"the balance"++n++" rule is: "++(fromMaybe "" $ mfieldtemplate ("balance"++n))
- ,"the currency rule is: "++(fromMaybe "unspecified" $ mfieldtemplate "currency")
+ ,showRules rules record
,"the default-currency is: "++fromMaybe "unspecified" mdefaultcurrency
,"the parse error is: "++customErrorBundlePretty err
]
+ parsePosting' number accountFld amountFld amountInFld amountOutFld balanceFld commentFld =
+ let currency = maybe (fromMaybe "" mdefaultcurrency) render $
+ (mfieldtemplate ("currency"++number) `or `mfieldtemplate "currency")
+ amount = chooseAmount rules record currency amountFld amountInFld amountOutFld
+ account' = ((T.pack . render) <$> (mfieldtemplate accountFld
+ `or` mdirective ("default-account" ++ number)))
+ balance = (parsebalance currency number.render) =<< mfieldtemplate balanceFld
+ comment = T.pack $ maybe "" render $ mfieldtemplate commentFld
+ account =
+ case account' of
+ -- If account is explicitly "unassigned", suppress posting
+ -- Otherwise, generate posting with "expenses:unknown" account if we have amount/balance information
+ Just "" -> Nothing
+ Just account -> Just account
+ Nothing ->
+ -- If we have amount or balance assertion (which implies potential amount change),
+ -- but no account name, lets generate "expenses:unknown" account name.
+ case (amount, balance) of
+ (Just _, _ ) -> Just "expenses:unknown"
+ (_, Just _) -> Just "expenses:unknown"
+ (Nothing, Nothing) -> Nothing
+ in
+ case account of
+ Nothing -> Nothing
+ Just account ->
+ Just $ (number, posting {paccount=account, pamount=fromMaybe missingmixedamt amount, ptransaction=Just t, pbalanceassertion=toAssertion <$> balance, pcomment = comment})
+
+ parsePosting number =
+ parsePosting' number
+ ("account"++number)
+ ("amount"++number)
+ ("amount"++number++"-in")
+ ("amount"++number++"-out")
+ ("balance"++number)
+ ("comment" ++ number)
+
+ postingLegacy = parsePosting' "" "account1" "amount" "amount-in" "amount-out" "balance" "comment1"
+ posting1' = parsePosting "1"
+ posting1 =
+ case (postingLegacy,posting1') of
+ (Just (_,legacy), Nothing) -> Just ("1", legacy)
+ (Nothing, Just (_,posting1)) -> Just ("1", posting1)
+ (Just (_,legacy), Just (_,posting1)) ->
+ -- Here we merge legacy fields such as "amount" with "amount1", etc
+ -- Account and Comment would be the same by construction
+ let balanceassertion = (pbalanceassertion legacy) `or` (pbalanceassertion posting1)
+ amount =
+ let al = pamount legacy
+ a1 = pamount posting1
+ in
+ if al == a1 then al
+ else
+ case (isZeroMixedAmount al, isZeroMixedAmount a1) of
+ (True, _) -> a1
+ (False, True) -> al
+ (False, False) ->
+ error' $ unlines [ "amount/amount-in/amount-out and amount1/amount1-in/amount1-out produced conflicting values"
+ , showRecord record
+ , showRules rules record
+ , "amount/amount-in/amount-out is " ++ showMixedAmount al
+ , "amount1/amount1-in/amount1-out is " ++ showMixedAmount a1
+ ]
+ in Just $ ("1", posting {paccount=paccount posting1, pamount=amount, ptransaction=Just t, pbalanceassertion=balanceassertion, pcomment = pcomment posting1})
+ (Nothing, Nothing) -> Nothing
+ postings' = catMaybes $ posting1:[ parsePosting i | x<-[2..9], let i = show x]
+
+ improveUnknownAccountName p =
+ if paccount p /="expenses:unknown"
+ then p
+ else case isNegativeMixedAmount (pamount p) of
+ Just True -> p{paccount = "income:unknown"}
+ Just False -> p{paccount = "expenses:unknown"}
+ _ -> p
+
+ postings =
+ case postings' of
+ -- To be compatible with the behavior of the old code which allowed two postings only, we enforce
+ -- second posting when rules generated just first of them.
+ -- When we have srictly first and second posting, but second posting does not have amount, we fill it in.
+ [("1",posting1)] ->
+ [posting1,improveUnknownAccountName (posting{paccount="expenses:unknown", pamount=costOfMixedAmount(-(pamount posting1)), ptransaction=Just t})]
+ [("1",posting1),("2",posting2)] ->
+ case (pamount posting1 == missingmixedamt , pamount posting2 == missingmixedamt) of
+ (False, True) -> [posting1, improveUnknownAccountName (posting2{pamount=costOfMixedAmount(-(pamount posting1))})]
+ _ -> [posting1, posting2]
+ _ -> map snd postings'
+
-- build the transaction
t = nulltransaction{
tsourcepos = genericSourcePos sourcepos,
@@ -764,39 +821,56 @@ transactionFromCsvRecord sourcepos rules record = t
tcode = T.pack code,
tdescription = T.pack description,
tcomment = T.pack comment,
- tprecedingcomment = T.pack precomment,
- tpostings =
- [posting {paccount=account1, pamount=amount1, ptransaction=Just t, pbalanceassertion=toAssertion <$> balance1}
- ,posting {paccount=account2, pamount=amount2, ptransaction=Just t, pbalanceassertion=toAssertion <$> balance2}
- ]
+ tprecedingcomment = T.pack precomment,
+ tpostings = postings
}
toAssertion (a, b) = assertion{
baamount = a,
baposition = b
}
-getAmountStr :: CsvRules -> CsvRecord -> Maybe String
-getAmountStr rules record =
+chooseAmount :: CsvRules -> CsvRecord -> String -> String -> String -> String -> Maybe MixedAmount
+chooseAmount rules record currency amountFld amountInFld amountOutFld =
let
- mamount = getEffectiveAssignment rules record "amount"
- mamountin = getEffectiveAssignment rules record "amount-in"
- mamountout = getEffectiveAssignment rules record "amount-out"
- render = fmap (strip . renderTemplate rules record)
+ mamount = getEffectiveAssignment rules record amountFld
+ mamountin = getEffectiveAssignment rules record amountInFld
+ mamountout = getEffectiveAssignment rules record amountOutFld
+ parse amt = notZero =<< (parseAmount currency <$> notEmpty =<< (strip . renderTemplate rules record) <$> amt)
in
- case (render mamount, render mamountin, render mamountout) of
- (Just "", Nothing, Nothing) -> Nothing
+ case (parse mamount, parse mamountin, parse mamountout) of
+ (Nothing, Nothing, Nothing) -> Nothing
(Just a, Nothing, Nothing) -> Just a
- (Nothing, Just "", Just "") -> error' $ "neither amount-in or amount-out has a value\n"
- ++ " record: " ++ showRecord record
- (Nothing, Just i, Just "") -> Just i
- (Nothing, Just "", Just o) -> Just $ negateStr o
- (Nothing, Just i, Just o) -> error' $ "both amount-in and amount-out have a value\n"
- ++ " amount-in: " ++ i ++ "\n"
- ++ " amount-out: " ++ o ++ "\n"
+ (Nothing, Just i, Nothing) -> Just i
+ (Nothing, Nothing, Just o) -> Just $ negate o
+ (Nothing, Just i, Just o) -> error' $ "both "++amountInFld++" and "++amountOutFld++" have a value\n"
+ ++ " "++amountInFld++": " ++ show i ++ "\n"
+ ++ " "++amountOutFld++": " ++ show o ++ "\n"
++ " record: " ++ showRecord record
- _ -> error' $ "found values for amount and for amount-in/amount-out\n"
- ++ "please use either amount or amount-in/amount-out\n"
+ _ -> error' $ "found values for "++amountFld++" and for "++amountInFld++"/"++amountOutFld++"\n"
+ ++ "please use either "++amountFld++" or "++amountInFld++"/"++amountOutFld++"\n"
++ " record: " ++ showRecord record
+ where
+ notZero amt = if isZeroMixedAmount amt then Nothing else Just amt
+ notEmpty str = if str=="" then Nothing else Just str
+
+ parseAmount currency amountstr =
+ either (amounterror amountstr) (Mixed . (:[]))
+ <$> runParser (evalStateT (amountp <* eof) mempty) ""
+ <$> T.pack
+ <$> (currency++)
+ <$> simplifySign
+ <$> amountstr
+
+ amounterror amountstr err = error' $ unlines
+ ["error: could not parse \""++fromJust amountstr++"\" as an amount"
+ ,showRecord record
+ ,showRules rules record
+ ,"the default-currency is: "++fromMaybe "unspecified" (getDirective "default-currency" rules)
+ ,"the parse error is: "++customErrorBundlePretty err
+ ,"you may need to "
+ ++"change your amount or currency rules, "
+ ++"or add or change your skip rule"
+ ]
type CsvAmountString = String
@@ -861,7 +935,7 @@ getEffectiveAssignment rules record f = lastMay $ assignmentsFor f
-- | Render a field assigment's template, possibly interpolating referenced
-- CSV field values. Outer whitespace is removed from interpolated values.
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String
-renderTemplate rules record t = regexReplaceBy "%[A-z0-9]+" replace t
+renderTemplate rules record t = regexReplaceBy "%[A-z0-9_-]+" replace t
where
replace ('%':pat) = maybe pat (\i -> strip $ atDef "" record (i-1)) mindex
where
diff --git a/hledger-lib/Hledger/Utils/String.hs b/hledger-lib/Hledger/Utils/String.hs
index 23fb3163839..03342317dfe 100644
--- a/hledger-lib/Hledger/Utils/String.hs
+++ b/hledger-lib/Hledger/Utils/String.hs
@@ -21,6 +21,7 @@ module Hledger.Utils.String (
lstrip,
rstrip,
chomp,
+ singleline,
elideLeft,
elideRight,
formatString,
@@ -76,6 +77,10 @@ rstrip = reverse . lstrip . reverse
chomp :: String -> String
chomp = reverse . dropWhile (`elem` "\r\n") . reverse
+-- | Remove consequtive line breaks, replacing them with single space
+singleline :: String -> String
+singleline = unwords . filter (/="") . (map strip) . lines
+
stripbrackets :: String -> String
stripbrackets = dropWhile (`elem` "([") . reverse . dropWhile (`elem` "])") . reverse :: String -> String
diff --git a/hledger-lib/hledger_csv.m4.md b/hledger-lib/hledger_csv.m4.md
index 03d0fd22cc6..4055c857f9b 100644
--- a/hledger-lib/hledger_csv.m4.md
+++ b/hledger-lib/hledger_csv.m4.md
@@ -38,7 +38,7 @@ At minimum, the rules file must identify the date and amount fields.
It's often necessary to specify the date format, and the number of header lines to skip, also.
Eg:
```
-fields date, _, _, amount
+fields date, _, _, amount1
date-format %d/%m/%Y
skip 1
```
@@ -55,7 +55,7 @@ A more complete example:
skip 1
# name the csv fields (and assign the transaction's date, amount and code)
-fields date, _, toorfrom, name, amzstatus, amount, fees, code
+fields date, _, toorfrom, name, amzstatus, amount1, fees, code
# how to parse the date
date-format %b %-d, %Y
@@ -64,7 +64,7 @@ date-format %b %-d, %Y
description %toorfrom %name
# save these fields as tags
-comment status:%amzstatus, fees:%fees
+comment status:%amzstatus
# set the base account for all transactions
account1 assets:amazon
@@ -72,6 +72,9 @@ account1 assets:amazon
# flip the sign on the amount
amount -%amount
+# Put fees in a separate posting
+amount3 %fees
+comment3 fees
```
For more examples, see [Convert CSV files](https://github.com/simonmichael/hledger/wiki/Convert-CSV-files).
@@ -92,7 +95,7 @@ You'll need this whenever your CSV data contains header lines. Eg:
-
+This can also be used in a conditional block to ignore certain CSV records.
```rules
# ignore the first CSV line
skip 1
@@ -133,7 +136,13 @@ date-format %-m/%-d/%Y %l:%M %p
This (a) names the CSV fields, in order (names may not contain whitespace; uninteresting names may be left blank),
and (b) assigns them to journal entry fields if you use any of these standard field names:
-`date`, `date2`, `status`, `code`, `description`, `comment`, `account1`, `account2`, `amount`, `amount-in`, `amount-out`, `currency`, `balance`, `balance1`, `balance2`.
+
+Fields `date`, `date2`, `status`, `code`, `description` will form transaction description.
+
+An assignment to any of `accountN`, `amountN`, `amountN-in`, `amountN-out`, `balanceN` or `currencyN` will generate a posting (though it's your responsibility to ensure it is a well formed one). Normally the `N`'s are consecutive starting from 1 but it's not required. One posting will be generated for each unique `N`. If you wish to supply a comment for the posting, use `commentN`, though comment on its own will not cause posting to be generated.
+
+Fields `amount`, `amount-in`, `amount-out`, `currency`, `balance` and `comment` are treated as aliases for `amount1`, and so on. If your rules file leads to both aliased fields having different values, `hledger` will raise an error.
+
Eg:
```rules
# use the 1st, 2nd and 4th CSV fields as the entry's date, description and amount,
@@ -142,9 +151,11 @@ Eg:
# CSV field:
# 1 2 3 4 5 6 7 8
# entry field:
-fields date, description, , amount, , , somefield, anotherfield
+fields date, description, , amount1, , , somefield, anotherfield
```
+For backwards compatibility, we treat posting 1 specially. If your rules generated just posting 1, another posting would be added to your transaction to balance it. If your rules generated posting 1 and posting 2, but amount in the posting 2 is empty, hledger will fill it out with the opposite of posting 1. This special handling is needed to ensure smooth upgrade path from version 1.15.
+
## field assignment
*`ENTRYFIELDNAME`* *`FIELDVALUE`*
@@ -177,12 +188,23 @@ Note, interpolation strips any outer whitespace, so a CSV value like
*`PATTERN`*...\
*`FIELDASSIGNMENTS`*...
+`if` *`PATTERN`*\
+*`PATTERN`*...\
+ *`skip N`*...
+
+`if` *`PATTERN`*\
+*`PATTERN`*...\
+ *`end`*...
+
This applies one or more field assignments, only to those CSV records matched by one of the PATTERNs.
The patterns are case-insensitive regular expressions which match anywhere
within the whole CSV record (it's not yet possible to match within a
specific field). When there are multiple patterns they can be written
on separate lines, unindented.
The field assignments are on separate lines indented by at least one space.
+
+Instead of field assignments you can specify `skip` or `skip 1` to skip this record, `skip N` to skip the next N records (including the one that matchied) or `end` to skip the rest of the file.
+
Examples:
```rules
# if the CSV record contains "groceries", set account2 to "expenses:groceries"
@@ -231,22 +253,21 @@ The order of same-day entries will be preserved
## CSV accounts
-Each journal entry will have two [postings](journal.html#postings), to `account1` and `account2` respectively.
-It's not yet possible to generate entries with more than two postings.
+Each journal entry will have at least two [postings](journal.html#postings), to `account1` and some other account (usually `account2`).
It's conventional and recommended to use `account1` for the account whose CSV we are reading.
## CSV amounts
-A transaction [amount](journal.html#amounts) must be set, in one of these ways:
+A posting [amount](journal.html#amounts) could be set in one of these ways:
-- with an `amount` field assignment, which sets the first posting's amount
+- with an `amountN` field assignment, which sets the Nth posting's amount
- (When the CSV has debit and credit amounts in separate fields:)\
- with field assignments for the `amount-in` and `amount-out` pseudo
+ with field assignments for the `amountN-in` and `amountN-out` pseudo
fields (both of them). Whichever one has a value will be used, with
appropriate sign. If both contain a value, it might not work so well.
-- or implicitly by means of a [balance assignment](journal.html#balance-assignments) (see below).
+- with `balanceN` field assignment that creates a [balance assignment](journal.html#balance-assignments) (see below).
There is some special handling for sign in amounts:
@@ -254,24 +275,53 @@ There is some special handling for sign in amounts:
- If an amount value begins with a double minus sign, those will cancel out and be removed.
If the currency/commodity symbol is provided as a separate CSV field,
-assign it to the `currency` pseudo field; the symbol will be prepended
+assign it to the `currency` pseudo field (applicable to the whole transaction) or `currencyN` (applicable to Nth posting only); the symbol will be prepended
to the amount
(TODO: when there is an amount).
-Or, you can use an `amount` [field assignment](#field-assignment) for more control, eg:
+Or, you can use an `amountN` [field assignment](#field-assignment) for more control, eg:
```
-fields date,description,currency,amount
-amount %amount %currency
+fields date,description,currency,amount1
+amount1 %amount1 %currency
```
## CSV balance assertions/assignments
If the CSV includes a running balance, you can assign that to one of the pseudo fields
-`balance` (or `balance1`) or `balance2`.
+`balance` (or `balance1`), `balance2`, ... up to `balance9`.
This will generate a [balance assertion](journal.html#balance-assertions)
(or if the amount is left empty, a [balance assignment](journal.html#balance-assignments)),
-on the first or second posting,
-whenever the running balance field is non-empty.
-(TODO: [#1000](https://github.com/simonmichael/hledger/issues/1000))
+on the appropriate posting, whenever the running balance field is non-empty.
+
+## References to other fields and evaluation order
+
+Field assignments could include references to other fields or even to the same field you are trying to assign:
+
+```
+fields date,description,currency,amount1
+
+amount1 %amount1 USD
+amount1 %amount1 EUR
+amount1 %amount1 %currency
+
+if SOME_REGEXP
+ amount1 %amount1 GBP
+```
+This is how this file would be evaluated.
+
+First, parts of CVS record are assigned according to `fields` directive.
+
+Then all other field assignments -- written at top level, or included in `if` blocks -- are considered to see if they should be applied. They are checked in the order they are written, with later assignment overwriting earlier ones.
+
+Once full set of field assignments that should be applied is known, their values are computed, and this is when all `%` references are evaluated.
+
+So for a particular row from CSV file, value from fourth column would be assigned to `amount1`.
+
+Then `hledger` will decide that `amount1` would have to be amended to `%amount1 USD`, but this will not happen immediately. This choice would be replaced by decision to rewrite `amount1` to `%amount EUR`, which will in turn be thrown away in favor of `%amount1 %currency`. If the `if` block condition will match the row, it will assign `amount1` to `%amount1 GBP`.
+
+Overall, we will end up with one of the two alternatives for `amount1` - either `%amount1 %currency` or `%amount1 GBP`.
+
+Now substitution of all referenced values will happen, using the current values for `%amount1` and `currency`, which were provided by the `fields` directive.
+
## Reading multiple CSV files
diff --git a/tests/csv.test b/tests/csv.test
index 8486d55f927..174d3474066 100644
--- a/tests/csv.test
+++ b/tests/csv.test
@@ -1,11 +1,13 @@
-# These tests read CSV from stdin for convenience, so to ensure we get the CSV reader's
-# error, the csv: prefix is used.
-#
-# The final cleanup command is chained with && so as not to mask hledger's exit code,
-# but this means a temp file is left behind whenever hledger fails. What TODO ?
-
# 1. read CSV to hledger journal format
-$ printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
+<
+10/2009/09,Flubber Co,50
+RULES
+fields date, description, amount
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+
+$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50
income:unknown $-50
@@ -16,7 +18,16 @@ $ printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $
<
10/2009/09,Flubber Co🎅,50,
11/2009/09,Flubber Co🎅,,50
-$ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >t.$$.csv.rules ; hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
+RULES
+account1 Assets:MyAccount
+date %1
+date-format %d/%Y/%m
+description %2
+amount-in %3
+amount-out %4
+currency $
+
+$ ./hledger-csv
2009/09/10 Flubber Co🎅
Assets:MyAccount $50
income:unknown $-50
@@ -28,7 +39,18 @@ $ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescrip
>=0
# 3. handle conditions assigning multiple fields
-$ printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\nif Flubber\n account2 acct\n comment cmt' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
+<
+10/2009/09,Flubber Co,50
+
+RULES
+fields date, description, amount
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+if Flubber
+ account2 acct
+ comment cmt
+$ ./hledger-csv
2009/09/10 Flubber Co ; cmt
assets:myacct $50
acct $-50
@@ -36,7 +58,16 @@ $ printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $
>=0
# 4. read CSV with balance field
-$ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,123\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
+<
+10/2009/09,Flubber Co,50,123
+
+RULES
+fields date, description, amount, balance
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+
+$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50 = $123
income:unknown $-50
@@ -44,7 +75,17 @@ $ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\nc
>=0
# 5. read CSV with empty balance field
-$ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,123\n11/2009/09,Blubber Co,60,\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
+<
+10/2009/09,Flubber Co,50,123
+11/2009/09,Blubber Co,60,
+
+RULES
+fields date, description, amount, balance
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+
+$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50 = $123
income:unknown $-50
@@ -56,7 +97,17 @@ $ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\nc
>=0
# 6. read CSV with only whitespace in balance field
-$ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,123\n11/2009/09,Blubber Co,60, \n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
+<
+10/2009/09,Flubber Co,50,123
+11/2009/09,Blubber Co,60,
+
+RULES
+fields date, description, amount, balance
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+
+$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50 = $123
income:unknown $-50
@@ -68,7 +119,22 @@ $ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\nc
>=0
# 7. read CSV with rule double-negating column
-$ printf 'skip 1\n\ncurrency $\n\nfields date, payee, payment\n\namount -%%payment\naccount1 liabilities:bank\naccount2 expense:other' >t.$$.csv.rules; printf 'date,payee,amount\n2009/10/9,Flubber Co,50\n2009/11/09,Merchant Credit,-60\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
+<
+date,payee,amount
+2009/10/9,Flubber Co,50
+2009/11/09,Merchant Credit,-60
+
+RULES
+skip 1
+
+currency $
+
+fields date, payee, payment
+
+amount -%payment
+account1 liabilities:bank
+account2 expense:other
+$ ./hledger-csv
2009/10/09
liabilities:bank $-50
expense:other $50
@@ -83,7 +149,16 @@ $ printf 'skip 1\n\ncurrency $\n\nfields date, payee, payment\n\namount -%%paym
<
10/2009/09;Flubber Co🎅;50;
11/2009/09;Flubber Co🎅;;50
-$ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >rules.$$ ; hledger --separator ';' -f csv:- --rules-file rules.$$ print && rm -rf rules.$$
+RULES
+account1 Assets:MyAccount
+date %1
+date-format %d/%Y/%m
+description %2
+amount-in %3
+amount-out %4
+currency $
+
+$ ./hledger-csv --separator ';'
2009/09/10 Flubber Co🎅
Assets:MyAccount $50
income:unknown $-50
@@ -95,7 +170,16 @@ $ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescrip
>=0
# 9. read CSV with balance2 field
-$ printf 'fields date, description, amount, balance2\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,123\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
+<
+10/2009/09,Flubber Co,50,123
+
+RULES
+fields date, description, amount, balance2
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+
+$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50
income:unknown $-50 = $123
@@ -103,7 +187,16 @@ $ printf 'fields date, description, amount, balance2\ndate-format %%d/%%Y/%%m\n
>=0
# 10. read CSV with balance1 and balance2 fields
-$ printf 'fields date, description, amount, balance1, balance2\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,321,123\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
+<
+10/2009/09,Flubber Co,50,321,123
+
+RULES
+fields date, description, amount, balance1, balance2
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+
+$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50 = $321
income:unknown $-50 = $123
@@ -111,6 +204,291 @@ $ printf 'fields date, description, amount, balance1, balance2\ndate-format %%d
>=0
+# 11. More than two postings
+<
+10/2009/09,Flubber Co,50,321,123,0.234,VAT
+
+RULES
+fields date, description, amount, balance1, balance2, amount3,comment3
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+account3 expenses:tax
+
+$ ./hledger-csv
+2009/09/10 Flubber Co
+ assets:myacct $50 = $321
+ expenses:unknown = $123
+ expenses:tax $0.234 ; VAT
+
+>=0
+
+# 12. More than two postings and different currencies
+<
+10/2009/09,Flubber Co,50,321,123,£,0.234,VAT
+
+RULES
+fields date, description, amount, balance1, balance2, currency3, amount3,comment3
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+account3 expenses:tax
+
+$ ./hledger-csv
+2009/09/10 Flubber Co
+ assets:myacct $50 = $321
+ expenses:unknown = $123
+ expenses:tax £0.234 ; VAT
+
+>=0
+
+# 13. reading CSV with in-field and out-field, where one could be zero
+<
+10/2009/09,Flubber Co🎅,50,0
+11/2009/09,Flubber Co🎅,0.00,50
+RULES
+account1 Assets:MyAccount
+date %1
+date-format %d/%Y/%m
+description %2
+amount-in %3
+amount-out %4
+currency $
+
+$ ./hledger-csv
+2009/09/10 Flubber Co🎅
+ Assets:MyAccount $50
+ income:unknown $-50
+
+2009/09/11 Flubber Co🎅
+ Assets:MyAccount $-50
+ expenses:unknown $50
+
+>=0
+
+# 14. multiline descriptions
+<
+10/2009/09,"Flubber Co
+
+
+
+Co
+Co
+
+
+
+
+",50
+
+RULES
+fields date, description, amount
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+$ ./hledger-csv
+2009/09/10 Flubber Co Co Co
+ assets:myacct $50
+ income:unknown $-50
+
+>=0
+
+# 15. recursive interpolation
+<
+myacct,10/2009/09,Flubber Co,50,
+
+RULES
+
+fields account1, date, description, amount-in, amount-out
+date-format %d/%Y/%m
+currency $
+if Flubber
+ account1 assets:%account1
+ amount-in (%amount-in)
+$ ./hledger-csv
+2009/09/10 Flubber Co
+ assets:myacct $-50
+ expenses:unknown $50
+
+>=0
+
+# 16. Real life-ish paypal parsing example
+<
+"12/22/2018","06:22:50","PST","Someone","Subscription Payment","Completed","USD","10.00","-0.59","9.41","someone@some.where","simon@joyful.com","123456789","Joyful Systems","","9KCXINCOME:UNKNOWNZXXAX","","57.60",""
+
+RULES
+fields date, time, timezone, description, type, status_, currency, grossamount, feeamount, netamount, fromemail, toemail, code, itemtitle, itemid, referencetxnid, receiptid, balance, note
+account1 sm:assets:online:paypal
+amount1 %netamount
+account2 sm:expenses:unknown
+account3 JS:expenses:banking:paypal
+amount3 %feeamount
+balance %18
+code %13
+currency $
+date %1
+date-format %m/%d/%Y
+description %description for %itemtitle
+$ ./hledger-csv
+2018/12/22 (123456789) Someone for Joyful Systems
+ sm:assets:online:paypal $9.41 = $57.60
+ sm:expenses:unknown
+ JS:expenses:banking:paypal $-0.59
+
+>=0
+
+# 17. Show that #415 is fixed
+<
+"2016/01/01","$1"
+"2016/02/02","$1,000.00"
+RULES
+account1 unknown
+amount %2
+date %1
+date-format %Y/%m/%d
+$ ./hledger-csv | hledger balance -f - --no-total
+ $-1,001.00 income:unknown
+ $1,001.00 unknown
+>=0
+
+# 18. Conditional skips
+<
+HEADER
+10/2009/09,Flubber Co,50
+MIDDLE SKIP THIS LINE
+AND THIS
+AND THIS ONE
+10/2009/09,Flubber Co,50
+*** END OF FILE ***
+More lines of the trailer here
+They all should be ignored
+RULES
+fields date, description, amount
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+
+if HEADER
+ skip
+
+if
+END OF FILE
+ end
+
+if MIDDLE
+ skip 3
+
+$ ./hledger-csv
+2009/09/10 Flubber Co
+ assets:myacct $50
+ income:unknown $-50
+
+2009/09/10 Flubber Co
+ assets:myacct $50
+ income:unknown $-50
+
+>=0
+
+# 19. Lines with just balance, no amount (#1000)
+<
+2018-10-15,100
+2018-10-16,200
+2018-10-17,300
+RULES
+fields date,bal
+
+balance EUR %bal
+date-format %Y-%m-%d
+description Assets Update
+account1 assets
+account2 income
+$ ./hledger-csv
+2018/10/15 Assets Update
+ assets = EUR 100
+ income
+
+2018/10/16 Assets Update
+ assets = EUR 200
+ income
+
+2018/10/17 Assets Update
+ assets = EUR 300
+ income
+
+>=0
+
+# 20. Test for #1001 - empty assignment to amount show not eat next line
+<
+2018-10-15,100
+2018-10-16,200
+2018-10-17,300
+RULES
+fields date,bal
+
+balance EUR %bal
+date-format %Y-%m-%d
+description Assets Update
+account1 assets
+account2 income
+if 2018
+ amount
+ comment Dont eat me
+ balance
+ comment Dont eat me
+$ ./hledger-csv
+2018/10/15 Assets Update ; Dont eat me
+ assets
+ income
+
+2018/10/16 Assets Update ; Dont eat me
+ assets
+ income
+
+2018/10/17 Assets Update ; Dont eat me
+ assets
+ income
+
+>=0
+
+# 21. Amountless postings and conditional third posting
+<
+"12/22/2018","06:22:50","PST","Someone","Subscription Payment","Completed","USD","10.00","-0.59","9.41","someone@some.where","simon@joyful.com","123456789","Joyful Systems","","9KCXINCOME:UNKNOWNZXXAX","","57.60",""
+"12/22/2018","06:22:50","PST","Someone","Empty fee","Completed","USD","10.00","","6.66","someone@some.where","simon@joyful.com","987654321","Joyful Systems","","9KCXINCOME:UNKNOWNZXXAX","","99.60",""
+"12/22/2018","06:22:50","PST","Someone","Conditional Empty fee","Completed","USD","10.00","-1.23","7.77","someone@some.where","simon@joyful.com","10101010101","Joyful Systems","","9KCXINCOME:UNKNOWNZXXAX","","88.66",""
+
+RULES
+fields date, time, timezone, description, type, status_, currency, grossamount, feeamount, netamount, fromemail, toemail, code, itemtitle, itemid, referencetxnid, receiptid, balance, note
+account1 sm:assets:online:paypal
+amount1 %netamount
+account2 sm:expenses:unknown
+account3 JS:expenses:banking:paypal
+amount3 %feeamount
+balance %18
+code %13
+currency $
+date %1
+date-format %m/%d/%Y
+description %description for %itemtitle
+if Conditional Empty Fee
+ account3
+
+$ ./hledger-csv
+2018/12/22 (123456789) Someone for Joyful Systems
+ sm:assets:online:paypal $9.41 = $57.60
+ sm:expenses:unknown
+ JS:expenses:banking:paypal $-0.59
+
+2018/12/22 (987654321) Someone for Joyful Systems
+ sm:assets:online:paypal $6.66 = $99.60
+ sm:expenses:unknown
+ JS:expenses:banking:paypal
+
+2018/12/22 (10101010101) Someone for Joyful Systems
+ sm:assets:online:paypal $7.77 = $88.66
+ sm:expenses:unknown $-7.77
+
+>=0
+
+
# . TODO: without --separator gives obscure error
# |
# 1 | 10/2009/09;Flubber Co🎅;50;
@@ -119,10 +497,18 @@ $ printf 'fields date, description, amount, balance1, balance2\ndate-format %%d
# <
# 10/2009/09;Flubber Co🎅;50;
# 11/2009/09;Flubber Co🎅;;50
-# $ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >rules.$$ ; hledger -f csv:- --rules-file rules.$$ print && rm -rf rules.$$
+# RULES
+# account1 Assets:MyAccount
+# date %1
+# date-format %d/%Y/%m
+# description %2
+# amount-in %3
+# amount-out %4
+# currency $
+# $ ./hledger-csv
# 2009/09/10 Flubber Co🎅
# Assets:MyAccount $50
-# income:unknown $-50
+# expenses:unknown $-50
#
# 2009/09/11 Flubber Co🎅
# Assets:MyAccount $-50
@@ -134,10 +520,18 @@ $ printf 'fields date, description, amount, balance1, balance2\ndate-format %%d
# <
# 10/2009/09 Flubber Co🎅 50
# 11/2009/09 Flubber Co🎅 50
-# $ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >rules.$$ ; hledger --separator "\t" -f csv:- --rules-file rules.$$ print && rm -rf rules.$$
+# RULES
+# account1 Assets:MyAccount
+# date %1
+# date-format %d/%Y/%m
+# description %2
+# amount-in %3
+# amount-out %4
+# currency $
+# $ ./hledger-csv
# 2009/09/10 Flubber Co🎅
# Assets:MyAccount $50
-# income:unknown $-50
+# expenses:unknown $-50
#
# 2009/09/11 Flubber Co🎅
# Assets:MyAccount $-50
diff --git a/tests/hledger-csv b/tests/hledger-csv
new file mode 100755
index 00000000000..8ea96a06ddc
--- /dev/null
+++ b/tests/hledger-csv
@@ -0,0 +1,15 @@
+#!/bin/bash
+#
+# This scripts expects stdin formatted like this:
+#
+# RULES
+#
+#
+awk -vCSV="t.$$.csv" -vRULES="t.$$.csv.rules" '
+BEGIN{output=CSV}
+/^RULES/{output=RULES}
+!/^RULES/{print $0 >output}'
+
+trap "rm -f t.$$.csv t.$$.csv.rules" EXIT ERR
+
+hledger -f csv:t.$$.csv --rules-file t.$$.csv.rules print "$@"