diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs index f62a02c9990..1885e8e89e9 100644 --- a/hledger-lib/Hledger/Read/CsvReader.hs +++ b/hledger-lib/Hledger/Read/CsvReader.hs @@ -140,7 +140,7 @@ readJournalFromCsv separator mrulesfile csvfile csvdata = -- parsec seems to fail if you pass it "-" here TODO: try again with megaparsec let parsecfilename = if csvfile == "-" then "(stdin)" else csvfile records <- (either throwerr id . - dbg2 "validateCsv" . validateCsv skiplines . + dbg2 "validateCsv" . validateCsv rules skiplines . dbg2 "parseCsv") `fmap` parseCsv separator parsecfilename csvdata dbg1IO "first 3 csv records" $ take 3 records @@ -216,11 +216,22 @@ printCSV records = unlined (printRecord `map` records) unlined = concat . intersperse "\n" -- | Return the cleaned up and validated CSV data (can be empty), or an error. -validateCsv :: Int -> Either String CSV -> Either String [CsvRecord] -validateCsv _ (Left err) = Left err -validateCsv numhdrlines (Right rs) = validate $ drop numhdrlines $ filternulls rs +validateCsv :: CsvRules -> Int -> Either String CSV -> Either String [CsvRecord] +validateCsv _ _ (Left err) = Left err +validateCsv rules numhdrlines (Right rs) = validate $ applyConditionalSkips $ drop numhdrlines $ filternulls rs where filternulls = filter (/=[""]) + skipCount r = + case (getEffectiveAssignment rules r "end", getEffectiveAssignment rules r "skip") of + (Nothing, Nothing) -> Nothing + (Just _, _) -> Just maxBound + (Nothing, Just "") -> Just 1 + (Nothing, Just x) -> Just (read x) + applyConditionalSkips [] = [] + applyConditionalSkips (r:rest) = + case skipCount r of + Nothing -> r:(applyConditionalSkips rest) + Just cnt -> applyConditionalSkips (drop (cnt-1) rest) validate [] = Right [] validate rs@(_first:_) | isJust lessthan2 = let r = fromJust lessthan2 in @@ -266,7 +277,7 @@ defaultRulesText csvfile = T.pack $ unlines ,"" ,"account1 assets:bank:checking" ,"" - ,"fields date, description, amount" + ,"fields date, description, amount1" ,"" ,"#skip 1" ,"#newest-first" @@ -454,20 +465,8 @@ parseCsvRules rulesfile s = validateRules :: CsvRules -> Either String CsvRules validateRules rules = do unless (isAssigned "date") $ Left "Please specify (at top level) the date field. Eg: date %1\n" - unless ((amount && not (amountin || amountout)) || - (not amount && (amountin && amountout)) || - balance) - $ Left $ unlines [ - "Please specify (as a top level CSV rule) either the amount field," - ,"both the amount-in and amount-out fields, or the balance field. Eg:" - ,"amount %2\n" - ] Right rules where - amount = isAssigned "amount" - amountin = isAssigned "amount-in" - amountout = isAssigned "amount-out" - balance = isAssigned "balance" || isAssigned "balance1" || isAssigned "balance2" isAssigned f = isJust $ getEffectiveAssignment rules [] f -- parsers @@ -553,8 +552,9 @@ fieldassignmentp :: CsvRulesParser (JournalFieldName, FieldTemplate) fieldassignmentp = do lift $ dbgparse 3 "trying fieldassignmentp" f <- journalfieldnamep - assignmentseparatorp - v <- fieldvalp + v <- choiceInState [ assignmentseparatorp >> fieldvalp + , lift eolof >> return "" + ] return (f,v) "field assignment" @@ -566,14 +566,19 @@ journalfieldnamep = do -- Transaction fields and pseudo fields for CSV conversion. -- Names must precede any other name they contain, for the parser -- (amount-in before amount; date2 before date). TODO: fix -journalfieldnames = [ - "account1" - ,"account2" - ,"amount-in" +journalfieldnames = + concat [[ "account" ++ i + ,"amount" ++ i ++ "-in" + ,"amount" ++ i ++ "-out" + ,"amount" ++ i + ,"balance" ++ i + ,"comment" ++ i + ,"currency" ++ i + ] | x <- [1..9], let i = show x] + ++ + ["amount-in" ,"amount-out" ,"amount" - ,"balance1" - ,"balance2" ,"balance" ,"code" ,"comment" @@ -582,17 +587,16 @@ journalfieldnames = [ ,"date" ,"description" ,"status" + ,"skip" -- skip and end are not really fields, but we list it here to allow conditional rules that skip records + ,"end" ] assignmentseparatorp :: CsvRulesParser () assignmentseparatorp = do lift $ dbgparse 3 "trying assignmentseparatorp" - choice [ - -- try (lift (skipMany spacenonewline) >> oneOf ":="), - try (lift (skipMany spacenonewline) >> char ':'), - spaceChar - ] - _ <- lift (skipMany spacenonewline) + _ <- choiceInState [ lift (skipMany spacenonewline) >> char ':' >> lift (skipMany spacenonewline) + , lift (skipSome spacenonewline) + ] return () fieldvalp :: CsvRulesParser String @@ -662,8 +666,9 @@ regexp = do type CsvRecord = [String] --- Convert a CSV record to a transaction using the rules, or raise an --- error if the data can not be parsed. +showRules rules record = + unlines $ catMaybes [ (("the "++fld++" rule is: ")++) <$> getEffectiveAssignment rules record fld | fld <- journalfieldnames] + transactionFromCsvRecord :: SourcePos -> CsvRules -> CsvRecord -> Transaction transactionFromCsvRecord sourcepos rules record = t where @@ -679,11 +684,11 @@ transactionFromCsvRecord sourcepos rules record = t mdateformat = mdirective "date-format" date = render $ fromMaybe "" $ mfieldtemplate "date" date' = fromMaybe (error' $ dateerror "date" date mdateformat) $ mparsedate date - mdate2 = maybe Nothing (Just . render) $ mfieldtemplate "date2" + mdate2 = render <$> mfieldtemplate "date2" mdate2' = maybe Nothing (maybe (error' $ dateerror "date2" (fromMaybe "" mdate2) mdateformat) Just . mparsedate) mdate2 dateerror datefield value mdateformat = unlines ["error: could not parse \""++value++"\" as a date using date format "++maybe "\"YYYY/M/D\", \"YYYY-M-D\" or \"YYYY.M.D\"" show mdateformat - ,"the CSV record is: "++showRecord record + , showRecord record ,"the "++datefield++" rule is: "++(fromMaybe "required, but missing" $ mfieldtemplate datefield) ,"the date-format is: "++fromMaybe "unspecified" mdateformat ,"you may need to " @@ -703,58 +708,110 @@ transactionFromCsvRecord sourcepos rules record = t ["error: could not parse \""++str++"\" as a cleared status (should be *, ! or empty)" ,"the parse error is: "++customErrorBundlePretty err ] - code = maybe "" render $ mfieldtemplate "code" - description = maybe "" render $ mfieldtemplate "description" - comment = maybe "" render $ mfieldtemplate "comment" - precomment = maybe "" render $ mfieldtemplate "precomment" - currency = maybe (fromMaybe "" mdefaultcurrency) render $ mfieldtemplate "currency" - amountstr = (currency++) <$> simplifySign <$> getAmountStr rules record - maybeamount = either amounterror (Mixed . (:[])) <$> runParser (evalStateT (amountp <* eof) mempty) "" <$> T.pack <$> amountstr - amounterror err = error' $ unlines - ["error: could not parse \""++fromJust amountstr++"\" as an amount" - ,showRecord record - ,"the amount rule is: "++(fromMaybe "" $ mfieldtemplate "amount") - ,"the currency rule is: "++(fromMaybe "unspecified" $ mfieldtemplate "currency") - ,"the default-currency is: "++fromMaybe "unspecified" mdefaultcurrency - ,"the parse error is: "++customErrorBundlePretty err - ,"you may need to " - ++"change your amount or currency rules, " - ++"or "++maybe "add a" (const "change your") mskip++" skip rule" - ] - amount1 = case maybeamount of - Just a -> a - Nothing | balance1 /= Nothing || balance2 /= Nothing -> nullmixedamt - Nothing -> error' $ "amount and balance have no value\n"++showRecord record - -- convert balancing amount to cost like hledger print, so eg if - -- amount1 is "10 GBP @@ 15 USD", amount2 will be "-15 USD". - amount2 = costOfMixedAmount (-amount1) + code = singleline $ maybe "" render $ mfieldtemplate "code" + description = singleline $ maybe "" render $ mfieldtemplate "description" + comment = singleline $ maybe "" render $ mfieldtemplate "comment" + precomment = singleline $ maybe "" render $ mfieldtemplate "precomment" + s `or` def = if null s then def else s - defaccount1 = fromMaybe "unknown" $ mdirective "default-account1" - defaccount2 = case isNegativeMixedAmount amount2 of - Just True -> "income:unknown" - _ -> "expenses:unknown" - account1 = T.pack $ maybe "" render (mfieldtemplate "account1") `or` defaccount1 - account2 = T.pack $ maybe "" render (mfieldtemplate "account2") `or` defaccount2 - balance1template = - case (mfieldtemplate "balance", mfieldtemplate "balance1") of - (Nothing, Nothing) -> Nothing - (balance, Nothing) -> balance - (Nothing, balance1) -> balance1 - (Just _, Just _) -> error' "Please use either balance or balance1, but not both" - balance1 = maybe Nothing (parsebalance "1".render) $ balance1template - balance2 = maybe Nothing (parsebalance "2".render) $ mfieldtemplate "balance2" - parsebalance n str + parsebalance currency n str | all isSpace str = Nothing | otherwise = Just $ (either (balanceerror n str) id $ runParser (evalStateT (amountp <* eof) mempty) "" $ T.pack $ (currency++) $ simplifySign str, nullsourcepos) balanceerror n str err = error' $ unlines ["error: could not parse \""++str++"\" as balance"++n++" amount" ,showRecord record - ,"the balance"++n++" rule is: "++(fromMaybe "" $ mfieldtemplate ("balance"++n)) - ,"the currency rule is: "++(fromMaybe "unspecified" $ mfieldtemplate "currency") + ,showRules rules record ,"the default-currency is: "++fromMaybe "unspecified" mdefaultcurrency ,"the parse error is: "++customErrorBundlePretty err ] + parsePosting' number accountFld amountFld amountInFld amountOutFld balanceFld commentFld = + let currency = maybe (fromMaybe "" mdefaultcurrency) render $ + (mfieldtemplate ("currency"++number) `or `mfieldtemplate "currency") + amount = chooseAmount rules record currency amountFld amountInFld amountOutFld + account' = ((T.pack . render) <$> (mfieldtemplate accountFld + `or` mdirective ("default-account" ++ number))) + balance = (parsebalance currency number.render) =<< mfieldtemplate balanceFld + comment = T.pack $ maybe "" render $ mfieldtemplate commentFld + account = + case account' of + -- If account is explicitly "unassigned", suppress posting + -- Otherwise, generate posting with "expenses:unknown" account if we have amount/balance information + Just "" -> Nothing + Just account -> Just account + Nothing -> + -- If we have amount or balance assertion (which implies potential amount change), + -- but no account name, lets generate "expenses:unknown" account name. + case (amount, balance) of + (Just _, _ ) -> Just "expenses:unknown" + (_, Just _) -> Just "expenses:unknown" + (Nothing, Nothing) -> Nothing + in + case account of + Nothing -> Nothing + Just account -> + Just $ (number, posting {paccount=account, pamount=fromMaybe missingmixedamt amount, ptransaction=Just t, pbalanceassertion=toAssertion <$> balance, pcomment = comment}) + + parsePosting number = + parsePosting' number + ("account"++number) + ("amount"++number) + ("amount"++number++"-in") + ("amount"++number++"-out") + ("balance"++number) + ("comment" ++ number) + + postingLegacy = parsePosting' "" "account1" "amount" "amount-in" "amount-out" "balance" "comment1" + posting1' = parsePosting "1" + posting1 = + case (postingLegacy,posting1') of + (Just (_,legacy), Nothing) -> Just ("1", legacy) + (Nothing, Just (_,posting1)) -> Just ("1", posting1) + (Just (_,legacy), Just (_,posting1)) -> + -- Here we merge legacy fields such as "amount" with "amount1", etc + -- Account and Comment would be the same by construction + let balanceassertion = (pbalanceassertion legacy) `or` (pbalanceassertion posting1) + amount = + let al = pamount legacy + a1 = pamount posting1 + in + if al == a1 then al + else + case (isZeroMixedAmount al, isZeroMixedAmount a1) of + (True, _) -> a1 + (False, True) -> al + (False, False) -> + error' $ unlines [ "amount/amount-in/amount-out and amount1/amount1-in/amount1-out produced conflicting values" + , showRecord record + , showRules rules record + , "amount/amount-in/amount-out is " ++ showMixedAmount al + , "amount1/amount1-in/amount1-out is " ++ showMixedAmount a1 + ] + in Just $ ("1", posting {paccount=paccount posting1, pamount=amount, ptransaction=Just t, pbalanceassertion=balanceassertion, pcomment = pcomment posting1}) + (Nothing, Nothing) -> Nothing + postings' = catMaybes $ posting1:[ parsePosting i | x<-[2..9], let i = show x] + + improveUnknownAccountName p = + if paccount p /="expenses:unknown" + then p + else case isNegativeMixedAmount (pamount p) of + Just True -> p{paccount = "income:unknown"} + Just False -> p{paccount = "expenses:unknown"} + _ -> p + + postings = + case postings' of + -- To be compatible with the behavior of the old code which allowed two postings only, we enforce + -- second posting when rules generated just first of them. + -- When we have srictly first and second posting, but second posting does not have amount, we fill it in. + [("1",posting1)] -> + [posting1,improveUnknownAccountName (posting{paccount="expenses:unknown", pamount=costOfMixedAmount(-(pamount posting1)), ptransaction=Just t})] + [("1",posting1),("2",posting2)] -> + case (pamount posting1 == missingmixedamt , pamount posting2 == missingmixedamt) of + (False, True) -> [posting1, improveUnknownAccountName (posting2{pamount=costOfMixedAmount(-(pamount posting1))})] + _ -> [posting1, posting2] + _ -> map snd postings' + -- build the transaction t = nulltransaction{ tsourcepos = genericSourcePos sourcepos, @@ -764,39 +821,56 @@ transactionFromCsvRecord sourcepos rules record = t tcode = T.pack code, tdescription = T.pack description, tcomment = T.pack comment, - tprecedingcomment = T.pack precomment, - tpostings = - [posting {paccount=account1, pamount=amount1, ptransaction=Just t, pbalanceassertion=toAssertion <$> balance1} - ,posting {paccount=account2, pamount=amount2, ptransaction=Just t, pbalanceassertion=toAssertion <$> balance2} - ] + tprecedingcomment = T.pack precomment, + tpostings = postings } toAssertion (a, b) = assertion{ baamount = a, baposition = b } -getAmountStr :: CsvRules -> CsvRecord -> Maybe String -getAmountStr rules record = +chooseAmount :: CsvRules -> CsvRecord -> String -> String -> String -> String -> Maybe MixedAmount +chooseAmount rules record currency amountFld amountInFld amountOutFld = let - mamount = getEffectiveAssignment rules record "amount" - mamountin = getEffectiveAssignment rules record "amount-in" - mamountout = getEffectiveAssignment rules record "amount-out" - render = fmap (strip . renderTemplate rules record) + mamount = getEffectiveAssignment rules record amountFld + mamountin = getEffectiveAssignment rules record amountInFld + mamountout = getEffectiveAssignment rules record amountOutFld + parse amt = notZero =<< (parseAmount currency <$> notEmpty =<< (strip . renderTemplate rules record) <$> amt) in - case (render mamount, render mamountin, render mamountout) of - (Just "", Nothing, Nothing) -> Nothing + case (parse mamount, parse mamountin, parse mamountout) of + (Nothing, Nothing, Nothing) -> Nothing (Just a, Nothing, Nothing) -> Just a - (Nothing, Just "", Just "") -> error' $ "neither amount-in or amount-out has a value\n" - ++ " record: " ++ showRecord record - (Nothing, Just i, Just "") -> Just i - (Nothing, Just "", Just o) -> Just $ negateStr o - (Nothing, Just i, Just o) -> error' $ "both amount-in and amount-out have a value\n" - ++ " amount-in: " ++ i ++ "\n" - ++ " amount-out: " ++ o ++ "\n" + (Nothing, Just i, Nothing) -> Just i + (Nothing, Nothing, Just o) -> Just $ negate o + (Nothing, Just i, Just o) -> error' $ "both "++amountInFld++" and "++amountOutFld++" have a value\n" + ++ " "++amountInFld++": " ++ show i ++ "\n" + ++ " "++amountOutFld++": " ++ show o ++ "\n" ++ " record: " ++ showRecord record - _ -> error' $ "found values for amount and for amount-in/amount-out\n" - ++ "please use either amount or amount-in/amount-out\n" + _ -> error' $ "found values for "++amountFld++" and for "++amountInFld++"/"++amountOutFld++"\n" + ++ "please use either "++amountFld++" or "++amountInFld++"/"++amountOutFld++"\n" ++ " record: " ++ showRecord record + where + notZero amt = if isZeroMixedAmount amt then Nothing else Just amt + notEmpty str = if str=="" then Nothing else Just str + + parseAmount currency amountstr = + either (amounterror amountstr) (Mixed . (:[])) + <$> runParser (evalStateT (amountp <* eof) mempty) "" + <$> T.pack + <$> (currency++) + <$> simplifySign + <$> amountstr + + amounterror amountstr err = error' $ unlines + ["error: could not parse \""++fromJust amountstr++"\" as an amount" + ,showRecord record + ,showRules rules record + ,"the default-currency is: "++fromMaybe "unspecified" (getDirective "default-currency" rules) + ,"the parse error is: "++customErrorBundlePretty err + ,"you may need to " + ++"change your amount or currency rules, " + ++"or add or change your skip rule" + ] type CsvAmountString = String @@ -861,7 +935,7 @@ getEffectiveAssignment rules record f = lastMay $ assignmentsFor f -- | Render a field assigment's template, possibly interpolating referenced -- CSV field values. Outer whitespace is removed from interpolated values. renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String -renderTemplate rules record t = regexReplaceBy "%[A-z0-9]+" replace t +renderTemplate rules record t = regexReplaceBy "%[A-z0-9_-]+" replace t where replace ('%':pat) = maybe pat (\i -> strip $ atDef "" record (i-1)) mindex where diff --git a/hledger-lib/Hledger/Utils/String.hs b/hledger-lib/Hledger/Utils/String.hs index 23fb3163839..03342317dfe 100644 --- a/hledger-lib/Hledger/Utils/String.hs +++ b/hledger-lib/Hledger/Utils/String.hs @@ -21,6 +21,7 @@ module Hledger.Utils.String ( lstrip, rstrip, chomp, + singleline, elideLeft, elideRight, formatString, @@ -76,6 +77,10 @@ rstrip = reverse . lstrip . reverse chomp :: String -> String chomp = reverse . dropWhile (`elem` "\r\n") . reverse +-- | Remove consequtive line breaks, replacing them with single space +singleline :: String -> String +singleline = unwords . filter (/="") . (map strip) . lines + stripbrackets :: String -> String stripbrackets = dropWhile (`elem` "([") . reverse . dropWhile (`elem` "])") . reverse :: String -> String diff --git a/hledger-lib/hledger_csv.m4.md b/hledger-lib/hledger_csv.m4.md index 03d0fd22cc6..4055c857f9b 100644 --- a/hledger-lib/hledger_csv.m4.md +++ b/hledger-lib/hledger_csv.m4.md @@ -38,7 +38,7 @@ At minimum, the rules file must identify the date and amount fields. It's often necessary to specify the date format, and the number of header lines to skip, also. Eg: ``` -fields date, _, _, amount +fields date, _, _, amount1 date-format %d/%m/%Y skip 1 ``` @@ -55,7 +55,7 @@ A more complete example: skip 1 # name the csv fields (and assign the transaction's date, amount and code) -fields date, _, toorfrom, name, amzstatus, amount, fees, code +fields date, _, toorfrom, name, amzstatus, amount1, fees, code # how to parse the date date-format %b %-d, %Y @@ -64,7 +64,7 @@ date-format %b %-d, %Y description %toorfrom %name # save these fields as tags -comment status:%amzstatus, fees:%fees +comment status:%amzstatus # set the base account for all transactions account1 assets:amazon @@ -72,6 +72,9 @@ account1 assets:amazon # flip the sign on the amount amount -%amount +# Put fees in a separate posting +amount3 %fees +comment3 fees ``` For more examples, see [Convert CSV files](https://github.com/simonmichael/hledger/wiki/Convert-CSV-files). @@ -92,7 +95,7 @@ You'll need this whenever your CSV data contains header lines. Eg: - +This can also be used in a conditional block to ignore certain CSV records. ```rules # ignore the first CSV line skip 1 @@ -133,7 +136,13 @@ date-format %-m/%-d/%Y %l:%M %p This (a) names the CSV fields, in order (names may not contain whitespace; uninteresting names may be left blank), and (b) assigns them to journal entry fields if you use any of these standard field names: -`date`, `date2`, `status`, `code`, `description`, `comment`, `account1`, `account2`, `amount`, `amount-in`, `amount-out`, `currency`, `balance`, `balance1`, `balance2`. + +Fields `date`, `date2`, `status`, `code`, `description` will form transaction description. + +An assignment to any of `accountN`, `amountN`, `amountN-in`, `amountN-out`, `balanceN` or `currencyN` will generate a posting (though it's your responsibility to ensure it is a well formed one). Normally the `N`'s are consecutive starting from 1 but it's not required. One posting will be generated for each unique `N`. If you wish to supply a comment for the posting, use `commentN`, though comment on its own will not cause posting to be generated. + +Fields `amount`, `amount-in`, `amount-out`, `currency`, `balance` and `comment` are treated as aliases for `amount1`, and so on. If your rules file leads to both aliased fields having different values, `hledger` will raise an error. + Eg: ```rules # use the 1st, 2nd and 4th CSV fields as the entry's date, description and amount, @@ -142,9 +151,11 @@ Eg: # CSV field: # 1 2 3 4 5 6 7 8 # entry field: -fields date, description, , amount, , , somefield, anotherfield +fields date, description, , amount1, , , somefield, anotherfield ``` +For backwards compatibility, we treat posting 1 specially. If your rules generated just posting 1, another posting would be added to your transaction to balance it. If your rules generated posting 1 and posting 2, but amount in the posting 2 is empty, hledger will fill it out with the opposite of posting 1. This special handling is needed to ensure smooth upgrade path from version 1.15. + ## field assignment *`ENTRYFIELDNAME`* *`FIELDVALUE`* @@ -177,12 +188,23 @@ Note, interpolation strips any outer whitespace, so a CSV value like *`PATTERN`*...\     *`FIELDASSIGNMENTS`*... +`if` *`PATTERN`*\ +*`PATTERN`*...\ +    *`skip N`*... + +`if` *`PATTERN`*\ +*`PATTERN`*...\ +    *`end`*... + This applies one or more field assignments, only to those CSV records matched by one of the PATTERNs. The patterns are case-insensitive regular expressions which match anywhere within the whole CSV record (it's not yet possible to match within a specific field). When there are multiple patterns they can be written on separate lines, unindented. The field assignments are on separate lines indented by at least one space. + +Instead of field assignments you can specify `skip` or `skip 1` to skip this record, `skip N` to skip the next N records (including the one that matchied) or `end` to skip the rest of the file. + Examples: ```rules # if the CSV record contains "groceries", set account2 to "expenses:groceries" @@ -231,22 +253,21 @@ The order of same-day entries will be preserved ## CSV accounts -Each journal entry will have two [postings](journal.html#postings), to `account1` and `account2` respectively. -It's not yet possible to generate entries with more than two postings. +Each journal entry will have at least two [postings](journal.html#postings), to `account1` and some other account (usually `account2`). It's conventional and recommended to use `account1` for the account whose CSV we are reading. ## CSV amounts -A transaction [amount](journal.html#amounts) must be set, in one of these ways: +A posting [amount](journal.html#amounts) could be set in one of these ways: -- with an `amount` field assignment, which sets the first posting's amount +- with an `amountN` field assignment, which sets the Nth posting's amount - (When the CSV has debit and credit amounts in separate fields:)\ - with field assignments for the `amount-in` and `amount-out` pseudo + with field assignments for the `amountN-in` and `amountN-out` pseudo fields (both of them). Whichever one has a value will be used, with appropriate sign. If both contain a value, it might not work so well. -- or implicitly by means of a [balance assignment](journal.html#balance-assignments) (see below). +- with `balanceN` field assignment that creates a [balance assignment](journal.html#balance-assignments) (see below). There is some special handling for sign in amounts: @@ -254,24 +275,53 @@ There is some special handling for sign in amounts: - If an amount value begins with a double minus sign, those will cancel out and be removed. If the currency/commodity symbol is provided as a separate CSV field, -assign it to the `currency` pseudo field; the symbol will be prepended +assign it to the `currency` pseudo field (applicable to the whole transaction) or `currencyN` (applicable to Nth posting only); the symbol will be prepended to the amount (TODO: when there is an amount). -Or, you can use an `amount` [field assignment](#field-assignment) for more control, eg: +Or, you can use an `amountN` [field assignment](#field-assignment) for more control, eg: ``` -fields date,description,currency,amount -amount %amount %currency +fields date,description,currency,amount1 +amount1 %amount1 %currency ``` ## CSV balance assertions/assignments If the CSV includes a running balance, you can assign that to one of the pseudo fields -`balance` (or `balance1`) or `balance2`. +`balance` (or `balance1`), `balance2`, ... up to `balance9`. This will generate a [balance assertion](journal.html#balance-assertions) (or if the amount is left empty, a [balance assignment](journal.html#balance-assignments)), -on the first or second posting, -whenever the running balance field is non-empty. -(TODO: [#1000](https://github.com/simonmichael/hledger/issues/1000)) +on the appropriate posting, whenever the running balance field is non-empty. + +## References to other fields and evaluation order + +Field assignments could include references to other fields or even to the same field you are trying to assign: + +``` +fields date,description,currency,amount1 + +amount1 %amount1 USD +amount1 %amount1 EUR +amount1 %amount1 %currency + +if SOME_REGEXP + amount1 %amount1 GBP +``` +This is how this file would be evaluated. + +First, parts of CVS record are assigned according to `fields` directive. + +Then all other field assignments -- written at top level, or included in `if` blocks -- are considered to see if they should be applied. They are checked in the order they are written, with later assignment overwriting earlier ones. + +Once full set of field assignments that should be applied is known, their values are computed, and this is when all `%` references are evaluated. + +So for a particular row from CSV file, value from fourth column would be assigned to `amount1`. + +Then `hledger` will decide that `amount1` would have to be amended to `%amount1 USD`, but this will not happen immediately. This choice would be replaced by decision to rewrite `amount1` to `%amount EUR`, which will in turn be thrown away in favor of `%amount1 %currency`. If the `if` block condition will match the row, it will assign `amount1` to `%amount1 GBP`. + +Overall, we will end up with one of the two alternatives for `amount1` - either `%amount1 %currency` or `%amount1 GBP`. + +Now substitution of all referenced values will happen, using the current values for `%amount1` and `currency`, which were provided by the `fields` directive. + ## Reading multiple CSV files diff --git a/tests/csv.test b/tests/csv.test index 8486d55f927..174d3474066 100644 --- a/tests/csv.test +++ b/tests/csv.test @@ -1,11 +1,13 @@ -# These tests read CSV from stdin for convenience, so to ensure we get the CSV reader's -# error, the csv: prefix is used. -# -# The final cleanup command is chained with && so as not to mask hledger's exit code, -# but this means a temp file is left behind whenever hledger fails. What TODO ? - # 1. read CSV to hledger journal format -$ printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules +< +10/2009/09,Flubber Co,50 +RULES +fields date, description, amount +date-format %d/%Y/%m +currency $ +account1 assets:myacct + +$ ./hledger-csv 2009/09/10 Flubber Co assets:myacct $50 income:unknown $-50 @@ -16,7 +18,16 @@ $ printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $ < 10/2009/09,Flubber Co🎅,50, 11/2009/09,Flubber Co🎅,,50 -$ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >t.$$.csv.rules ; hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules +RULES +account1 Assets:MyAccount +date %1 +date-format %d/%Y/%m +description %2 +amount-in %3 +amount-out %4 +currency $ + +$ ./hledger-csv 2009/09/10 Flubber Co🎅 Assets:MyAccount $50 income:unknown $-50 @@ -28,7 +39,18 @@ $ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescrip >=0 # 3. handle conditions assigning multiple fields -$ printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\nif Flubber\n account2 acct\n comment cmt' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules +< +10/2009/09,Flubber Co,50 + +RULES +fields date, description, amount +date-format %d/%Y/%m +currency $ +account1 assets:myacct +if Flubber + account2 acct + comment cmt +$ ./hledger-csv 2009/09/10 Flubber Co ; cmt assets:myacct $50 acct $-50 @@ -36,7 +58,16 @@ $ printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $ >=0 # 4. read CSV with balance field -$ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,123\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules +< +10/2009/09,Flubber Co,50,123 + +RULES +fields date, description, amount, balance +date-format %d/%Y/%m +currency $ +account1 assets:myacct + +$ ./hledger-csv 2009/09/10 Flubber Co assets:myacct $50 = $123 income:unknown $-50 @@ -44,7 +75,17 @@ $ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\nc >=0 # 5. read CSV with empty balance field -$ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,123\n11/2009/09,Blubber Co,60,\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules +< +10/2009/09,Flubber Co,50,123 +11/2009/09,Blubber Co,60, + +RULES +fields date, description, amount, balance +date-format %d/%Y/%m +currency $ +account1 assets:myacct + +$ ./hledger-csv 2009/09/10 Flubber Co assets:myacct $50 = $123 income:unknown $-50 @@ -56,7 +97,17 @@ $ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\nc >=0 # 6. read CSV with only whitespace in balance field -$ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,123\n11/2009/09,Blubber Co,60, \n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules +< +10/2009/09,Flubber Co,50,123 +11/2009/09,Blubber Co,60, + +RULES +fields date, description, amount, balance +date-format %d/%Y/%m +currency $ +account1 assets:myacct + +$ ./hledger-csv 2009/09/10 Flubber Co assets:myacct $50 = $123 income:unknown $-50 @@ -68,7 +119,22 @@ $ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\nc >=0 # 7. read CSV with rule double-negating column -$ printf 'skip 1\n\ncurrency $\n\nfields date, payee, payment\n\namount -%%payment\naccount1 liabilities:bank\naccount2 expense:other' >t.$$.csv.rules; printf 'date,payee,amount\n2009/10/9,Flubber Co,50\n2009/11/09,Merchant Credit,-60\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules +< +date,payee,amount +2009/10/9,Flubber Co,50 +2009/11/09,Merchant Credit,-60 + +RULES +skip 1 + +currency $ + +fields date, payee, payment + +amount -%payment +account1 liabilities:bank +account2 expense:other +$ ./hledger-csv 2009/10/09 liabilities:bank $-50 expense:other $50 @@ -83,7 +149,16 @@ $ printf 'skip 1\n\ncurrency $\n\nfields date, payee, payment\n\namount -%%paym < 10/2009/09;Flubber Co🎅;50; 11/2009/09;Flubber Co🎅;;50 -$ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >rules.$$ ; hledger --separator ';' -f csv:- --rules-file rules.$$ print && rm -rf rules.$$ +RULES +account1 Assets:MyAccount +date %1 +date-format %d/%Y/%m +description %2 +amount-in %3 +amount-out %4 +currency $ + +$ ./hledger-csv --separator ';' 2009/09/10 Flubber Co🎅 Assets:MyAccount $50 income:unknown $-50 @@ -95,7 +170,16 @@ $ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescrip >=0 # 9. read CSV with balance2 field -$ printf 'fields date, description, amount, balance2\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,123\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules +< +10/2009/09,Flubber Co,50,123 + +RULES +fields date, description, amount, balance2 +date-format %d/%Y/%m +currency $ +account1 assets:myacct + +$ ./hledger-csv 2009/09/10 Flubber Co assets:myacct $50 income:unknown $-50 = $123 @@ -103,7 +187,16 @@ $ printf 'fields date, description, amount, balance2\ndate-format %%d/%%Y/%%m\n >=0 # 10. read CSV with balance1 and balance2 fields -$ printf 'fields date, description, amount, balance1, balance2\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,321,123\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules +< +10/2009/09,Flubber Co,50,321,123 + +RULES +fields date, description, amount, balance1, balance2 +date-format %d/%Y/%m +currency $ +account1 assets:myacct + +$ ./hledger-csv 2009/09/10 Flubber Co assets:myacct $50 = $321 income:unknown $-50 = $123 @@ -111,6 +204,291 @@ $ printf 'fields date, description, amount, balance1, balance2\ndate-format %%d >=0 +# 11. More than two postings +< +10/2009/09,Flubber Co,50,321,123,0.234,VAT + +RULES +fields date, description, amount, balance1, balance2, amount3,comment3 +date-format %d/%Y/%m +currency $ +account1 assets:myacct +account3 expenses:tax + +$ ./hledger-csv +2009/09/10 Flubber Co + assets:myacct $50 = $321 + expenses:unknown = $123 + expenses:tax $0.234 ; VAT + +>=0 + +# 12. More than two postings and different currencies +< +10/2009/09,Flubber Co,50,321,123,£,0.234,VAT + +RULES +fields date, description, amount, balance1, balance2, currency3, amount3,comment3 +date-format %d/%Y/%m +currency $ +account1 assets:myacct +account3 expenses:tax + +$ ./hledger-csv +2009/09/10 Flubber Co + assets:myacct $50 = $321 + expenses:unknown = $123 + expenses:tax £0.234 ; VAT + +>=0 + +# 13. reading CSV with in-field and out-field, where one could be zero +< +10/2009/09,Flubber Co🎅,50,0 +11/2009/09,Flubber Co🎅,0.00,50 +RULES +account1 Assets:MyAccount +date %1 +date-format %d/%Y/%m +description %2 +amount-in %3 +amount-out %4 +currency $ + +$ ./hledger-csv +2009/09/10 Flubber Co🎅 + Assets:MyAccount $50 + income:unknown $-50 + +2009/09/11 Flubber Co🎅 + Assets:MyAccount $-50 + expenses:unknown $50 + +>=0 + +# 14. multiline descriptions +< +10/2009/09,"Flubber Co + + + +Co +Co + + + + +",50 + +RULES +fields date, description, amount +date-format %d/%Y/%m +currency $ +account1 assets:myacct +$ ./hledger-csv +2009/09/10 Flubber Co Co Co + assets:myacct $50 + income:unknown $-50 + +>=0 + +# 15. recursive interpolation +< +myacct,10/2009/09,Flubber Co,50, + +RULES + +fields account1, date, description, amount-in, amount-out +date-format %d/%Y/%m +currency $ +if Flubber + account1 assets:%account1 + amount-in (%amount-in) +$ ./hledger-csv +2009/09/10 Flubber Co + assets:myacct $-50 + expenses:unknown $50 + +>=0 + +# 16. Real life-ish paypal parsing example +< +"12/22/2018","06:22:50","PST","Someone","Subscription Payment","Completed","USD","10.00","-0.59","9.41","someone@some.where","simon@joyful.com","123456789","Joyful Systems","","9KCXINCOME:UNKNOWNZXXAX","","57.60","" + +RULES +fields date, time, timezone, description, type, status_, currency, grossamount, feeamount, netamount, fromemail, toemail, code, itemtitle, itemid, referencetxnid, receiptid, balance, note +account1 sm:assets:online:paypal +amount1 %netamount +account2 sm:expenses:unknown +account3 JS:expenses:banking:paypal +amount3 %feeamount +balance %18 +code %13 +currency $ +date %1 +date-format %m/%d/%Y +description %description for %itemtitle +$ ./hledger-csv +2018/12/22 (123456789) Someone for Joyful Systems + sm:assets:online:paypal $9.41 = $57.60 + sm:expenses:unknown + JS:expenses:banking:paypal $-0.59 + +>=0 + +# 17. Show that #415 is fixed +< +"2016/01/01","$1" +"2016/02/02","$1,000.00" +RULES +account1 unknown +amount %2 +date %1 +date-format %Y/%m/%d +$ ./hledger-csv | hledger balance -f - --no-total + $-1,001.00 income:unknown + $1,001.00 unknown +>=0 + +# 18. Conditional skips +< +HEADER +10/2009/09,Flubber Co,50 +MIDDLE SKIP THIS LINE +AND THIS +AND THIS ONE +10/2009/09,Flubber Co,50 +*** END OF FILE *** +More lines of the trailer here +They all should be ignored +RULES +fields date, description, amount +date-format %d/%Y/%m +currency $ +account1 assets:myacct + +if HEADER + skip + +if +END OF FILE + end + +if MIDDLE + skip 3 + +$ ./hledger-csv +2009/09/10 Flubber Co + assets:myacct $50 + income:unknown $-50 + +2009/09/10 Flubber Co + assets:myacct $50 + income:unknown $-50 + +>=0 + +# 19. Lines with just balance, no amount (#1000) +< +2018-10-15,100 +2018-10-16,200 +2018-10-17,300 +RULES +fields date,bal + +balance EUR %bal +date-format %Y-%m-%d +description Assets Update +account1 assets +account2 income +$ ./hledger-csv +2018/10/15 Assets Update + assets = EUR 100 + income + +2018/10/16 Assets Update + assets = EUR 200 + income + +2018/10/17 Assets Update + assets = EUR 300 + income + +>=0 + +# 20. Test for #1001 - empty assignment to amount show not eat next line +< +2018-10-15,100 +2018-10-16,200 +2018-10-17,300 +RULES +fields date,bal + +balance EUR %bal +date-format %Y-%m-%d +description Assets Update +account1 assets +account2 income +if 2018 + amount + comment Dont eat me + balance + comment Dont eat me +$ ./hledger-csv +2018/10/15 Assets Update ; Dont eat me + assets + income + +2018/10/16 Assets Update ; Dont eat me + assets + income + +2018/10/17 Assets Update ; Dont eat me + assets + income + +>=0 + +# 21. Amountless postings and conditional third posting +< +"12/22/2018","06:22:50","PST","Someone","Subscription Payment","Completed","USD","10.00","-0.59","9.41","someone@some.where","simon@joyful.com","123456789","Joyful Systems","","9KCXINCOME:UNKNOWNZXXAX","","57.60","" +"12/22/2018","06:22:50","PST","Someone","Empty fee","Completed","USD","10.00","","6.66","someone@some.where","simon@joyful.com","987654321","Joyful Systems","","9KCXINCOME:UNKNOWNZXXAX","","99.60","" +"12/22/2018","06:22:50","PST","Someone","Conditional Empty fee","Completed","USD","10.00","-1.23","7.77","someone@some.where","simon@joyful.com","10101010101","Joyful Systems","","9KCXINCOME:UNKNOWNZXXAX","","88.66","" + +RULES +fields date, time, timezone, description, type, status_, currency, grossamount, feeamount, netamount, fromemail, toemail, code, itemtitle, itemid, referencetxnid, receiptid, balance, note +account1 sm:assets:online:paypal +amount1 %netamount +account2 sm:expenses:unknown +account3 JS:expenses:banking:paypal +amount3 %feeamount +balance %18 +code %13 +currency $ +date %1 +date-format %m/%d/%Y +description %description for %itemtitle +if Conditional Empty Fee + account3 + +$ ./hledger-csv +2018/12/22 (123456789) Someone for Joyful Systems + sm:assets:online:paypal $9.41 = $57.60 + sm:expenses:unknown + JS:expenses:banking:paypal $-0.59 + +2018/12/22 (987654321) Someone for Joyful Systems + sm:assets:online:paypal $6.66 = $99.60 + sm:expenses:unknown + JS:expenses:banking:paypal + +2018/12/22 (10101010101) Someone for Joyful Systems + sm:assets:online:paypal $7.77 = $88.66 + sm:expenses:unknown $-7.77 + +>=0 + + # . TODO: without --separator gives obscure error # | # 1 | 10/2009/09;Flubber Co🎅;50; @@ -119,10 +497,18 @@ $ printf 'fields date, description, amount, balance1, balance2\ndate-format %%d # < # 10/2009/09;Flubber Co🎅;50; # 11/2009/09;Flubber Co🎅;;50 -# $ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >rules.$$ ; hledger -f csv:- --rules-file rules.$$ print && rm -rf rules.$$ +# RULES +# account1 Assets:MyAccount +# date %1 +# date-format %d/%Y/%m +# description %2 +# amount-in %3 +# amount-out %4 +# currency $ +# $ ./hledger-csv # 2009/09/10 Flubber Co🎅 # Assets:MyAccount $50 -# income:unknown $-50 +# expenses:unknown $-50 # # 2009/09/11 Flubber Co🎅 # Assets:MyAccount $-50 @@ -134,10 +520,18 @@ $ printf 'fields date, description, amount, balance1, balance2\ndate-format %%d # < # 10/2009/09 Flubber Co🎅 50 # 11/2009/09 Flubber Co🎅 50 -# $ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >rules.$$ ; hledger --separator "\t" -f csv:- --rules-file rules.$$ print && rm -rf rules.$$ +# RULES +# account1 Assets:MyAccount +# date %1 +# date-format %d/%Y/%m +# description %2 +# amount-in %3 +# amount-out %4 +# currency $ +# $ ./hledger-csv # 2009/09/10 Flubber Co🎅 # Assets:MyAccount $50 -# income:unknown $-50 +# expenses:unknown $-50 # # 2009/09/11 Flubber Co🎅 # Assets:MyAccount $-50 diff --git a/tests/hledger-csv b/tests/hledger-csv new file mode 100755 index 00000000000..8ea96a06ddc --- /dev/null +++ b/tests/hledger-csv @@ -0,0 +1,15 @@ +#!/bin/bash +# +# This scripts expects stdin formatted like this: +# +# RULES +# +# +awk -vCSV="t.$$.csv" -vRULES="t.$$.csv.rules" ' +BEGIN{output=CSV} +/^RULES/{output=RULES} +!/^RULES/{print $0 >output}' + +trap "rm -f t.$$.csv t.$$.csv.rules" EXIT ERR + +hledger -f csv:t.$$.csv --rules-file t.$$.csv.rules print "$@"