Skip to content

Commit

Permalink
register: wide-character-aware layout (#242)
Browse files Browse the repository at this point in the history
Wide characters, eg chinese/japanese/korean characters, are typically
rendered wider than latin characters. In some applications (eg gnome
terminal or osx terminal) and fonts (eg monaco) they are exactly double
width. This is a start at making hledger aware of this. A register
report containing wide characters (in descriptions, account names, or
commodity symbols) should now align its columns correctly, when viewed
with a suitable font and application.
  • Loading branch information
simonmichael committed Sep 29, 2015
1 parent 09430f0 commit 5b5e5ee
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 18 deletions.
8 changes: 4 additions & 4 deletions hledger-lib/Hledger/Data/AccountName.hs
Expand Up @@ -118,17 +118,17 @@ elideAccountName width s
names = splitOn ", " $ take (length s - 8) s
widthpername = (max 0 (width - 8 - 2 * (max 1 (length names) - 1))) `div` length names
in
elideLeft width $
elideLeftWidth width False $
(++" (split)") $
intercalate ", " $
[accountNameFromComponents $ elideparts widthpername [] $ accountNameComponents s' | s' <- names]
| otherwise =
elideLeft width $ accountNameFromComponents $ elideparts width [] $ accountNameComponents s
elideLeftWidth width False $ accountNameFromComponents $ elideparts width [] $ accountNameComponents s
where
elideparts :: Int -> [String] -> [String] -> [String]
elideparts width done ss
| length (accountNameFromComponents $ done++ss) <= width = done++ss
| length ss > 1 = elideparts width (done++[take 2 $ head ss]) (tail ss)
| strWidth (accountNameFromComponents $ done++ss) <= width = done++ss
| length ss > 1 = elideparts width (done++[takeWidth 2 $ head ss]) (tail ss)
| otherwise = done++ss

-- | Keep only the first n components of an account name, where n
Expand Down
93 changes: 93 additions & 0 deletions hledger-lib/Hledger/Utils/String.hs
@@ -1,3 +1,5 @@
-- | String formatting helpers, starting to get a bit out of control.

module Hledger.Utils.String (
-- * misc
lowercase,
Expand Down Expand Up @@ -27,6 +29,11 @@ module Hledger.Utils.String (
elideLeft,
elideRight,
formatString,
-- * wide-character-aware single-line layout
strWidth,
takeWidth,
elideLeftWidth,
elideRightWidth,
-- * multi-line layout
concatTopPadded,
concatBottomPadded,
Expand Down Expand Up @@ -251,3 +258,89 @@ fitto w h s = intercalate "\n" $ take h $ rows ++ repeat blankline
fit w = take w . (++ repeat ' ')
blankline = replicate w ' '

-- Functions below are aware of double-width characters eg in CJK text.

-- | Wide-character-aware string clipping to the specified width, with an ellipsis on the right.
-- When the second argument is true, also right-pad with spaces to the specified width if needed.
elideLeftWidth :: Int -> Bool -> String -> String
elideLeftWidth width pad s
| strWidth s > width = ellipsis ++ reverse (takeWidth (width - length ellipsis) $ reverse s)
| otherwise = reverse (takeWidth width $ reverse s ++ padding)
where
ellipsis = ".."
padding = if pad then repeat ' ' else ""

-- | Wide-character-aware string clipping to the specified width, with an ellipsis on the left.
-- When the second argument is true, also left-pad with spaces to the specified width if needed.
elideRightWidth :: Int -> Bool -> String -> String
elideRightWidth width pad s
| strWidth s > width = takeWidth (width - length ellipsis) s ++ ellipsis
| otherwise = takeWidth width $ s ++ padding
where
ellipsis = ".."
padding = if pad then repeat ' ' else ""

-- | Double-width-character-aware string truncation. Take as many
-- characters as possible from a string without exceeding the
-- specified width. Eg takeWidth 3 "りんご" = "り".
takeWidth :: Int -> String -> String
takeWidth _ "" = ""
takeWidth 0 _ = ""
takeWidth w (c:cs) | cw <= w = c:takeWidth (w-cw) cs
| otherwise = ""
where cw = charWidth c

-- from Pandoc (copyright John MacFarlane, GPL)
-- see also http://unicode.org/reports/tr11/#Description

-- | Get real length of string, taking into account combining and
-- double-width characters.
strWidth :: String -> Int
strWidth = foldr (\a b -> charWidth a + b) 0

-- | Returns the width of a character in a monospace font: 0 for a
-- combining character, 1 for a regular character, 2 for an East Asian
-- wide character.
charWidth :: Char -> Int
charWidth c =
case c of
_ | c < '\x0300' -> 1
| c >= '\x0300' && c <= '\x036F' -> 0 -- combining
| c >= '\x0370' && c <= '\x10FC' -> 1
| c >= '\x1100' && c <= '\x115F' -> 2
| c >= '\x1160' && c <= '\x11A2' -> 1
| c >= '\x11A3' && c <= '\x11A7' -> 2
| c >= '\x11A8' && c <= '\x11F9' -> 1
| c >= '\x11FA' && c <= '\x11FF' -> 2
| c >= '\x1200' && c <= '\x2328' -> 1
| c >= '\x2329' && c <= '\x232A' -> 2
| c >= '\x232B' && c <= '\x2E31' -> 1
| c >= '\x2E80' && c <= '\x303E' -> 2
| c == '\x303F' -> 1
| c >= '\x3041' && c <= '\x3247' -> 2
| c >= '\x3248' && c <= '\x324F' -> 1 -- ambiguous
| c >= '\x3250' && c <= '\x4DBF' -> 2
| c >= '\x4DC0' && c <= '\x4DFF' -> 1
| c >= '\x4E00' && c <= '\xA4C6' -> 2
| c >= '\xA4D0' && c <= '\xA95F' -> 1
| c >= '\xA960' && c <= '\xA97C' -> 2
| c >= '\xA980' && c <= '\xABF9' -> 1
| c >= '\xAC00' && c <= '\xD7FB' -> 2
| c >= '\xD800' && c <= '\xDFFF' -> 1
| c >= '\xE000' && c <= '\xF8FF' -> 1 -- ambiguous
| c >= '\xF900' && c <= '\xFAFF' -> 2
| c >= '\xFB00' && c <= '\xFDFD' -> 1
| c >= '\xFE00' && c <= '\xFE0F' -> 1 -- ambiguous
| c >= '\xFE10' && c <= '\xFE19' -> 2
| c >= '\xFE20' && c <= '\xFE26' -> 1
| c >= '\xFE30' && c <= '\xFE6B' -> 2
| c >= '\xFE70' && c <= '\xFEFF' -> 1
| c >= '\xFF01' && c <= '\xFF60' -> 2
| c >= '\xFF61' && c <= '\x16A38' -> 1
| c >= '\x1B000' && c <= '\x1B001' -> 2
| c >= '\x1D000' && c <= '\x1F1FF' -> 1
| c >= '\x1F200' && c <= '\x1F251' -> 2
| c >= '\x1F300' && c <= '\x1F773' -> 1
| c >= '\x20000' && c <= '\x3FFFD' -> 2
| otherwise -> 1

39 changes: 25 additions & 14 deletions hledger/Hledger/Cli/Register.hs
Expand Up @@ -19,7 +19,6 @@ import Data.Maybe
import System.Console.CmdArgs.Explicit
import Text.CSV
import Test.HUnit
import Text.Printf

import Hledger
import Hledger.Cli.CliOptions
Expand Down Expand Up @@ -119,14 +118,29 @@ tests_postingsReportAsText = [
--
postingsReportItemAsText :: CliOpts -> PostingsReportItem -> String
postingsReportItemAsText opts (mdate, menddate, mdesc, p, b) =
-- use elide*Width to be wide-char-aware
intercalate "\n" $
[printf ("%-"++datew++"s %-"++descw++"s %-"++acctw++"s %"++amtw++"s %"++balw++"s")
date desc acct amtfirstline balfirstline]
[concat [elideRightWidth datewidth True date
," "
,elideRightWidth descwidth True desc
," "
,elideRightWidth acctwidth True acct
," "
,elideLeftWidth amtwidth True amtfirstline
," "
,elideLeftWidth balwidth True balfirstline
]]
++
[printf (spacer ++ "%"++amtw++"s %"++balw++"s") a b | (a,b) <- zip amtrest balrest ]

[concat [spacer
,elideLeftWidth amtwidth True a
," "
,elideLeftWidth balwidth True b
]
| (a,b) <- zip amtrest balrest
]
where
-- calculate widths
-- XXX should be smarter, eg resize amount columns when needed; cf hledger-ui
(totalwidth,mdescwidth) = registerWidthsFromOpts opts
amtwidth = 12
balwidth = 12
Expand All @@ -142,16 +156,16 @@ postingsReportItemAsText opts (mdate, menddate, mdesc, p, b) =
where
hasinterval = isJust menddate
w = fromMaybe ((remaining - 2) `div` 2) mdescwidth
[datew,descw,acctw,amtw,balw] = map show [datewidth,descwidth,acctwidth,amtwidth,balwidth]

-- gather content
desc = maybe "" (take descwidth . elideRight descwidth) mdesc
desc = fromMaybe "" mdesc
acct = parenthesise $ elideAccountName awidth $ paccount p
where
(parenthesise, awidth) = case ptype p of
BalancedVirtualPosting -> (\s -> "["++s++"]", acctwidth-2)
VirtualPosting -> (\s -> "("++s++")", acctwidth-2)
_ -> (id,acctwidth)
(parenthesise, awidth) =
case ptype p of
BalancedVirtualPosting -> (\s -> "["++s++"]", acctwidth-2)
VirtualPosting -> (\s -> "("++s++")", acctwidth-2)
_ -> (id,acctwidth)
amt = showMixedAmountWithoutPrice $ pamount p
bal = showMixedAmountWithoutPrice b
-- alternate behaviour, show null amounts as 0 instead of blank
Expand All @@ -164,9 +178,6 @@ postingsReportItemAsText opts (mdate, menddate, mdesc, p, b) =
(balfirstline:balrest) = take numlines $ replicate (numlines - ballen) "" ++ ballines -- balance amount is bottom-aligned
spacer = replicate (totalwidth - (amtwidth + 2 + balwidth)) ' '

-- XXX
-- showPostingWithBalanceForVty showtxninfo p b = postingsReportItemAsText defreportopts $ mkpostingsReportItem showtxninfo p b

tests_Hledger_Cli_Register :: Test
tests_Hledger_Cli_Register = TestList
tests_postingsReportAsText
31 changes: 31 additions & 0 deletions tests/nonascii/wide-char-layout.test
@@ -0,0 +1,31 @@
# alignment calculations should handle wide characters

# 1. register, account name
hledger -f - register
<<<
1/1
知 1
b
>>>
2015/01/01 知 1 1
b -1 0
>>>=0

# # 2. balance, commodity symbol
# hledger -f - balance
# <<<
# 1/1
# a 知1
# b $-1
# >>>
# 知1 a
# $-1 b
# --------------------
# $-1
# 知1
# >>>=0

# import Text.Data.ICU.Char
# case property EastAsianWidth c of
# Wide -> 2
# _ -> 1

0 comments on commit 5b5e5ee

Please sign in to comment.