Skip to content

Commit

Permalink
update unicode version
Browse files Browse the repository at this point in the history
  • Loading branch information
TimWhiting committed Feb 3, 2024
1 parent 43f9d01 commit 3779cd8
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 12 deletions.
23 changes: 13 additions & 10 deletions lib/std/text/unicode.kk
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,11 @@ pub fun string/width( s : string ) : int {
//--------------------------------------------------------------

// These characters are considered wide, i.e. 2 columns wide.
// https://www.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt
// https://www.unicode.org/Public/16.0.0/ucd/EastAsianWidth.txt
// See ranges with postfix ;W
//
// Update with `python3 util/update-unicode.py -a`
// TODO: Handle 'unassigned' ranges: (Following is an excerpt from https://www.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt )
// TODO: Handle 'unassigned' ranges: (Following is an excerpt from https://www.unicode.org/Public/16.0.0/ucd/EastAsianWidth.txt )
// - All code points, assigned or unassigned, that are not listed
// explicitly are given the value "N".
// - The unassigned code points in the following blocks default to "W":
Expand Down Expand Up @@ -181,7 +181,7 @@ val asian-wide : delayed<total,rtree> = delay{
single(0x2E80,0x2E99),
single(0x2E9B,0x2EF3),
single(0x2F00,0x2FD5),
single(0x2FF0,0x2FFB),
single(0x2FF0,0x2FFF),
single(0x3001,0x3003),
single(0x3004,0x3004),
single(0x3005,0x3005),
Expand Down Expand Up @@ -237,7 +237,8 @@ val asian-wide : delayed<total,rtree> = delay{
single(0x3192,0x3195),
single(0x3196,0x319F),
single(0x31A0,0x31BF),
single(0x31C0,0x31E3),
single(0x31C0,0x31E5),
single(0x31EF,0x31EF),
single(0x31F0,0x31FF),
single(0x3200,0x321E),
single(0x3220,0x3229),
Expand Down Expand Up @@ -314,6 +315,7 @@ val asian-wide : delayed<total,rtree> = delay{
single(0x17000,0x187F7),
single(0x18800,0x18AFF),
single(0x18B00,0x18CD5),
single(0x18CFF,0x18CFF),
single(0x18D00,0x18D08),
single(0x1AFF0,0x1AFF3),
single(0x1AFF5,0x1AFFB),
Expand Down Expand Up @@ -368,11 +370,10 @@ val asian-wide : delayed<total,rtree> = delay{
single(0x1F93C,0x1F945),
single(0x1F947,0x1F9FF),
single(0x1FA70,0x1FA7C),
single(0x1FA80,0x1FA88),
single(0x1FA90,0x1FABD),
single(0x1FABF,0x1FAC5),
single(0x1FACE,0x1FADB),
single(0x1FAE0,0x1FAE8),
single(0x1FA80,0x1FA89),
single(0x1FA8F,0x1FAC6),
single(0x1FACE,0x1FADC),
single(0x1FADF,0x1FAE9),
single(0x1FAF0,0x1FAF8),
single(0x20000,0x2A6DF),
single(0x2A6E0,0x2A6FF),
Expand All @@ -383,7 +384,9 @@ val asian-wide : delayed<total,rtree> = delay{
single(0x2B820,0x2CEA1),
single(0x2CEA2,0x2CEAF),
single(0x2CEB0,0x2EBE0),
single(0x2EBE1,0x2F7FF),
single(0x2EBE1,0x2EBEF),
single(0x2EBF0,0x2EE5D),
single(0x2EE5E,0x2F7FF),
single(0x2F800,0x2FA1D),
single(0x2FA1E,0x2FA1F),
single(0x2FA20,0x2FFFD),
Expand Down
3 changes: 3 additions & 0 deletions package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
# - support/vscode/koka.language-koka/package.json
# - whatsnew.md, readme.md

# Also update the Unicode East Asian wide-character list in `lib/std/text/unicode.kk`
# with the output of `python3 util/update-unicode.py -a`, after first checking whether
# a newer Unicode version exists than the one in the URL that the script fetches.

name: koka
version: 3.0.5
Expand Down
5 changes: 3 additions & 2 deletions util/update-unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
parser.add_argument("-a", "--asian_wide", default=False, action='store_true')
args = parser.parse_args()
if args.asian_wide:
result = requests.get("https://www.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt")
# TODO: Check for the latest Unicode version (currently 16.0.0) and, when it changes, also update the two EastAsianWidth.txt links in the comments of lib/std/text/unicode.kk
result = requests.get("https://www.unicode.org/Public/16.0.0/ucd/EastAsianWidth.txt")
for line in result.text.split("\n"):
values = line.split(";")
if len(values) >= 2:
width = values[1].split("#")[0].strip()
if width == "W":
charrange = values[0].split("..")
charrange = values[0].strip().split("..")
if len(charrange) == 2:
print(f" single(0x{charrange[0]},0x{charrange[1]}),")
elif len(charrange) == 1:
Expand Down

0 comments on commit 3779cd8

Please sign in to comment.