Skip to content

Commit

Permalink
Apply @kidayasuo's proposal
Browse files Browse the repository at this point in the history
  • Loading branch information
kojiishi committed Feb 22, 2024
1 parent a5632f6 commit f14aec8
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 47 deletions.
62 changes: 24 additions & 38 deletions auto-spacing.txt
Original file line number Diff line number Diff line change
Expand Up @@ -586,51 +586,40 @@
2DD8..2DDE ; N # N ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
2DE0..2DFF ; N # N COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
2E2F ; N # N VERTICAL TILDE
2E80..2E99 ; W # W CJK RADICAL REPEAT..CJK RADICAL RAP
2E9B..2EF3 ; W # W CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
2F00..2FD5 ; W # W KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
2E80..2E99 ; O # W CJK RADICAL REPEAT..CJK RADICAL RAP
2E9B..2EF3 ; O # W CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
2F00..2FD5 ; O # W KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
2FF0..2FFB ; O # W IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
3000 ; O # F IDEOGRAPHIC SPACE
3001..3004 ; O # W IDEOGRAPHIC COMMA..JAPANESE INDUSTRIAL STANDARD SYMBOL
3005..3007 ; W # W IDEOGRAPHIC ITERATION MARK..IDEOGRAPHIC NUMBER ZERO
3008..3012 ; O # W LEFT ANGLE BRACKET..POSTAL MARK
3013 ; W # W GETA MARK
3014..3020 ; O # W LEFT TORTOISE SHELL BRACKET..POSTAL MARK FACE
3021..302D ; W # W HANGZHOU NUMERAL ONE..IDEOGRAPHIC ENTERING TONE MARK
302E..302F ; N # W HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
3005 ; N # W IDEOGRAPHIC ITERATION MARK
3006 ; W # W IDEOGRAPHIC CLOSING MARK
3007..3029 ; O # W IDEOGRAPHIC NUMBER ZERO..HANGZHOU NUMERAL NINE
302A..302F ; N # W IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
3030 ; O # W WAVY DASH
3031..3035 ; W # W VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
3036 ; O # W CIRCLED POSTAL MARK
3037..303C ; W # W IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL..MASU MARK
303D ; O # W PART ALTERNATION MARK
303E ; W # W IDEOGRAPHIC VARIATION INDICATOR
303F ; W # N IDEOGRAPHIC HALF FILL SPACE
3036..303A ; O # W CIRCLED POSTAL MARK..HANGZHOU NUMERAL THIRTY
303B ; N # W VERTICAL IDEOGRAPHIC ITERATION MARK
303C ; W # W MASU MARK
303D..303E ; O # W PART ALTERNATION MARK..IDEOGRAPHIC VARIATION INDICATOR
3041..3096 ; W # W HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
3099..309F ; W # W COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..HIRAGANA DIGRAPH YORI
3099..309A ; N # W COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
309B..309C ; O # W KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
309D..309F ; W # W HIRAGANA ITERATION MARK..HIRAGANA DIGRAPH YORI
30A0 ; O # W KATAKANA-HIRAGANA DOUBLE HYPHEN
30A1..30FA ; W # W KATAKANA LETTER SMALL A..KATAKANA LETTER VO
30FB ; O # W KATAKANA MIDDLE DOT
30FC..30FF ; W # W KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA DIGRAPH KOTO
3105..312F ; N # W BOPOMOFO LETTER B..BOPOMOFO LETTER NN
3131..318E ; N # W HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
3190..319F ; W # W IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION MAN MARK
3190..319F ; O # W IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION MAN MARK
31A0..31BF ; N # W BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
31C0..31E3 ; W # W CJK STROKE T..CJK STROKE Q
31C0..31E3 ; O # W CJK STROKE T..CJK STROKE Q
31F0..31FF ; W # W KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
3200..321E ; O # W PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
3220..3247 ; W # W PARENTHESIZED IDEOGRAPH ONE..CIRCLED IDEOGRAPH KOTO
3220..3247 ; O # W PARENTHESIZED IDEOGRAPH ONE..CIRCLED IDEOGRAPH KOTO
3248..324F ; O # A CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE
3250..327F ; O # W PARTNERSHIP SIGN..KOREAN STANDARD SYMBOL
3280..32B0 ; W # W CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH NIGHT
32B1..32BF ; O # W CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
32C0..32CB ; W # W IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER
32CC..32CF ; O # W SQUARE HG..LIMITED LIABILITY SIGN
32D0..3370 ; W # W CIRCLED KATAKANA A..IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-FOUR
3371..337A ; O # W SQUARE HPA..SQUARE IU
337B..337F ; W # W SQUARE ERA NAME HEISEI..SQUARE CORPORATION
3380..33DF ; O # W SQUARE PA AMPS..SQUARE A OVER M
33E0..33FE ; W # W IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE
33FF ; O # W SQUARE GAL
3250..33FF ; O # W PARTNERSHIP SIGN..SQUARE GAL
3400..4DBF ; W # W CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
4E00..9FFC ; W # W CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
9FFD..9FFF ; O # W CJK UNIFIED IDEOGRAPH-9FFD..CJK UNIFIED IDEOGRAPH-9FFF
Expand All @@ -643,7 +632,6 @@ A640..A672 ; N # N CYRILLIC CAPITAL LETTER ZEMLYA..COMBINING CYRILLIC THO
A674..A67D ; N # N COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK
A67F..A6E5 ; N # N CYRILLIC PAYEROK..BAMUM LETTER KI
A6F0..A6F1 ; N # N BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A700..A707 ; W # N MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER CHINESE TONE YANG RU
A717..A71F ; N # N MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
A722..A788 ; N # N LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..MODIFIER LETTER LOW CIRCUMFLEX ACCENT
A78B..A7BF ; N # N LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER GLOTTAL U
Expand Down Expand Up @@ -899,11 +887,11 @@ FFFD ; O # A REPLACEMENT CHARACTER
16F00..16F4A ; N # N MIAO LETTER PA..MIAO LETTER RTE
16F4F..16F87 ; N # N MIAO SIGN CONSONANT MODIFIER BAR..MIAO VOWEL SIGN UI
16F8F..16F9F ; N # N MIAO TONE RIGHT..MIAO LETTER REFORMED TONE-8
16FE0..16FE1 ; W # W TANGUT ITERATION MARK..NUSHU ITERATION MARK
16FE0..16FE1 ; N # W TANGUT ITERATION MARK..NUSHU ITERATION MARK
16FE2 ; O # W OLD CHINESE HOOK MARK
16FE3 ; N # W OLD CHINESE ITERATION MARK
16FE4 ; W # W KHITAN SMALL SCRIPT FILLER
16FF0..16FF1 ; W # W VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
16FE3 ; W # W OLD CHINESE ITERATION MARK
16FE4 ; N # W KHITAN SMALL SCRIPT FILLER
16FF0..16FF1 ; N # W VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
17000..187F7 ; W # W TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
18800..18CD5 ; W # W TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
18D00..18D08 ; W # W TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
Expand All @@ -926,7 +914,6 @@ FFFD ; O # A REPLACEMENT CHARACTER
1D185..1D18B ; N # N MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; N # N MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
1D242..1D244 ; N # N COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
1D360..1D371 ; W # N COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE
1D400..1D454 ; N # N MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
1D456..1D49C ; N # N MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
1D49E..1D49F ; N # N MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
Expand Down Expand Up @@ -1021,11 +1008,10 @@ FFFD ; O # A REPLACEMENT CHARACTER
1F18F..1F190 ; O # A NEGATIVE SQUARED WC..SQUARE DJ
1F191..1F19A ; O # W SQUARED CL..SQUARED VS
1F19B..1F1AC ; O # A SQUARED THREE D..SQUARED VOD
1F200 ; W # W SQUARE HIRAGANA HOKA
1F201..1F202 ; O # W SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
1F200..1F202 ; O # W SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA
1F210..1F23B ; O # W SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
1F240..1F248 ; O # W TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250..1F251 ; W # W CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
1F250..1F251 ; O # W CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
1F260..1F265 ; O # W ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
1F300..1F320 ; O # W CYCLONE..SHOOTING STAR
1F32D..1F335 ; O # W HOT DOG..CACTUS
Expand Down
13 changes: 4 additions & 9 deletions src/auto-spacing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,10 @@
class AutoSpacing(object):

def __init__(self) -> None:
# https://drafts.csswg.org/css-text-4/#text-spacing-classes
ideographs = UnicodeSet()
ideographs.addAll(UnicodeSet(r'[[:sc=Han:][:scx=Han:]-[:P:]]'))
ideographs.addAll(UnicodeSet(r'[[:sc=Tang:][:scx=Tang:]-[:P:]]'))
ideographs.addAll(UnicodeSet(r'[[:sc=Kits:][:scx=Kits:]-[:P:]]'))
ideographs.addAll(UnicodeSet(r'[[:sc=Nshu:][:scx=Nshu:]-[:P:]]'))
ideographs.addAll(UnicodeSet(r'[[:sc=Hira:][:scx=Hira:]-[:P:]]'))
ideographs.addAll(UnicodeSet(r'[[:sc=Kana:][:scx=Kana:]-[:P:]]'))
ideographs.removeAll(UnicodeSet(r'[[:ea=H:]]'))
# https://github.com/w3c/csswg-drafts/pull/9503#discussion_r1374477268
ideographs = UnicodeSet(
r'[[[:sc=Hiragana:][:sc=Katakana:][:sc=Common:][:ideographic:]' +
r'&[:gc=L:]&[:ea=W:]][[:gc=L:][:gc=Nl:]&[:gc=So:]&[:sc=Hani:]]]')

letters_numerals = UnicodeSet()
letters_numerals.addAll(UnicodeSet(r'[[:L:][:M:][:Nd:]]'))
Expand Down

0 comments on commit f14aec8

Please sign in to comment.