Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 69 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,51 @@ List characters:

```
❯ uni list java cecak
U+A981 ꦁ JAVANESE SIGN CECAK
U+A9B3 ꦳ JAVANESE SIGN CECAK TELU
U+A981 ꦁ [EA A6 81 ] <M,Mn> JAVANESE SIGN CECAK
U+A9B3 ꦳ [EA A6 B3 ] <M,Mn> JAVANESE SIGN CECAK TELU
```

List characters with fewer details:

```
❯ uni list java cecak -o hexbytes,name
[EA A6 81 ] JAVANESE SIGN CECAK
[EA A6 B3 ] JAVANESE SIGN CECAK TELU
```

Show only the aggregate count (`-c`), skipping output (`-o none`):

```
❯ uni list java cecak -o none -c
Matched 2 runes
```

Show only characters in a specific character category, e.g.:

```
# All "Pd" (punctuation, dash)
❯ uni list -C Pd

# All "S" (symbols)
❯ uni list -C S

# All "N" (numbers) that aren't "No" (other)
❯ uni list -C N,!No

# All "Lu" (letters, uppercase) and "Ll" (letters, lowercase)
❯ uni list -C Lu,Ll
```

List all character categories, their names, and counts:

```
❯ uni cats
KEY NAME RUNE COUNT
C Other 139751
Cc Control 65
Cf Format 170
Co Private Use 137468
[...]
```

Describe characters:
Expand Down Expand Up @@ -233,6 +276,30 @@ U+116E ᅮ HANGUL JUNGSEONG U
U+000A "\n" <control>
```

Sort input using a different collation locale (`-l`):

```
❯ cat input.txt
Œthelwald
Zeus
Achilles

❯ cat input.txt | uni sort -l en-US
Achilles
Œthelwald
Zeus

❯ cat input.txt | uni sort -l da
Achilles
Zeus
Œthelwald

❯ cat input.txt | uni sort -l da -r
Œthelwald
Zeus
Achilles
```


`yfmt`
------
Expand Down
2 changes: 1 addition & 1 deletion cloudbuild.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
steps:
- name: 'golang:1.20-bookworm'
- name: 'golang:1.21-bookworm'
args:
- bash
- -c
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ github.com/gosuri/uilive v0.0.4/go.mod h1:V/epo5LjjlDE5RJUcqx8dbw+zc93y5Ya3yg8tf
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0=
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE=
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
Expand Down
105 changes: 105 additions & 0 deletions pkg/uni/categories.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package uni

import (
"fmt"
"os"
"sort"
"unicode"

"github.com/liggitt/tabwriter"
"github.com/spf13/cobra"
"golang.org/x/text/unicode/rangetable"
)

// Categories maps a Unicode general-category key (the one- or two-letter keys
// used by unicode.Categories, e.g. "Lu" or "P") to a human-readable name.
//
// The map is consulted when printing the category listing; keys that exist in
// unicode.Categories but are missing here are back-filled at package init with
// the key itself as the name.
var Categories = map[string]string{
	"C":  "Other",
	"Cc": "Control",
	"Cf": "Format",
	"Co": "Private Use",
	"Cs": "Surrogate",
	"L":  "Letter",
	"Ll": "Lowercase Letter",
	"Lm": "Modifier Letter",
	"Lo": "Other Letter",
	"Lt": "Titlecase Letter",
	"Lu": "Uppercase Letter",
	"M":  "Mark",
	"Mc": "Spacing Mark",
	"Me": "Enclosing Mark",
	"Mn": "Nonspacing Mark",
	"N":  "Number",
	"Nd": "Decimal Number",
	"Nl": "Letter Number",
	"No": "Other Number",
	"P":  "Punctuation",
	"Pc": "Connector Punctuation",
	"Pd": "Dash Punctuation",
	"Pe": "Close Punctuation",
	"Pf": "Final Punctuation",
	"Pi": "Initial Punctuation",
	"Po": "Other Punctuation",
	"Ps": "Open Punctuation",
	"S":  "Symbol",
	"Sc": "Currency Symbol",
	"Sk": "Modifier Symbol",
	"Sm": "Math Symbol",
	"So": "Other Symbol",
	"Z":  "Separator",
	"Zl": "Line Separator",
	"Zp": "Paragraph Separator",
	"Zs": "Space Separator",
}

// init back-fills Categories so that every key present in unicode.Categories
// has an entry; a key without a hand-written name is displayed as itself.
func init() {
	for key := range unicode.Categories {
		if _, named := Categories[key]; named {
			continue
		}
		Categories[key] = key
	}
}

// newCategoriesCommand builds the `categories` sub-command (aliases `cat` and
// `cats`), which lists the Unicode categories.
//
// NOTE(review): the --table/-t flag is registered and stored on the catter,
// but catter.run does not read it, so the flag currently has no effect.
func newCategoriesCommand() *cobra.Command {
	runner := new(catter)

	cmd := &cobra.Command{
		Use:                   "categories",
		Aliases:               []string{"cat", "cats"},
		DisableFlagsInUseLine: true,
		SilenceErrors:         true,

		Short: "List Unicode categories",
		RunE:  runner.run,
	}

	flags := cmd.Flags()
	flags.StringVarP(&runner.table, "table", "t", runner.table, "Unicode Table version")

	return cmd
}

// catter carries the flag values for the `categories` command and provides
// its run function.
type catter struct {
	// table receives the value of the --table/-t flag.
	table string
}

// run prints a table of every entry in Categories to standard output: the
// category key, its display name, and the number of runes in that category.
// Rows are sorted by key so the output is deterministic.
//
// It returns an error if writing or flushing the table fails; the command and
// positional arguments are not used.
func (*catter) run(_ *cobra.Command, _ []string) error {
	cats := make([]string, 0, len(Categories))
	for cat := range Categories {
		cats = append(cats, cat)
	}
	sort.Strings(cats)

	t := tabwriter.NewWriter(os.Stdout, 6, 4, 3, ' ', tabwriter.RememberWidths)

	if _, err := fmt.Fprintf(t, "%s\t%s\t%s\n", "KEY", "NAME", "RUNE COUNT"); err != nil {
		return fmt.Errorf("writing categories table: %w", err)
	}
	for _, cat := range cats {
		if _, err := fmt.Fprintf(t, "%s\t%s\t%d\n", cat, Categories[cat], countCharactersIn(cat)); err != nil {
			return fmt.Errorf("writing categories table: %w", err)
		}
	}

	// Flush explicitly (rather than in a defer) so a failed write to stdout
	// is reported to the caller instead of being silently dropped.
	if err := t.Flush(); err != nil {
		return fmt.Errorf("writing categories table: %w", err)
	}
	return nil
}

// countCharactersIn returns the number of runes in the Unicode category whose
// key is cat (a key of unicode.Categories, e.g. "Lu").
//
// An unknown key yields 0. Categories is an exported, mutable map, so a key
// with no matching range table can reach this function; without the guard the
// nil table would be dereferenced while visiting it.
func countCharactersIn(cat string) int {
	rt, ok := unicode.Categories[cat]
	if !ok || rt == nil {
		return 0
	}

	count := 0
	rangetable.Visit(rt, func(rune) {
		count++
	})
	return count
}
56 changes: 52 additions & 4 deletions pkg/uni/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package uni
import (
"errors"
"fmt"
"sort"
"strings"
"unicode"
"unicode/utf8"
Expand All @@ -18,12 +19,12 @@ var (

func newListCommand() *cobra.Command {
l := &lister{
output: []string{"id", "rune", "hexbytes", "name"},
output: []string{"id", "rune", "hexbytes", "categories", "name"},
table: unicode.Version,
}

c := cobra.Command{
Use: "list [--all | <RUNE-NAME>]",
Use: "list [--all | --categories <CATEGORIES> | <RUNE-NAME>]",
DisableFlagsInUseLine: true,
SilenceErrors: true,

Expand All @@ -33,6 +34,7 @@ func newListCommand() *cobra.Command {
}

c.Flags().BoolVarP(&l.all, "all", "A", l.all, "List all")
c.Flags().StringSliceVarP(&l.cats, "categories", "C", l.cats, "Categories to limit to")
c.Flags().BoolVarP(&l.count, "count", "c", l.count, "Show count of matches")
c.Flags().StringSliceVarP(&l.output, "output", "o", l.output, "Output columns")
c.Flags().StringVarP(&l.table, "table", "t", l.table, "Unicode Table version")
Expand All @@ -41,6 +43,7 @@ func newListCommand() *cobra.Command {

type lister struct {
all bool
cats []string
count bool
output []string
table string
Expand All @@ -52,6 +55,29 @@ func (l *lister) run(c *cobra.Command, args []string) error {
return fmt.Errorf("unicode table version %s: %w", l.table, ErrNoUnicodeTable)
}

excludes := []*unicode.RangeTable{}
if len(l.cats) > 0 {
rts := []*unicode.RangeTable{}
for _, cat := range l.cats {
rt, ok := unicode.Categories[cat]
if !ok {
if strings.HasPrefix(cat, "!") {
cat = strings.TrimPrefix(cat, "!")
if unrt, ok := unicode.Categories[cat]; ok {
excludes = append(excludes, unrt)
continue
}
}

return fmt.Errorf("unicode category %q does not exist; see `uni categories`", cat)
}

rts = append(rts, rt)
}

t = rangetable.Merge(rts...)
}

cols := map[string]bool{}
for _, o := range l.output {
cols[o] = true
Expand All @@ -60,6 +86,14 @@ func (l *lister) run(c *cobra.Command, args []string) error {
count := 0
norm := strings.Split(strings.ToUpper(strings.Join(args, ":")), ":")
rangetable.Visit(t, func(r rune) {
if len(excludes) > 0 {
for _, exclude := range excludes {
if unicode.Is(exclude, r) {
return
}
}
}

name := runenames.Name(r)
if runeMatches(norm, name) {
disp := []string{}
Expand All @@ -76,6 +110,9 @@ func (l *lister) run(c *cobra.Command, args []string) error {
if cols["hexbytes"] {
disp = append(disp, fmt.Sprintf("[%s]", runeToHexBytes(r)))
}
if cols["cats"] || cols["categories"] {
disp = append(disp, fmt.Sprintf("<%s>", runeToCategories(r)))
}
if cols["name"] {
disp = append(disp, name)
}
Expand All @@ -93,8 +130,8 @@ func (l *lister) run(c *cobra.Command, args []string) error {
}

func (l *lister) validate(_ *cobra.Command, args []string) error {
if len(args) < 1 && !l.all {
return errors.New("at least one argument, or an explicit --all flag is required")
if len(args) < 1 && !l.all && len(l.cats) == 0 {
return errors.New("at least one argument, or an explicit --all flag, or one or more --categories is required")
}
return nil
}
Expand Down Expand Up @@ -123,3 +160,14 @@ func runeToHexBytes(r rune) string {

return strings.Join(hexbytes, " ")
}

// runeToCategories returns the keys of every unicode.Categories table that
// contains r, sorted and joined with commas (for example "M,Mn"). The result
// is empty when no table contains r.
func runeToCategories(r rune) string {
	var matched []string
	for key := range unicode.Categories {
		if !unicode.In(r, unicode.Categories[key]) {
			continue
		}
		matched = append(matched, key)
	}

	// Map iteration order is random; sort for stable output.
	sort.Strings(matched)
	return strings.Join(matched, ",")
}
Loading