This repository has been archived by the owner on May 6, 2023. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
decimalformat.go
220 lines (181 loc) · 6.85 KB
/
decimalformat.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
package lxstrconv
import (
"math"
"strconv"
"unicode"
"unicode/utf8"
"golang.org/x/text/language"
"golang.org/x/text/message"
"golang.org/x/text/number"
)
// acceptRune reports how many bytes of s are taken up by r when r is the
// first rune in s, and returns zero otherwise (including when s is empty).
//
// The width is taken from the bytes actually decoded from s rather than from
// the canonical encoding of r. The two differ when r is utf8.RuneError and s
// starts with an invalid byte: the match is one byte wide, while the canonical
// encoding of U+FFFD is three. Callers slice s by the returned value, so it
// must never exceed what was really consumed.
func acceptRune(r rune, s string) int {
	first, size := utf8.DecodeRuneInString(s)
	if size == 0 || first != r {
		return 0
	}
	return size
}
// firstRune decodes the leading rune of s. It returns that rune and true, or
// (runeNone, false) when s is empty. An invalid leading byte decodes to
// utf8.RuneError and still counts as a rune.
func firstRune(s string) (rune, bool) {
	if len(s) == 0 {
		return runeNone, false
	}
	first, _ := utf8.DecodeRuneInString(s)
	return first, true
}
// guessDecimalGroupSeparator guesses the digit group separator rune that the
// printer p uses for decimal numbers, e.g. a comma for British English.
//
// Heuristic: print a number long enough to need several groups and let
// repeatingRune pick a rune from the output; a rune that shows up more than
// once in 1234567890 is probably the separator.
func guessDecimalGroupSeparator(p *message.Printer) rune {
	return repeatingRune(p.Sprint(number.Decimal(1234567890)))
}
// guessDecimalPoint guesses the decimal point rune that the printer p uses
// for decimal numbers, e.g. a period for British English.
//
// Heuristic: 1.23 and 4.56 share no digits, so a rune common to both printed
// forms is probably the decimal point. The two outputs are concatenated and
// handed to repeatingRune to find it.
func guessDecimalPoint(p *message.Printer) rune {
	return repeatingRune(p.Sprint(number.Decimal(1.23)) + p.Sprint(number.Decimal(4.56)))
}
// guessDecimalDigits guesses the runes that the printer p uses for the digit
// values 0 to 9 and stores them in out, indexed by value.
//
// Each value is printed on its own. If the printed form is not exactly one
// rune long, the slot is set to runeNone instead.
func guessDecimalDigits(p *message.Printer, out *[10]rune) {
	for value := range out {
		printed := []rune(p.Sprint(number.Decimal(value)))
		if len(printed) != 1 {
			out[value] = runeNone
			continue
		}
		out[value] = printed[0]
	}
}
// decimalFormat defines how a decimal (base-10) number should be parsed for a
// given locale. Note that the behaviour is undefined for locales that have
// non-base-10 number systems.
//
// Values are normally built by NewDecimalFormat, which fills in every field by
// printing sample numbers in the target locale and inspecting the output.
//
// This structure is currently internal until we have more confidence it is
// correct for all languages with decimal number systems.
type decimalFormat struct {
// GroupSeparator is a digit separator, such as the comma used for
// thousands in British English. The parser skips it wherever it appears.
// In addition to any separator defined here, the parser also skips
// whitespace (as reported by unicode.IsSpace).
GroupSeparator rune
// Point is the separator between the integer and fractional parts of a
// decimal number.
Point rune
// Digits holds the digit runes in ascending order of value: Digits[i] is
// the rune that represents the value i. NewDecimalFormat stores runeNone
// in a slot when the locale does not print that digit as a single rune.
Digits [10]rune
}
// ParseInt parses the whole of s as an integer in this format.
//
// It returns strconv.ErrSyntax when s is empty or when AcceptInt stops before
// the end of s; any error reported by AcceptInt is returned unchanged.
func (f decimalFormat) ParseInt(s string) (int64, error) {
	if s == "" {
		return 0, strconv.ErrSyntax
	}

	parsed, consumed, err := f.AcceptInt(s)
	switch {
	case err != nil:
		return 0, err
	case consumed != len(s):
		// Trailing input that is not part of the number.
		return 0, strconv.ErrSyntax
	}
	return parsed, nil
}
// ParseFloat parses the whole of s as a floating point number in this format.
//
// It returns strconv.ErrSyntax when s is empty or when AcceptFloat stops
// before the end of s; any error reported by AcceptFloat is returned
// unchanged.
func (f decimalFormat) ParseFloat(s string) (float64, error) {
	if s == "" {
		return 0, strconv.ErrSyntax
	}

	parsed, consumed, err := f.AcceptFloat(s)
	switch {
	case err != nil:
		return 0, err
	case consumed != len(s):
		// Trailing input that is not part of the number.
		return 0, strconv.ErrSyntax
	}
	return parsed, nil
}
// NewDecimalFormat constructs, for the locale identified by tag, a
// NumberFormat that defines how a decimal (base-10) number should be parsed.
// Note that the behaviour is undefined for locales that have non-base-10
// number systems.
func NewDecimalFormat(tag language.Tag) NumberFormat {
	// No exported symbol in golang.org/x/text appears to expose the
	// separators and digits directly (which would be ideal). Instead, numbers
	// are printed in the requested locale and heuristics are applied to the
	// output to guess each component.
	printer := message.NewPrinter(tag)

	var df decimalFormat
	df.GroupSeparator = guessDecimalGroupSeparator(printer)
	df.Point = guessDecimalPoint(printer)
	guessDecimalDigits(printer, &df.Digits)
	return df
}
// decimalRuneToInt looks d up in digits, where digits[i] is the rune for the
// value i. It returns (i, true) for the first slot equal to d, or (0, false)
// when d is not one of the ten digit runes.
func decimalRuneToInt(d rune, digits *[10]rune) (int, bool) {
	for value, digit := range digits {
		if digit == d {
			return value, true
		}
	}
	return 0, false
}
// AcceptInt parses as much of an integer number as possible from the start of
// s. It returns the value of the parsed integer and the number of bytes of s
// that were successfully parsed. For example, for some locales, the string
// "1,000X" returns (1000, 5) and the string "foo" returns (0, 0).
//
// A negative number is recognised only by a leading ASCII '-'; the sign byte
// is included in the returned length. A '-' that is not followed by a number
// is not accepted and yields (0, 0).
//
// Err is always nil, strconv.ErrRange or strconv.ErrSyntax. When the number
// does not fit in an int64, err is strconv.ErrRange and, as with
// strconv.ParseInt, value is the int64 of largest magnitude with the
// appropriate sign; length still covers the text of the number.
func (f decimalFormat) AcceptInt(s string) (value int64, length int, err error) {
	if len(s) == 0 {
		return 0, 0, nil
	}

	// TODO better negative check e.g. "(1)" for "-1"
	negative := s[0] == '-'
	signLen := 0
	if negative {
		signLen = 1
	}

	magnitude, digitsLen, uerr := f.AcceptUint(s[signLen:])
	if digitsLen == 0 || (uerr != nil && uerr != strconv.ErrRange) {
		// Nothing parsed (possibly just a bare sign), or an unexpected error.
		return 0, 0, uerr
	}

	length = signLen + digitsLen
	overflow := uerr != nil

	if negative {
		// The most negative int64 has magnitude 2^63, one more than MaxInt64.
		const minMagnitude = 1 << 63
		if overflow || magnitude > minMagnitude {
			return math.MinInt64, length, strconv.ErrRange
		}
		// For magnitude == 2^63 the conversion wraps to MinInt64, and
		// negating MinInt64 yields MinInt64 again, which is the right answer.
		return -int64(magnitude), length, nil
	}

	if overflow || magnitude > math.MaxInt64 {
		return math.MaxInt64, length, strconv.ErrRange
	}
	return int64(magnitude), length, nil
}
// AcceptUint parses as much of an unsigned integer as possible from the start
// of s. It returns the parsed value and the number of bytes of s that were
// consumed; see AcceptInt for the signed variant.
//
// The group separator and any whitespace are skipped wherever they occur.
// Parsing stops at the first rune that is neither skipped nor a digit. If no
// digit was seen at all (for example s is only whitespace or separators),
// nothing is accepted and the result is (0, 0, nil).
//
// Err is nil or strconv.ErrRange. When the number does not fit in a uint64,
// err is strconv.ErrRange and, as with strconv.ParseUint, value is
// math.MaxUint64; length still covers the text of the number.
func (f decimalFormat) AcceptUint(s string) (value uint64, length int, err error) {
	// Largest accumulator value that can still be multiplied by ten.
	const maxBeforeShift = math.MaxUint64 / 10

	var (
		accu      uint64
		sawDigit  bool
		overflown bool
	)

	// Ranging over a string yields byte offsets, so consumed is a byte count
	// even when separators or digits are multi-byte runes.
	consumed := len(s)
	for i, c := range s {
		if c == f.GroupSeparator || unicode.IsSpace(c) {
			continue
		}
		d, ok := decimalRuneToInt(c, &f.Digits)
		if !ok {
			consumed = i
			break
		}
		sawDigit = true
		if overflown {
			// Keep scanning so the reported length spans the whole number.
			continue
		}
		digit := uint64(d)
		if accu > maxBeforeShift || accu*10 > math.MaxUint64-digit {
			overflown = true
			continue
		}
		accu = accu*10 + digit
	}

	switch {
	case !sawDigit:
		return 0, 0, nil
	case overflown:
		return math.MaxUint64, consumed, strconv.ErrRange
	}
	return accu, consumed, nil
}
// AcceptFloat parses as much of a floating point number as possible from the
// start of s. It returns the value of the parsed float and the number of
// bytes of s that were successfully parsed. For example, for some locales,
// the string "1.23X" returns (1.23, 4) and the string "foo" returns (0.0, 0).
//
// The integer part is parsed by AcceptInt. It may be omitted when s starts
// with the decimal point (".5"). A decimal point with no digits after it is
// accepted ("1." is 1 with length 2), but a decimal point with no digits on
// either side is not a number and yields (0, 0). As in the integer part, the
// group separator and whitespace are skipped within the fraction.
//
// Err is always nil, strconv.ErrRange or strconv.ErrSyntax.
func (f decimalFormat) AcceptFloat(s string) (value float64, length int, err error) {
	var (
		left    int64
		leftLen int
	)

	// A string that starts with the decimal point has no integer part.
	if first, ok := firstRune(s); ok && first != f.Point {
		left, leftLen, err = f.AcceptInt(s)
		if err != nil {
			return 0, 0, err
		}
		if leftLen == 0 {
			return 0, 0, nil
		}
	}

	pointLen := acceptRune(f.Point, s[leftLen:])
	if pointLen == 0 {
		return float64(left), leftLen, nil
	}

	// The sign is tracked separately from the integer value: "-0.5" has an
	// integer part of zero, which cannot carry a sign as an int64.
	negative := left < 0 || (leftLen > 0 && s[0] == '-')
	magnitude := uint64(left)
	if left < 0 {
		magnitude = -magnitude // two's complement: |left|, also for MinInt64
	}

	// Re-spell the number as plain ASCII ("123.045") and let strconv do the
	// conversion. This keeps leading zeros in the fraction significant and
	// gives a correctly rounded result for any number of digits.
	text := strconv.AppendUint(make([]byte, 0, 32), magnitude, 10)
	text = append(text, '.')

	frac := s[leftLen+pointLen:]
	fracLen := len(frac)
	fracDigits := 0
	for i, c := range frac {
		if c == f.GroupSeparator || unicode.IsSpace(c) {
			continue
		}
		d, ok := decimalRuneToInt(c, &f.Digits)
		if !ok {
			// Stops at a '-' too, so the fraction can never be negative.
			fracLen = i
			break
		}
		text = append(text, byte('0'+d))
		fracDigits++
	}

	if leftLen == 0 && fracDigits == 0 {
		// A decimal point on its own is not a number.
		return 0, 0, nil
	}

	value, err = strconv.ParseFloat(string(text), 64)
	if err != nil {
		// Not expected for the ASCII text assembled above; report it using
		// the bare sentinel errors promised by the doc comment.
		if numErr, ok := err.(*strconv.NumError); ok {
			return 0, 0, numErr.Err
		}
		return 0, 0, strconv.ErrSyntax
	}
	if negative {
		value = math.Copysign(value, -1)
	}
	return value, leftLen + pointLen + fracLen, nil
}