-
Notifications
You must be signed in to change notification settings - Fork 20
/
text.go
65 lines (56 loc) · 1.9 KB
/
text.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
package utils
import (
"regexp"
"strings"
)
var (
	// snakedChars matches any run of one or more characters that are not Unicode letters,
	// ASCII digits or underscores.
	snakedChars = regexp.MustCompile(`[^\p{L}\d_]+`)

	// wordTokenRegex matches either a run of marks, letters, numbers, underscores and
	// apostrophes (a word token), or a single symbol character (a token on its own).
	wordTokenRegex = regexp.MustCompile(`[\pM\pL\pN_']+|\pS`)
)
// Snakify turns the passed in string into a context reference. Each run of characters
// matched by snakedChars is collapsed into a single underscore, the result is lowercased,
// and any leading or trailing underscores are trimmed. Underscores already present in
// text are not matched by snakedChars and are therefore left untouched.
func Snakify(text string) string {
	replaced := snakedChars.ReplaceAllString(text, "_")
	lowered := strings.ToLower(replaced)
	return strings.Trim(lowered, "_")
}
// TokenizeString returns every token that wordTokenRegex finds in str, in order of
// appearance. Characters that the pattern does not match act as separators and are
// dropped. The result is nil when str contains no tokens.
func TokenizeString(str string) []string {
	const noLimit = -1 // ask the regexp for every match rather than the first n
	tokens := wordTokenRegex.FindAllString(str, noLimit)
	return tokens
}
// TokenizeStringByChars splits str into fields, treating every rune that occurs in chars
// as a separator. Runs of consecutive separators are collapsed, so empty fields are never
// returned; if chars is empty the whole of a non-empty str comes back as a single field.
func TokenizeStringByChars(str string, chars string) []string {
	isSeparator := func(r rune) bool {
		return strings.ContainsRune(chars, r)
	}
	return strings.FieldsFunc(str, isSeparator)
}
// PrefixOverlap returns the length, counted in runes rather than bytes, of the longest
// prefix that s1 and s2 have in common.
func PrefixOverlap(s1, s2 string) int {
	a, b := []rune(s1), []rune(s2)

	// The shared prefix can never be longer than the shorter of the two inputs.
	limit := len(a)
	if len(b) < limit {
		limit = len(b)
	}

	for i := 0; i < limit; i++ {
		if a[i] != b[i] {
			return i
		}
	}
	return limit
}
// StringSlices returns the substrings of s described by indices, which is read as a flat
// list of (start, end) byte-offset pairs: the first result is s[indices[0]:indices[1]],
// the second is s[indices[2]:indices[3]], and so on.
//
// If indices has an odd number of elements the trailing unpaired index is ignored rather
// than causing an index-out-of-range panic. Each pair is still applied with an ordinary
// slice expression, so offsets outside s or a start greater than its end will panic.
// The returned slice is never nil.
func StringSlices(s string, indices []int) []string {
	slices := make([]string, 0, len(indices)/2)
	// Require both members of a pair to be present before slicing.
	for i := 0; i+1 < len(indices); i += 2 {
		start, end := indices[i], indices[i+1]
		slices = append(slices, s[start:end])
	}
	return slices
}
// StringSliceContains reports whether slice holds an element equal to str. When
// caseSensitive is true elements must match str exactly; otherwise both sides are
// lowercased with strings.ToLower before being compared.
func StringSliceContains(slice []string, str string, caseSensitive bool) bool {
	if caseSensitive {
		for _, candidate := range slice {
			if candidate == str {
				return true
			}
		}
		return false
	}

	// Lowercase the target once up front instead of on every comparison.
	target := strings.ToLower(str)
	for _, candidate := range slice {
		if strings.ToLower(candidate) == target {
			return true
		}
	}
	return false
}