-
Notifications
You must be signed in to change notification settings - Fork 20
/
text.go
42 lines (35 loc) · 1.25 KB
/
text.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
package utils
import (
"net/url"
"regexp"
"strings"
)
// snakedChars matches each run of one or more characters that are not a Unicode
// letter, an ASCII digit or an underscore.
var snakedChars = regexp.MustCompile(`[^\p{L}\d_]+`)

// Snakify converts the passed in string into a snake_case style reference. Every run of
// characters other than letters, digits and underscores becomes a single underscore, the
// result is lowercased, and leading and trailing underscores are stripped. Underscores
// already present in the input are kept as is, so adjacent underscores are not collapsed
// (e.g. "a _b" becomes "a__b").
func Snakify(text string) string {
	underscored := snakedChars.ReplaceAllString(text, "_")
	lowered := strings.ToLower(underscored)
	return strings.Trim(lowered, "_")
}
// URLEscape percent-encodes s using url.QueryEscape, but with spaces written as %20
// instead of +, matching urllib.quote(s, safe="") in Python
func URLEscape(s string) string {
	escaped := url.QueryEscape(s)
	// QueryEscape emits a literal "+" as %2B, so any "+" left in the output stands for a space
	return strings.ReplaceAll(escaped, "+", "%20")
}
// wordTokenRegex matches one token: either a run of one or more characters that are each a
// letter (\pL), a number (\pN), or in the ranges U+20A0-U+20CF or U+2600-U+27BF, or else
// exactly one character in the range U+1F170-U+1F9CF.
// see: https://en.wikipedia.org/wiki/Emoji for emoji ranges
var wordTokenRegex = regexp.MustCompile("((\\pL|\\pN|[\u20A0-\u20CF]|[\u2600-\u27BF])+|[\U0001F170-\U0001F9CF])")

// TokenizeString extracts the tokens found in the passed in string, in order of appearance.
// Anything not matched by wordTokenRegex acts as a separator and is dropped; each character
// in the U+1F170-U+1F9CF emoji range is returned as its own token. The result is nil when
// nothing matches.
func TokenizeString(str string) []string {
	const noLimit = -1 // a negative count asks FindAllString for every match
	tokens := wordTokenRegex.FindAllString(str, noLimit)
	return tokens
}
// TokenizeStringByChars splits the passed in string into tokens, treating every rune that
// occurs in chars as a separator. Runs of consecutive separators yield no empty tokens, and
// an empty slice is returned when str is empty or consists only of separators.
func TokenizeStringByChars(str string, chars string) []string {
	isSeparator := func(c rune) bool {
		return strings.ContainsRune(chars, c)
	}
	return strings.FieldsFunc(str, isSeparator)
}