// Declarations added to package charproc by commit f0f5f8a3 ("feat: add the
// charproc package for character and text handling"). The sections below
// correspond to the new files toolkit/charproc/cast.go, sensitivity.go,
// speparator.go (sic) and string.go; together they need the standard-library
// imports "regexp", "slices" and "strings".
//
// The same commit also relaxes the last branch of HideSensitivity in
// utils/str/transform.go from `else if lens > 4` to a plain `else`.

// ---- toolkit/charproc/cast.go ----

// FirstUpper returns s with its first byte upper-cased when that byte is an
// ASCII lowercase letter. Any other input (the empty string, a string that
// already starts with an uppercase letter, a digit, a non-ASCII rune, ...) is
// returned unchanged.
func FirstUpper(s string) string {
	if s == "" || s[0] < 'a' || s[0] > 'z' {
		return s
	}
	return string(s[0]-('a'-'A')) + s[1:]
}

// FirstLower returns s with its first byte lower-cased when that byte is an
// ASCII uppercase letter. Any other input is returned unchanged.
func FirstLower(s string) string {
	if s == "" || s[0] < 'A' || s[0] > 'Z' {
		return s
	}
	return string(s[0]+('a'-'A')) + s[1:]
}

// LenWithChinese returns the length of s in runes, so that a Chinese (or any
// other multi-byte) character counts as 1.
func LenWithChinese(s string) int {
	return len([]rune(s))
}

// Snake converts str to snake case: every ASCII uppercase letter is replaced
// by an underscore followed by its lowercase form.
//
// A leading uppercase letter also gets an underscore ("HelloWorld" becomes
// "_hello_world"); Camel reverses exactly this form.
func Snake(str string) string {
	var b strings.Builder
	b.Grow(len(str) + 4)
	for _, r := range str {
		if IsUpper(r) {
			b.WriteByte('_')
			r += 'a' - 'A'
		}
		b.WriteRune(r)
	}
	return b.String()
}

// Camel converts str to camel case: every underscore is dropped and the ASCII
// lowercase letter that follows it is upper-cased.
//
// If any underscore is not followed by an ASCII lowercase letter (including an
// underscore at the very end of the string), str is returned unchanged.
func Camel(str string) string {
	runes := []rune(str)
	var b strings.Builder
	b.Grow(len(str))
	for i := 0; i < len(runes); i++ {
		if runes[i] != '_' {
			b.WriteRune(runes[i])
			continue
		}
		// Consume the rune after the underscore; it must exist and be a-z.
		i++
		if i >= len(runes) || !IsLower(runes[i]) {
			return str
		}
		b.WriteRune(runes[i] - ('a' - 'A'))
	}
	return b.String()
}

// ---- toolkit/charproc/sensitivity.go ----

// mobilePattern matches an 11-digit number starting with 1, which
// HideSensitivity treats as a mobile phone number. It is compiled once instead
// of on every call.
var mobilePattern = regexp.MustCompile(`^1[0-9]\d{9}$`)

// SensitiveTrieNode is a node of a sensitive-word trie. The zero value is an
// empty trie that is ready to use as the root.
type SensitiveTrieNode struct {
	nodes map[rune]*SensitiveTrieNode // children keyed by normalised rune
	match bool                        // true when a word ends at this node
}

// Add inserts text into the trie as a sensitive word. Matching ignores the
// case of ASCII letters; all other runes must match exactly. Adding an empty
// string is a no-op.
func (s *SensitiveTrieNode) Add(text string) {
	if s.nodes == nil {
		s.nodes = make(map[rune]*SensitiveTrieNode)
	}
	node := s
	for _, r := range text {
		// Normalise with the same function Check and Replace use, so that a
		// stored word always matches itself.
		r = s.runeToUpper(r)
		next, ok := node.nodes[r]
		if !ok {
			next = &SensitiveTrieNode{nodes: make(map[rune]*SensitiveTrieNode)}
			node.nodes[r] = next
		}
		node = next
	}
	if node != s {
		node.match = true
	}
}

// Check reports whether chars contains at least one sensitive word.
func (s *SensitiveTrieNode) Check(chars []rune) bool {
	for i := range chars {
		node := s
		for _, r := range chars[i:] {
			next, ok := node.nodes[s.runeToUpper(r)]
			if !ok {
				break
			}
			if next.match {
				return true
			}
			node = next
		}
	}
	return false
}

// Replace overwrites every rune of chars that belongs to an occurrence of a
// sensitive word with rep and returns chars. The slice is modified in place.
func (s *SensitiveTrieNode) Replace(chars []rune, rep rune) []rune {
	maskEnd := -1 // highest index known to lie inside a matched word
	for i := range chars {
		// Walk the trie from position i. Only indexes >= i are read and those
		// have not been overwritten yet, so matching sees the original text.
		node := s
		for j := i; j < len(chars); j++ {
			next, ok := node.nodes[s.runeToUpper(chars[j])]
			if !ok {
				break
			}
			if next.match && j > maskEnd {
				maskEnd = j
			}
			node = next
		}
		if i <= maskEnd {
			chars[i] = rep
		}
	}
	return chars
}

// runeToUpper maps ASCII a-z to A-Z and returns every other rune unchanged.
func (s *SensitiveTrieNode) runeToUpper(r rune) rune {
	if r >= 'a' && r <= 'z' {
		r -= 'a' - 'A'
	}
	return r
}

// HideSensitivity returns a masked form of str in which part of the content
// is replaced by '*'. The rules are applied in this order:
//
//   - empty string: "***";
//   - contains "@" (treated as an e-mail address): the part before the first
//     "@" is reduced to its first three runes followed by "***", or to "***"
//     alone when it has fewer than three runes; everything from the "@" on is
//     kept;
//   - 11 digits starting with 1 (treated as a mobile number): the first three
//     and last four digits are kept and the middle four become "****";
//   - anything else (treated as a name): 1 rune gives "***", 2 runes keep the
//     first, 3 and 4 runes keep the first and last, longer values keep the
//     first two and last two runes.
func HideSensitivity(str string) string {
	if str == "" {
		return "***"
	}

	if local, domain, ok := strings.Cut(str, "@"); ok {
		prefix := []rune(local)
		if len(prefix) < 3 {
			return "***@" + domain
		}
		return string(prefix[:3]) + "***@" + domain
	}

	if mobilePattern.MatchString(str) {
		// The pattern guarantees exactly 11 ASCII digits, so byte slicing is safe.
		return str[:3] + "****" + str[7:]
	}

	name := []rune(str)
	n := len(name)
	switch {
	case n <= 1:
		return "***"
	case n == 2:
		return string(name[:1]) + "*"
	case n == 3:
		return string(name[:1]) + "*" + string(name[2:])
	case n == 4:
		return string(name[:1]) + "**" + string(name[3:])
	default:
		return string(name[:2]) + "***" + string(name[n-2:])
	}
}

// ---- toolkit/charproc/speparator.go ----

// ThousandsSeparator returns str with a comma inserted between every group of
// three digits of its integer part. The integer part is everything before the
// first "."; a leading "+" or "-" sign is kept in front and is not counted as
// a digit. The fractional part, if any, is copied unchanged. Inputs whose
// integer part has fewer than four digits are returned as is.
func ThousandsSeparator(str string) string {
	intPart, frac, hasFrac := strings.Cut(str, ".")

	sign := ""
	if intPart != "" && (intPart[0] == '-' || intPart[0] == '+') {
		sign, intPart = intPart[:1], intPart[1:]
	}

	n := len(intPart)
	if n < 4 {
		return str
	}

	var b strings.Builder
	b.Grow(len(str) + (n-1)/3)
	b.WriteString(sign)

	// The first group holds the 1-3 digits left over in front of the full
	// groups of three.
	head := n % 3
	if head == 0 {
		head = 3
	}
	b.WriteString(intPart[:head])
	for i := head; i < n; i += 3 {
		b.WriteByte(',')
		b.WriteString(intPart[i : i+3])
	}

	if hasFrac {
		b.WriteByte('.')
		b.WriteString(frac)
	}
	return b.String()
}

// ---- toolkit/charproc/string.go ----

// Frequently used string constants.
const (
	None      = ""  // empty string
	Dunno     = "?" // unknown
	CenterDot = "·" // middle dot
	Dot       = "." // dot
	Slash     = "/" // slash
)

// Byte counterparts of the string constants above.
//
// NOTE: CenterDotChar is the single byte 0xB7 (the code point of '·'), which
// is not the UTF-8 encoding of CenterDot (0xC2 0xB7).
const (
	NoneChar      = byte(0)   // NUL byte
	DunnoChar     = byte('?') // unknown
	CenterDotChar = byte('·') // middle dot code point, see note above
	DotChar       = byte('.') // dot
	SlashChar     = byte('/') // slash
)

// IsUpper reports whether r is an ASCII uppercase letter.
func IsUpper(r rune) bool {
	return r >= 'A' && r <= 'Z'
}

// IsLower reports whether r is an ASCII lowercase letter.
func IsLower(r rune) bool {
	return r >= 'a' && r <= 'z'
}

// IsLetter reports whether r is an ASCII letter.
func IsLetter(r rune) bool {
	return IsUpper(r) || IsLower(r)
}

// IsDigit reports whether r is an ASCII decimal digit.
func IsDigit(r rune) bool {
	return r >= '0' && r <= '9'
}

// IsSpace reports whether r is a space, tab, line feed or carriage return.
func IsSpace(r rune) bool {
	return r == ' ' || r == '\t' || r == '\n' || r == '\r'
}

// SortJoin sorts the given strings in ascending order and joins them with
// delimiter. The caller's slice is not modified.
func SortJoin(delimiter string, s ...string) string {
	sorted := slices.Clone(s)
	slices.Sort(sorted)
	return strings.Join(sorted, delimiter)
}