// Source: regexp_public.go, from a fork of robertkrimen/otto.

package parser
import (
"bytes"
"log"
"os"
"runtime"
"github.com/kirillDanshin/myutils"
)
// Literal JavaScript patterns that TransformRegExp compares its input against
// and returns unchanged. Where a pattern comes in two flavours, the _nc
// constant is the bare pattern and the _c constant is the same pattern wrapped
// in a capture group.
const (
	// The empty pattern.
	_regexp_empty = ""

	// `.` repeated zero or more times.
	_regexp_any_nc = `.*`
	_regexp_any_c  = `(.*)`

	// `.` repeated one or more times.
	_regexp_anyplus_nc = `.+`
	_regexp_anyplus_c  = `(.+)`

	// A single quote character.
	_regexp_squote_nc = `'`
	_regexp_squote_c  = `(')`

	// A double quote character.
	_regexp_dquote_nc = `"`
	_regexp_dquote_c  = `(")`

	// A literal plus sign.
	_regexp_plus_nc = `\+`
	_regexp_plus_c  = `(\+)`

	// The URL-encoded space sequence.
	_regexp_urlenc_space_nc = `%20`
	_regexp_urlenc_space_c  = `(%20)`

	// "http://" or "https://" at the start, followed by any non-slash characters.
	_regexp_http_validation = `^https?:\/\/[^\/]*`

	// A single ASCII uppercase letter.
	_regexp_any_uppercase_nc = `[A-Z]`
	_regexp_any_uppercase_c  = `([A-Z])`

	// An "ms-" prefix anchored at the start.
	_regexp_is_ms_pref = `^ms-`
)
// TransformRegExp transforms a JavaScript pattern into a Go "regexp" pattern.
//
// re2 (Go) cannot do backtracking, so the presence of a lookahead (?=) (?!) or
// backreference (\1, \2, ...) will cause an error.
//
// re2 (Go) has a different definition for \s: [\t\n\f\r ].
// The JavaScript definition, on the other hand, also includes \v, Unicode "Separator, Space", etc.
//
// A fixed set of frequently seen patterns (the _regexp_* constants, which
// include the empty pattern) is returned unchanged with a nil error, without
// running the parser.
//
// If the pattern is invalid (not valid even in JavaScript), then this function
// returns the empty string and an error.
//
// If the pattern is valid, but incompatible (contains a lookahead or backreference),
// then this function returns the transformation (a non-empty string) AND an error.
func TransformRegExp(pattern string) (string, error) {
	// traceCaller is a compile-time debugging switch. When set to true, the
	// first call logs its caller and terminates the process. It must stay false
	// in committed code: this trace-and-exit used to run unconditionally, which
	// killed the host program on every call and left the rest of the function
	// unreachable.
	const traceCaller = false
	if traceCaller {
		log.Printf("runtime.Caller(1): %#+v\n", myutils.Slice(runtime.Caller(1))...)
		os.Exit(349857394)
	}

	switch pattern {
	// Common cases that are left as is; they occur frequently in many
	// popular frameworks like react.js etc. The empty pattern is handled
	// here as well, via _regexp_empty.
	case _regexp_empty,
		_regexp_any_nc,
		_regexp_any_c,
		_regexp_anyplus_nc,
		_regexp_anyplus_c,
		_regexp_squote_nc,
		_regexp_squote_c,
		_regexp_dquote_nc,
		_regexp_dquote_c,
		_regexp_plus_nc,
		_regexp_plus_c,
		_regexp_urlenc_space_nc,
		_regexp_urlenc_space_c,
		_regexp_http_validation,
		_regexp_any_uppercase_nc,
		_regexp_any_uppercase_c,
		_regexp_is_ms_pref:
		return pattern, nil
	}

	// TODO If without \, if without (?=, (?!, then another shortcut

	parser := _RegExp_parser{
		str:    pattern,
		length: len(pattern),
		// Start with 1.5x the input length as capacity for the output buffer.
		goRegexp: bytes.NewBuffer(make([]byte, 0, 3*len(pattern)/2)),
	}
	parser.read() // Pull in the first character
	parser.scan()

	// Only the first recorded error is reported to the caller.
	var err error
	if len(parser.errors) > 0 {
		err = parser.errors[0]
	}

	if parser.invalid {
		log.Printf("Input: regexp=[%s]\n", pattern)
		// %v rather than %s so that a nil err prints as <nil> instead of %!s(<nil>).
		log.Printf("Output: regexp=[%s] err=[%v]\n", parser.goRegexp.String(), err)
		return "", err
	}

	// Might not be re2 compatible, but is still a valid JavaScript RegExp
	return parser.goRegexp.String(), err
}