/
gonormalizer.go
382 lines (359 loc) 路 11.5 KB
/
gonormalizer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
package gonormalizer
import (
"errors"
"fmt"
"net/url"
"strings"
"golang.org/x/net/idna"
)
// AddPort appends ":" followed by the port p to the URL u.
//
// u is trimmed and lower-cased before it is inspected. The guard only rejects
// non-empty input that fails IsValid, so an empty or whitespace-only u is not
// rejected here. If rxSlash matches, the trailing slash is removed through
// StripTrailingSlash; if rxColon matches, the last byte is dropped
// (presumably a dangling ":" -- the pattern is defined elsewhere).
//
// It returns the URL with the port attached and a nil error. An empty string
// and an error are returned when u is invalid, when the trailing slash cannot
// be stripped, or when rxPort already matches the URL.
func AddPort(u string, p string) (string, error) {
	if !IsValid(u) && !IsEmpty(u) {
		return "", errors.New("Not a vaild URL")
	}
	u = LowerCase(TrimURL(u))
	if rxSlash.MatchString(u) {
		// Propagate the failure instead of discarding it: continuing with
		// the empty string StripTrailingSlash returns on error would yield
		// a bogus ":port" result.
		stripped, err := StripTrailingSlash(u)
		if err != nil {
			return "", err
		}
		u = stripped
	}
	if rxColon.MatchString(u) {
		u = u[:len(u)-1]
	}
	if rxPort.MatchString(u) {
		return "", errors.New("Port already exist")
	}
	return u + ":" + p, nil
}
// AddProtocol prefixes the URL u with the protocol p followed by "://".
//
// u is trimmed and lower-cased before it is inspected. The guard only rejects
// non-empty input that fails IsValid, so an empty or whitespace-only u is not
// rejected here.
//
// It returns the prefixed URL and a nil error. An empty string and an error
// are returned when u is invalid or when rxHttp already matches the URL.
func AddProtocol(u string, p string) (string, error) {
	if !IsValid(u) && !IsEmpty(u) {
		return "", errors.New("Not a vaild URL")
	}
	u = LowerCase(TrimURL(u))
	// The former condition "!http && www || !http" reduces to "!http"; the
	// www check never influenced the outcome.
	if rxHttp.MatchString(u) {
		return "", errors.New("Protocol already Exists")
	}
	return p + "://" + u, nil
}
// AddTrailingSlash appends "/" to the URL u.
//
// u is trimmed and lower-cased before it is inspected. Non-empty input that
// fails IsValid is rejected; empty or whitespace-only input is not.
//
// It returns the URL with the slash appended and a nil error. An empty string
// and an error are returned when u is invalid or when rxSlash already matches.
func AddTrailingSlash(u string) (string, error) {
	if !(IsValid(u) || IsEmpty(u)) {
		return "", errors.New("Not a vaild URL")
	}
	cleaned := LowerCase(TrimURL(u))
	if rxSlash.MatchString(cleaned) {
		return "", errors.New("TrailingSlash Exist in URL")
	}
	return cleaned + "/", nil
}
// AddTrailingDot appends "." to the URL u.
//
// u is trimmed and lower-cased before it is inspected. The guard only rejects
// non-empty input that fails IsValid, so an empty or whitespace-only u is not
// rejected here. If rxSlash matches, the trailing slash is removed through
// StripTrailingSlash before the dot is added. No check is made for a dot that
// is already present.
//
// It returns the URL with the dot appended and a nil error. An empty string
// and an error are returned when u is invalid or when the trailing slash
// cannot be stripped.
func AddTrailingDot(u string) (string, error) {
	if !IsValid(u) && !IsEmpty(u) {
		return "", errors.New("Not a vaild URL")
	}
	u = LowerCase(TrimURL(u))
	if rxSlash.MatchString(u) {
		// Propagate the failure instead of discarding it: continuing with
		// the empty string returned on error would produce a bare ".".
		stripped, err := StripTrailingSlash(u)
		if err != nil {
			return "", err
		}
		u = stripped
	}
	return u + ".", nil
}
// DefaultProtocol prefixes the URL u with "http://".
//
// u is trimmed and lower-cased before it is inspected. The guard only rejects
// non-empty input that fails IsValid, so an empty or whitespace-only u is not
// rejected here.
//
// It returns the prefixed URL and a nil error. An empty string and an error
// are returned when u is invalid or when rxHttp already matches the URL.
func DefaultProtocol(u string) (string, error) {
	if !IsValid(u) && !IsEmpty(u) {
		return "", errors.New("Not a vaild URL")
	}
	u = LowerCase(TrimURL(u))
	// The former condition "!http && www || !http" reduces to "!http"; the
	// www check never influenced the outcome.
	if rxHttp.MatchString(u) {
		return "", errors.New("Protocol already Exists")
	}
	return "http://" + u, nil
}
// ForceHttp rewrites the protocol of the URL u to "http:".
//
// u is trimmed and lower-cased before it is inspected. The guard only rejects
// non-empty input that fails IsValid, so an empty or whitespace-only u is not
// rejected here. The leftmost match of rxFhttp (defined elsewhere; presumably
// the https scheme prefix) is replaced by "http:". Only that matched span is
// rewritten, so identical text occurring elsewhere in the URL, for example
// inside a query parameter, is left untouched.
//
// It returns the rewritten URL and a nil error. An empty string and an error
// are returned when u is invalid or when rxFhttp does not match.
func ForceHttp(u string) (string, error) {
	if !IsValid(u) && !IsEmpty(u) {
		return "", errors.New("Not a vaild URL")
	}
	u = LowerCase(TrimURL(u))
	loc := rxFhttp.FindStringIndex(u)
	if loc == nil {
		return "", errors.New("Protocol does not exist")
	}
	return u[:loc[0]] + "http:" + u[loc[1]:], nil
}
// ForceHttps rewrites the protocol of the URL u to "https:".
//
// u is trimmed and lower-cased before it is inspected. The guard only rejects
// non-empty input that fails IsValid, so an empty or whitespace-only u is not
// rejected here. The leftmost match of rxFhttps (defined elsewhere;
// presumably the http scheme prefix) is replaced by "https:". Only that
// matched span is rewritten, so identical text occurring elsewhere in the
// URL, for example inside a query parameter, is left untouched.
//
// It returns the rewritten URL and a nil error. An empty string and an error
// are returned when u is invalid or when rxFhttps does not match.
func ForceHttps(u string) (string, error) {
	if !IsValid(u) && !IsEmpty(u) {
		return "", errors.New("Not a vaild URL")
	}
	u = LowerCase(TrimURL(u))
	loc := rxFhttps.FindStringIndex(u)
	if loc == nil {
		return "", errors.New("Protocol does not exist")
	}
	return u[:loc[0]] + "https:" + u[loc[1]:], nil
}
// IsValid reports whether u is a non-empty string that matches the URL
// pattern rxC (defined elsewhere in the package).
//
// Input that is empty or consists only of whitespace is never valid. The
// pattern is applied to u exactly as passed in, without trimming.
func IsValid(u string) bool {
	return !IsEmpty(u) && rxC.MatchString(u)
}
// IsEmpty reports whether u is empty once leading and trailing whitespace
// has been removed.
//
// No pattern matching is involved; a string made up solely of whitespace
// counts as empty.
func IsEmpty(u string) bool {
	return strings.TrimSpace(u) == ""
}
// LowerCase returns u with every letter mapped to its lower-case form.
//
// The conversion is unconditional: it performs no validation and no pattern
// matching on u.
func LowerCase(u string) string {
	return strings.ToLower(u)
}
// Normalize brings the URL s into a canonical form.
//
// The steps, in order:
//   - surrounding whitespace is trimmed; an empty result is an error;
//   - a leading "//" is turned into "http://";
//   - the string is parsed, and re-parsed with an "http://" prefix when the
//     first parse yields no scheme;
//   - the port is removed from the host when it equals the entry for the
//     scheme in defaultPorts;
//   - the host is converted with idna.ToUnicode and a leading "www." is
//     dropped;
//   - the query is re-encoded through url.Values (which orders parameters by
//     key) and then unescaped;
//   - one trailing "/" is removed from the final string.
//
// It returns the normalized URL and a nil error. On an empty input or a parse
// failure the trimmed input is returned with the error; on an IDNA failure
// the string produced by idna.ToUnicode is returned with the error.
func Normalize(s string) (string, error) {
	s = strings.TrimSpace(s)
	if IsEmpty(s) {
		return s, errors.New("URL is Empty")
	}
	if strings.HasPrefix(s, "//") {
		s = "http:" + s
	}

	u, err := url.Parse(s)
	if err != nil {
		return s, err
	}
	if u.Scheme == "" {
		// Without a scheme the parser does not recognise the host part, so
		// parse again with a default one.
		if u, err = url.Parse("http://" + s); err != nil {
			return s, err
		}
	}

	if p, ok := defaultPorts[u.Scheme]; ok {
		u.Host = strings.TrimSuffix(u.Host, fmt.Sprintf(":%d", p))
	}

	host, err := idna.ToUnicode(u.Host)
	if err != nil {
		return host, err
	}
	u.Host = strings.TrimPrefix(host, "www.")

	// The unescape error is deliberately ignored, as in the original flow;
	// the input here is the output of Encode.
	u.RawQuery, _ = url.QueryUnescape(u.Query().Encode())

	return strings.TrimSuffix(u.String(), "/"), nil
}
// StripTrailingSlash removes the final character of the URL u when rxSlash
// (the trailing-slash pattern, defined elsewhere) matches.
//
// u is trimmed and lower-cased before it is inspected. Non-empty input that
// fails IsValid is rejected; empty or whitespace-only input is not.
//
// It returns the shortened URL and a nil error. An empty string and an error
// are returned when u is invalid or when rxSlash does not match.
func StripTrailingSlash(u string) (string, error) {
	if !(IsValid(u) || IsEmpty(u)) {
		return "", errors.New("Not a vaild URL")
	}
	cleaned := LowerCase(TrimURL(u))
	if !rxSlash.MatchString(cleaned) {
		return "", errors.New("No TrailingSlash Exist")
	}
	return cleaned[:len(cleaned)-1], nil
}
// StripTrailingDot removes the final character of the URL u when rxTd (the
// trailing-dot pattern, defined elsewhere) matches.
//
// u is trimmed and lower-cased before it is inspected. Non-empty input that
// fails IsValid is rejected; empty or whitespace-only input is not.
//
// It returns the shortened URL and a nil error. An empty string and an error
// are returned when u is invalid or when rxTd does not match.
func StripTrailingDot(u string) (string, error) {
	if !(IsValid(u) || IsEmpty(u)) {
		return "", errors.New("Not a vaild URL")
	}
	cleaned := LowerCase(TrimURL(u))
	if !rxTd.MatchString(cleaned) {
		return "", errors.New("TrailingDot does not exist")
	}
	return cleaned[:len(cleaned)-1], nil
}
// Scheme returns the scheme (protocol) of the URL u as reported by
// url.Parse.
//
// Non-empty input that fails IsValid is rejected; empty or whitespace-only
// input is not. Surrounding whitespace is removed before parsing, in line
// with the other helpers of this package, so that padding around an
// otherwise parseable URL does not make url.Parse fail.
//
// It returns the scheme and a nil error; the scheme is empty when the URL
// carries none. An empty string and an error are returned when u is invalid
// or cannot be parsed.
func Scheme(u string) (string, error) {
	if !IsValid(u) && !IsEmpty(u) {
		return "", errors.New("Not a vaild URL")
	}
	parsed, err := url.Parse(strings.TrimSpace(u))
	if err != nil {
		return "", errors.New("Not a vaild URL")
	}
	return parsed.Scheme, nil
}
// StripProtocol removes the protocol prefix from the URL u.
//
// u is trimmed and lower-cased before it is inspected. The guard only rejects
// non-empty input that fails IsValid, so an empty or whitespace-only u is not
// rejected here. The leftmost match of rxHttp (defined elsewhere) is cut out.
// Only that matched span is removed, so identical text occurring elsewhere in
// the URL, for example a URL embedded in a query parameter, is left
// untouched.
//
// It returns the URL without the protocol and a nil error. An empty string
// and an error are returned when u is invalid or when rxHttp does not match.
func StripProtocol(u string) (string, error) {
	if !IsValid(u) && !IsEmpty(u) {
		return "", errors.New("Not a vaild URL")
	}
	u = LowerCase(TrimURL(u))
	loc := rxHttp.FindStringIndex(u)
	if loc == nil {
		return "", errors.New("No Protocol Exist")
	}
	return u[:loc[0]] + u[loc[1]:], nil
}
// StripWWW removes the "www" prefix from the URL u.
//
// u is trimmed and lower-cased before it is inspected. The guard only rejects
// non-empty input that fails IsValid, so an empty or whitespace-only u is not
// rejected here. The leftmost match of rxWWW (defined elsewhere) is cut out.
// Only that matched span is removed, so identical text occurring elsewhere in
// the URL, for example in the path, is left untouched.
//
// It returns the shortened URL and a nil error. An empty string and an error
// are returned when u is invalid or when rxWWW does not match.
func StripWWW(u string) (string, error) {
	if !IsValid(u) && !IsEmpty(u) {
		return "", errors.New("Not a vaild URL")
	}
	u = LowerCase(TrimURL(u))
	loc := rxWWW.FindStringIndex(u)
	if loc == nil {
		return "", errors.New("www does not exist")
	}
	return u[:loc[0]] + u[loc[1]:], nil
}
// StripHash removes the hash part of the URL u.
//
// u is trimmed and lower-cased before it is inspected. Non-empty input that
// fails IsValid is rejected; empty or whitespace-only input is not. The text
// of the leftmost rxShash match (pattern defined elsewhere) is deleted
// wherever it occurs in the URL.
//
// It returns the shortened URL and a nil error. An empty string and an error
// are returned when u is invalid or when rxShash does not match.
func StripHash(u string) (string, error) {
	if !(IsValid(u) || IsEmpty(u)) {
		return "", errors.New("Not a vaild URL")
	}
	cleaned := LowerCase(TrimURL(u))
	if !rxShash.MatchString(cleaned) {
		return "", errors.New("Hash does not exist")
	}
	fragment := rxShash.FindString(cleaned)
	return strings.Replace(cleaned, fragment, "", -1), nil
}
// StripTextFragment removes the text fragment of the URL u.
//
// u is trimmed and lower-cased before it is inspected. Non-empty input that
// fails IsValid is rejected; empty or whitespace-only input is not. The text
// of the leftmost rxShash match is deleted wherever it occurs in the URL.
//
// NOTE(review): this uses the same rxShash pattern as StripHash, so the two
// functions differ only in their error message -- confirm whether a
// dedicated text-fragment pattern was intended.
//
// It returns the shortened URL and a nil error. An empty string and an error
// are returned when u is invalid or when rxShash does not match.
func StripTextFragment(u string) (string, error) {
	if !(IsValid(u) || IsEmpty(u)) {
		return "", errors.New("Not a vaild URL")
	}
	cleaned := LowerCase(TrimURL(u))
	if !rxShash.MatchString(cleaned) {
		return "", errors.New("Text Fragment does not exist")
	}
	fragment := rxShash.FindString(cleaned)
	return strings.Replace(cleaned, fragment, "", -1), nil
}
// StripAuthentication removes the authentication part of the URL u.
//
// Expected input format, as given by the original documentation:
// "user:password@@example.com", "https://user:password@@example.com"
// NOTE(review): the doubled "@" is copied from that documentation -- confirm
// against the rxSauth pattern.
//
// u is trimmed and lower-cased before it is inspected. Non-empty input that
// fails IsValid is rejected; empty or whitespace-only input is not. The text
// of the leftmost rxSauth match is deleted wherever it occurs, and the
// leftmost rxHttp match (empty when there is none) is then put in front of
// the result.
//
// It returns the rebuilt URL and a nil error. An empty string and an error
// are returned when u is invalid or when rxSauth does not match.
func StripAuthentication(u string) (string, error) {
	if !(IsValid(u) || IsEmpty(u)) {
		return "", errors.New("Not a vaild URL")
	}
	cleaned := LowerCase(TrimURL(u))
	if !rxSauth.MatchString(cleaned) {
		return "", errors.New("Authentication does not exist")
	}
	auth := rxSauth.FindString(cleaned)
	protocol := rxHttp.FindString(cleaned)
	return protocol + strings.Replace(cleaned, auth, "", -1), nil
}
// StripPort removes the port from the URL u.
//
// u is trimmed and lower-cased before it is inspected. The guard only rejects
// non-empty input that fails IsValid, so an empty or whitespace-only u is not
// rejected here. The leftmost match of rxPort (defined elsewhere) is cut out.
// Only that matched span is removed, so identical text occurring elsewhere in
// the URL, for example in the query, is left untouched.
//
// It returns the URL without the port and a nil error. An empty string and an
// error are returned when u is invalid or when rxPort does not match.
func StripPort(u string) (string, error) {
	if !IsValid(u) && !IsEmpty(u) {
		return "", errors.New("Not a vaild URL")
	}
	u = LowerCase(TrimURL(u))
	loc := rxPort.FindStringIndex(u)
	if loc == nil {
		return "", errors.New("Port Does not exist")
	}
	return u[:loc[0]] + u[loc[1]:], nil
}
// TrimURL returns u with all leading and trailing whitespace removed.
//
// Whitespace inside the string is left as it is.
func TrimURL(u string) string {
	return strings.TrimSpace(u)
}