-
Notifications
You must be signed in to change notification settings - Fork 23
/
Url.roc
452 lines (417 loc) · 15.4 KB
/
Url.roc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
module [
Url,
append,
fromStr,
toStr,
appendParam,
hasQuery,
hasFragment,
query,
fragment,
reserve,
withQuery,
withFragment,
]
## A [Uniform Resource Locator](https://en.wikipedia.org/wiki/URL).
##
## It could be an absolute address, such as `https://roc-lang.org/authors` or
## a relative address, such as `/authors`. You can create one using [Url.fromStr].
##
## This is an opaque wrapper around a [Str]. Nothing in this module validates the
## wrapped string, so a [Url] is not guaranteed to be well-formed.
Url := Str implements [Inspect]
## Ask for extra capacity (in bytes) on the string backing this [Url], so that
## a series of length-increasing calls is less likely to reallocate.
##
## This example reserves 50 bytes and then builds
## `https://example.com/stuff?caf%C3%A9=du%20Monde&email=hi%40example.com`:
## ```
## Url.fromStr "https://example.com"
## |> Url.reserve 50
## |> Url.append "stuff"
## |> Url.appendParam "café" "du Monde"
## |> Url.appendParam "email" "hi@example.com"
## ```
## [Str.countUtf8Bytes](https://www.roc-lang.org/builtins/Str#countUtf8Bytes) is a
## convenient way to work out how many bytes to ask for.
##
## There is deliberately no `Url.withCapacity`: reserve capacity on a [Str] first
## and hand that string to [Url.fromStr], which keeps the extra capacity.
reserve : Url, U64 -> Url
reserve = \@Url urlStr, extraBytes ->
    urlStr
    |> Str.reserve (Num.intCast extraBytes)
    |> @Url
## Wrap a [Str] as a [Url] exactly as given: no validation and no
## [percent-encoding](https://en.wikipedia.org/wiki/Percent-encoding) is performed.
##
## ```
## Url.fromStr "https://example.com#stuff"
## ```
##
## Both absolute URLs (`https://example.com`) and relative ones (`/blah`) are accepted.
##
## ```
## Url.fromStr "/this/is#relative"
## ```
##
## Because the input is not checked, the result may not be a valid URL at all.
##
## ```
## Url.fromStr "https://this is not a valid URL, not at all!"
## ```
##
## Handing such a value to a function that requires a valid URL will tend to produce errors.
##
fromStr : Str -> Url
fromStr = \rawUrl ->
    @Url rawUrl
## Unwrap a [Url] back into the [Str] it holds.
## ```
## # Gives "https://example.com/two%20words"
## Url.fromStr "https://example.com"
## |> Url.append "two words"
## |> Url.toStr
## ```
toStr : Url -> Str
toStr = \@Url inner ->
    inner
## [Percent-encodes](https://en.wikipedia.org/wiki/Percent-encoding) a
## [path component](https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#Syntax)
## and appends to the end of the URL's path.
##
## This will be appended before any queries and fragments. If the given path string begins with `/` and the URL already ends with `/`, one
## will be ignored. This avoids turning a single slash into a double slash. If either the given URL or the given string is empty, no `/` will be added.
##
## The path is considered to end at the first `?` or `#`, whichever comes first.
## A `?` that appears after the first `#` is part of the fragment and is left alone.
##
## ```
## # Gives https://example.com/some%20stuff
## Url.fromStr "https://example.com"
## |> Url.append "some stuff"
##
## # Gives https://example.com/stuff?search=blah#fragment
## Url.fromStr "https://example.com?search=blah#fragment"
## |> Url.append "stuff"
##
## # Gives https://example.com/stuff#section?not-a-query
## Url.fromStr "https://example.com#section?not-a-query"
## |> Url.append "stuff"
##
## # Gives https://example.com/things/stuff/more/etc/
## Url.fromStr "https://example.com/things/"
## |> Url.append "/stuff/"
## |> Url.append "/more/etc/"
##
## # Gives https://example.com/things
## Url.fromStr "https://example.com/things"
## |> Url.append ""
## ```
append : Url, Str -> Url
append = \@Url urlStr, suffixUnencoded ->
    suffix = percentEncode suffixUnencoded

    # Peel off the fragment first. Searching for "?" before "#" would mistake a
    # "?" inside the fragment for the start of a query and splice the new path
    # segment into the middle of the fragment.
    { beforeFragment, fragmentPart } =
        when Str.splitFirst urlStr "#" is
            Ok { before, after } ->
                { beforeFragment: before, fragmentPart: "#$(after)" }

            Err NotFound ->
                { beforeFragment: urlStr, fragmentPart: "" }

    # Whatever follows the first "?" in the remainder is the query.
    { pathPart, queryPart } =
        when Str.splitFirst beforeFragment "?" is
            Ok { before, after } ->
                { pathPart: before, queryPart: "?$(after)" }

            Err NotFound ->
                { pathPart: beforeFragment, queryPart: "" }

    bytes =
        Str.countUtf8Bytes pathPart
        + 1 # for "/"
        + Str.countUtf8Bytes suffix
        + Str.countUtf8Bytes queryPart # includes its leading "?", if any
        + Str.countUtf8Bytes fragmentPart # includes its leading "#", if any

    pathPart
    |> Str.reserve bytes
    |> appendHelp suffix
    |> Str.concat queryPart
    |> Str.concat fragmentPart
    |> @Url
## Internal helper: joins an (already encoded) path suffix onto a prefix so that
## exactly one `/` separates them, unless one side is empty, in which case the
## other side is returned unchanged.
appendHelp : Str, Str -> Str
appendHelp = \prefix, suffix ->
    prefixEndsWithSlash = Str.endsWith prefix "/"
    suffixStartsWithSlash = Str.startsWith suffix "/"

    if prefixEndsWithSlash && suffixStartsWithSlash then
        # Both sides supply a "/": keep the prefix's and drop the suffix's leading one.
        when Str.splitFirst suffix "/" is
            Ok { after } ->
                Str.concat prefix after

            Err NotFound ->
                # Not expected to happen, since the suffix was just checked to start with "/".
                Str.concat prefix suffix
    else if prefixEndsWithSlash || suffixStartsWithSlash then
        # Exactly one side supplies the "/", so plain concatenation is already right.
        Str.concat prefix suffix
    else if Str.isEmpty prefix then
        # Nothing to join onto.
        suffix
    else if Str.isEmpty suffix then
        # Nothing to add.
        prefix
    else
        # Two non-empty pieces with no "/" between them: insert one.
        Str.concat (Str.concat prefix "/") suffix
## Internal helper. This is intentionally unexposed so that you don't accidentally
## double-encode things. If you really want to percent-encode an arbitrary string,
## you can always do:
##
## ```
## Url.fromStr ""
## |> Url.append myStrToEncode
## |> Url.toStr
## ```
##
## Every byte of the input is percent-encoded except the RFC 3986 "unreserved"
## characters: ASCII letters, ASCII digits, `-`, `.`, `_` and `~`.
##
## > It is recommended to encode spaces as `%20`, the HTML 2.0 specification
## suggests that these can be encoded as `+`, however this is not always safe to
## use. See [this stackoverflow discussion](https://stackoverflow.com/questions/2678551/when-should-space-be-encoded-to-plus-or-20/47188851#47188851)
## for a detailed explanation.
percentEncode : Str -> Str
percentEncode = \input ->
    # Optimistically assume we won't need any percent encoding, and can have
    # the same capacity as the input string. If we're wrong, the list grows as needed.
    initialOutput = List.withCapacity (Str.countUtf8Bytes input |> Num.intCast)

    # Convert the lookup table to bytes once, rather than once per encoded byte.
    encodedTable = Str.toUtf8 percentEncoded

    answer =
        List.walk (Str.toUtf8 input) initialOutput \output, byte ->
            # Spec for percent-encoding: https://www.ietf.org/rfc/rfc3986.txt
            if
                (byte >= 97 && byte <= 122) # lowercase ASCII
                || (byte >= 65 && byte <= 90) # uppercase ASCII
                || (byte >= 48 && byte <= 57) # digit
            then
                # This is the most common case: an unreserved character,
                # which needs no encoding in a path
                List.append output byte
            else
                when byte is
                    45 # '-' (ASCII 45; NOT 150, which is a non-ASCII byte that must be encoded)
                    | 46 # '.'
                    | 95 # '_'
                    | 126 -> # '~'
                        # These special characters can all be unescaped in paths
                        List.append output byte

                    _ ->
                        # This needs encoding in a path. Each byte value owns a
                        # 3-byte "%XX" entry at offset 3 * byte in the table.
                        suffix =
                            encodedTable
                            |> List.sublist { len: 3, start: 3 * Num.intCast byte }

                        List.concat output suffix

    # Every emitted byte is ASCII (an unreserved character or part of a "%XX"
    # triple), so decoding should never fail.
    Str.fromUtf8 answer
    |> Result.withDefault ""
## Appends one `key=value` query parameter to the [Url], placing it before the
## fragment if there is one.
##
## Both the key and the value are [percent-encoded](https://en.wikipedia.org/wiki/Percent-encoding).
##
## ```
## # Gives https://example.com?email=someone%40example.com
## Url.fromStr "https://example.com"
## |> Url.appendParam "email" "someone@example.com"
## ```
##
## Calling it repeatedly on the same URL accumulates parameters, joined with `&`.
##
## ```
## # Gives https://example.com?caf%C3%A9=du%20Monde&email=hi%40example.com
## Url.fromStr "https://example.com"
## |> Url.appendParam "café" "du Monde"
## |> Url.appendParam "email" "hi@example.com"
## ```
##
appendParam : Url, Str, Str -> Url
appendParam = \@Url urlStr, key, value ->
    # Detach the fragment (everything from the last "#") so that the new
    # parameter is inserted in front of it.
    { base, fragmentTail } =
        when Str.splitLast urlStr "#" is
            Err NotFound ->
                { base: urlStr, fragmentTail: "" }

            Ok { before, after } ->
                { base: before, fragmentTail: "#$(after)" }

    keyEncoded = percentEncode key
    valueEncoded = percentEncode value

    # The first parameter is introduced by "?", every later one by "&".
    separator = if hasQuery (@Url base) then "&" else "?"

    capacity =
        Str.countUtf8Bytes base
        + Str.countUtf8Bytes separator
        + Str.countUtf8Bytes keyEncoded
        + 1 # for "="
        + Str.countUtf8Bytes valueEncoded
        + Str.countUtf8Bytes fragmentTail

    # Reserve once, then append the pieces in order.
    [separator, keyEncoded, "=", valueEncoded, fragmentTail]
    |> List.walk (Str.reserve base capacity) Str.concat
    |> @Url
## Replaces the URL's [query](https://en.wikipedia.org/wiki/URL#Syntax)—the part
## after the first `?`, if it has one, but before any `#` it might have.
##
## Passing `""` removes the `?` (if there was one).
##
## The given query string is inserted as-is; it is not percent-encoded.
##
## ```
## # Gives https://example.com?newQuery=thisRightHere#stuff
## Url.fromStr "https://example.com?key1=val1&key2=val2#stuff"
## |> Url.withQuery "newQuery=thisRightHere"
##
## # Gives https://example.com#stuff
## Url.fromStr "https://example.com?key1=val1&key2=val2#stuff"
## |> Url.withQuery ""
##
## # Gives https://example.com?q=1 (a `?` inside the old query is part of that query)
## Url.fromStr "https://example.com?redirect=/a?b=c"
## |> Url.withQuery "q=1"
## ```
withQuery : Url, Str -> Url
withQuery = \@Url urlStr, queryStr ->
    { withoutFragment, afterQuery } =
        when Str.splitLast urlStr "#" is
            Ok { before, after } ->
                # The fragment is almost certainly going to be a small string,
                # so this interpolation should be cheap.
                { withoutFragment: before, afterQuery: "#$(after)" }

            Err NotFound ->
                { withoutFragment: urlStr, afterQuery: "" }

    # The query starts at the FIRST "?"; any later "?" is data inside the query
    # (RFC 3986, section 3.4), so splitting on the last one would leave part of
    # the old query behind.
    beforeQuery =
        when Str.splitFirst withoutFragment "?" is
            Ok { before } -> before
            Err NotFound -> withoutFragment

    if Str.isEmpty queryStr then
        # An empty query means "no query": drop the "?" as well.
        @Url (Str.concat beforeQuery afterQuery)
    else
        bytes =
            Str.countUtf8Bytes beforeQuery
            + 1 # for "?"
            + Str.countUtf8Bytes queryStr
            + Str.countUtf8Bytes afterQuery

        beforeQuery
        |> Str.reserve bytes
        |> Str.concat "?"
        |> Str.concat queryStr
        |> Str.concat afterQuery
        |> @Url
## Returns the URL's [query](https://en.wikipedia.org/wiki/URL#Syntax)—the part after
## the first `?`, if it has one, but before any `#` it might have.
##
## Returns `""` if the URL has no query.
##
## ```
## # Gives "key1=val1&key2=val2&key3=val3"
## Url.fromStr "https://example.com?key1=val1&key2=val2&key3=val3#stuff"
## |> Url.query
##
## # Gives "redirect=/a?b=c" (a `?` inside the query is part of the query)
## Url.fromStr "https://example.com?redirect=/a?b=c"
## |> Url.query
##
## # Gives ""
## Url.fromStr "https://example.com#stuff"
## |> Url.query
## ```
##
query : Url -> Str
query = \@Url urlStr ->
    # Drop the fragment first so a "?" inside it is not mistaken for a query.
    withoutFragment =
        when Str.splitLast urlStr "#" is
            Ok { before } -> before
            Err NotFound -> urlStr

    # The query starts at the FIRST "?"; later "?" characters are query data
    # (RFC 3986, section 3.4), so everything after the first one is returned.
    when Str.splitFirst withoutFragment "?" is
        Ok { after } -> after
        Err NotFound -> ""
## Returns [Bool.true] if the URL has a `?` in it.
##
## The whole URL string is searched, so a `?` anywhere in it counts.
##
## ```
## # Gives Bool.true
## Url.fromStr "https://example.com?key=value#stuff"
## |> Url.hasQuery
##
## # Gives Bool.false
## Url.fromStr "https://example.com#stuff"
## |> Url.hasQuery
## ```
##
hasQuery : Url -> Bool
hasQuery = \@Url urlStr ->
    # Search the string directly instead of first copying it into a byte list.
    Str.contains urlStr "?"
## Returns the URL's [fragment](https://en.wikipedia.org/wiki/URL#Syntax): the text
## following the last `#` in the URL.
##
## Returns `""` if the URL contains no `#`.
##
## ```
## # Gives "stuff"
## Url.fromStr "https://example.com#stuff"
## |> Url.fragment
##
## # Gives ""
## Url.fromStr "https://example.com"
## |> Url.fragment
## ```
##
fragment : Url -> Str
fragment = \@Url urlStr ->
    urlStr
    |> Str.splitLast "#"
    |> Result.map .after
    |> Result.withDefault ""
## Sets the URL's [fragment](https://en.wikipedia.org/wiki/URL#Syntax), replacing
## the existing one if there is one.
##
## If the URL has no fragment, one is added. Passing `""` removes the fragment
## together with its `#`.
##
## ```
## # Gives https://example.com#things
## Url.fromStr "https://example.com#stuff"
## |> Url.withFragment "things"
##
## # Gives https://example.com#things
## Url.fromStr "https://example.com"
## |> Url.withFragment "things"
##
## # Gives https://example.com
## Url.fromStr "https://example.com#stuff"
## |> Url.withFragment ""
## ```
##
withFragment : Url, Str -> Url
withFragment = \@Url urlStr, fragmentStr ->
    # Everything before the last "#", or the whole URL when it has no fragment.
    base =
        when Str.splitLast urlStr "#" is
            Ok { before } -> before
            Err NotFound -> urlStr

    if Str.isEmpty fragmentStr then
        # An empty fragment means "no fragment": return the URL without any "#".
        @Url base
    else
        # Attach the new fragment, discarding whatever the old one was.
        @Url "$(base)#$(fragmentStr)"
## Returns [Bool.true] if the URL has a `#` in it.
##
## ```
## # Gives Bool.true
## Url.fromStr "https://example.com?key=value#stuff"
## |> Url.hasFragment
##
## # Gives Bool.false
## Url.fromStr "https://example.com?key=value"
## |> Url.hasFragment
## ```
##
hasFragment : Url -> Bool
hasFragment = \@Url urlStr ->
    # Search the string directly instead of first copying it into a byte list.
    Str.contains urlStr "#"
# Lookup table of the percent-encoded form of every byte value, 0x00 through 0xFF,
# laid out back to back as three-character "%XX" entries (uppercase hex).
# The entry for byte `b` occupies the 3 bytes starting at offset `3 * b`, which is
# how `percentEncode` slices it.
#
# Adapted from the percent-encoding crate, © The rust-url developers, Apache2-licensed
#
# https://github.com/servo/rust-url/blob/e12d76a61add5bc09980599c738099feaacd1d0d/percent_encoding/src/lib.rs#L183
percentEncoded : Str
percentEncoded = "%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20%21%22%23%24%25%26%27%28%29%2A%2B%2C%2D%2E%2F%30%31%32%33%34%35%36%37%38%39%3A%3B%3C%3D%3E%3F%40%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F%50%51%52%53%54%55%56%57%58%59%5A%5B%5C%5D%5E%5F%60%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F%70%71%72%73%74%75%76%77%78%79%7A%7B%7C%7D%7E%7F%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF"