-
Notifications
You must be signed in to change notification settings - Fork 0
/
Lang.agda
393 lines (276 loc) · 10.8 KB
/
Lang.agda
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
module Lang where
open import Agda.Primitive
--open import BaseLogic
open import Category
open import Data.List
open import Data.List.Operations
open import Data.Bool
open import Data.Bool.Operations
open import Data.Nat
open import Data.Maybe
open import Data.String
open import Data.Disjunction
open import Data.Product
{-
--http://www.cs.nott.ac.uk/~pszgmh/monparsing.pdf
-- Intuitively, a parser should take a string and produce some kind of tree:
Parser₁ : Set
Parser₁ = String → Tree
-- A parser might not consume the entire input though, so we might return the unconsumed suffix
-- of the string along with the resulting tree.
Parser₂ : Set
Parser₂ = String → Tree × String
-- A parser might also fail on its input string. Rather than just report a run-time error when
-- this happens, we choose to have parsers return a list of pairs rather than just a single pair
-- with the convention that the empty list denotes failure of the parser, and the singleton
-- list denotes success:
Parser₃ : Set
Parser₃ = String → List (Tree × String)
-}
-- Different parsers can potentially return different kinds of trees, so we should further
-- abstract on the specific type of trees:
Parser₄ : ∀ {i} → Set i → Set i
Parser₄ A = String → List ( A × String)
{-
This works for the type of the parser, but for building total parser combinators we need
to be able to structurally recurse on the input strings, and since Agda doesn't appear to
support this directly and instead requires the input strings to be transformed into Lists
of Chars first, we'll need to modify the parser type as such:
-}
Parser₄' : ∀ {i} → Set i → Set i
Parser₄' A = List Char → List (A × String)
{-
Each parser can be constructed to accept Strings as input for convenience but then call a
"primed" version of itself on the List Char returned from "(primStringToList input)" in
order to build the parsers in a structurally recursive form.
-}
-- "result" from Graham & Hutton's "Monadic Parser Combinators"
-- The three primitive parsers:
-- Always succeeds without consuming any input
{-
result : {A : Set} → A → String → List (A × String)
-}
result : {A : Set} → A → Parser₄ A
result v = λ inp → (v , inp) ∷ []
-- This is the same as "succeed" from [3]
-- Which is also the same as "succeed" from [4]
succeed : {A : Set} → A → Parser₄ A
succeed = result
-- "zero" from Graham & Hutton's "Monadic Parser Combinators"
-- shouldn't this be "{A : Set} → A → Parser₄ A" ?
-- Always fails, regardless of the input
zeroP : {A : Set} → Parser₄ A
zeroP = λ inp → []
-- This is the same as "fail" from [3]
-- And also the same as "fail" from [4]
fail : {A : Set} → Parser₄ A
fail = zeroP
{-
-- "satisfy" from [3]
-- which is the same as "satisfy" from [4]
-}
satisfy' : (P : Char → Bool) → List Char → List (Char × String)
satisfy' p [] = fail ""
satisfy' p (x ∷ xs) = if (p x) then (succeed {Char} x (primStringFromList xs)) else (fail (primStringFromList xs))
satisfy : (P : Char → Bool) → Parser₄ Char
satisfy p x = satisfy' p (primStringToList x)
-- "item" from Graham & Hutton's "Monadic Parser Combinators"
-- Successfully consumes the first character if the input string is non-empty, and fails otherwise.
{-
item : String → List ( Char × String)
-}
item' : List Char → List ( Char × String)
item' [] = []
item' (x ∷ xs) = (x , (primStringFromList xs)) ∷ []
item : Parser₄ Char
item x = item' (primStringToList x)
any : Parser₄ Char
any = item
inString' : Char → List Char → Bool
inString' c [] = false
inString' c (x ∷ xs) = if (primStringEquality (primShowChar c) (primShowChar x)) then true else (inString' c xs)
inString : Char → String → Bool
inString c s = inString' c (primStringToList s)
digits : String
digits = "0123456789"
isDigit : Char → Bool
isDigit c = inString c digits
parseDigit : Parser₄ Char
parseDigit x = satisfy isDigit x
lcChars : String
lcChars = "abcdefghijklmnopqrstuvwxyz"
isLower : Char → Bool
isLower c = inString c lcChars
parseLower : Parser₄ Char
parseLower x = satisfy isLower x
ucChars : String
ucChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
isUpper : Char → Bool
isUpper c = inString c ucChars
parseUpper : Parser₄ Char
parseUpper x = satisfy isUpper x
{-
-- "literal" from [3]
literal : Parser₄ Char
-}
charEqual : Char → Char → Bool
charEqual c₁ c₂ = primStringEquality (primShowChar c₁) (primShowChar c₂)
parseChar : Char → Parser₄ Char
parseChar c x = satisfy (charEqual c) x
-- this is the same as "literal" from [4]
literal : Char → Parser₄ Char
literal = parseChar
fcomp : ∀ {i j k} {A : Set i} {B : Set j} {C : Set k} →
(B → C) → (A → B) → A → C
fcomp g f x = g (f x)
{-
-- "alternative" combinator from Schirmer's [3]
-- this is the same as the "alt" combinator from [4]
-}
{-
The "alt" combinator in [4] alternates two parsers of the same type.
But the "then" combinator from [4] sequences two parsers of different types.
So I've implemented ₁ and ₂ versions of the "alt" parser, for alternating
parsers of the same type and different types, respectively.
Due to the fact that the "then" combinator in [4] sequences parsers of two
different types, but the "alt" combinator does not, leads me to assume
that this is only because it's slightly more difficult to build the two-typed
alternation than it is to build the singly-typed alternation, because you have
to do unpackage things out of the A × String / B × String pairs, inl/inr them
into (A ⊹ B) and then re-pair them with the String into (A ⊹ B) × String pairs
in order to make it work.
The singly-typed alternation combinator is definitely easier to understand and
should be examined first before moving on to the generalized alternation
combinator.
-}
{-
You can have exclusive alternation, which means that if the first parser
succeeds then the second parser will not be used, and the only possibilities
are to obtain either one result or no results.
-}
alt-exc₁ : ∀ {i} {A : Set i} → Parser₄ A → Parser₄ A → Parser₄ A
alt-exc₁ {i} {A} p1 p2 inp = if (not (isEmpty (p1 inp))) then (p1 inp) else (p2 inp)
alt-exc₂ : ∀ {i j} {A : Set i} {B : Set j} → Parser₄ A → Parser₄ B → Parser₄ (A ⊹ B)
alt-exc₂ {i} {j} {A} {B} p1 p2 inp = if (not (isEmpty (p1 inp))) then (list-inl {i} {j} {lzero} {A} {B} {String} (p1 inp)) else (list-inr {i} {j} {lzero} {A} {B} {String} (p2 inp))
{-
The other option is to have inclusive alternation, which means that both
parsers can be used, and if they both succeed (as in the case of an
ambiguous grammar) then it is possible to have multiple results.
-}
alt-inc₁ : ∀ {i} {A : Set i} → Parser₄ A → Parser₄ A → Parser₄ A
alt-inc₁ {i} {A} p1 p2 inp = (p1 inp) ++ (p2 inp)
alt-inc₂ : ∀ {i j} {A : Set i} {B : Set j} → Parser₄ A → Parser₄ B → Parser₄ (A ⊹ B)
alt-inc₂ {i} {j} {A} {B} p1 p2 inp = _++_ {i ⊔ j} {(A ⊹ B) × String} (list-inl {i} {j} {lzero} {A} {B} {String} (p1 inp)) (list-inr {i} {j} {lzero} {A} {B} {String} (p2 inp))
{-
Both the exclusive and inclusive alternation could be made to operate in parallel.
-}
{-
-- "next" combinator from [3]
-}
{-
-- "then" combinator from [4]
[4] uses "list comprehension notation" to express the set of all pairs from each list
[5] shows an implementation of this "allPairs" function
We need something slightly more general than cartesian products of Lists though.
For each element in the list produced by the first parser, we must call the second
parser on the remaining input string.
-}
then : ∀ {i j} {A : Set i} {B : Set j} → Parser₄ A → Parser₄ B → Parser₄ (A × B)
then {i} {j} {A} {B} p1 p2 inp = concat (listmap (λ x → (listmap (λ y → (((first x) , (first y)) , (second y))) (p2 (second x)))) (p1 inp))
{-
-- Non-monadic sequencing combinator:
_seq'_ : {A B : Set} → Parser₄ A → Parser₄ B → Parser₄ (A × B)
p seq' q = λ inp → ((
-}
{-
-- "using" combinator from [3]
-- same as the "using" combinator from [4]
-}
using' : ∀ {i j} {A : Set i} {B : Set j} → Parser₄ A → (A → B) → Parser₄ B
using' {i} {j} {A} {B} p f inp = listmap (λ x → ((f (first x)) , (second x))) (p inp)
{-
-- "many" combinator from [4]
the "many" combinator from [4] is not structurally recursive!
"many p = ((p $then many p) $using cons) $alt (succeed [])"
How to make it so? By structural recursion on the input string of course!
This might not be entirely straight-forward, however, since a parser may
consume multiple characters at a time, we might not be able to do structural
recursion directly on the List constructors but might have to defer
termination proofs to more general size-change arguments.
-}
{-
many' : ∀ {i} {A : Set i} → Parser₄ A → Parser₄' (List A)
many' {i} {A} p [] = ...?
many' {i} {A} p (x ∷ xs) = ...?
many : ∀ {i} {A : Set i} → Parser₄ A → Parser₄ (List A)
many {i} {A} p s = many' {i} {A} p (primStringToList s)
-}
{-
-- Monadic parser combinators
-}
data Reply {α} (A : Set α) : Set α where
error : Reply A
ok : A → String → Reply A
data Consumed {α} (A : Set α) : Set α where
consumed : Reply A → Consumed A
empty : Reply A → Consumed A
Parser : ∀ {α} (A : Set α) → Set α
Parser A = String → Consumed A
{-
[1] Graham Hutton; Erik Meijer
"Monadic Parser Combinators"
http://www.cs.nott.ac.uk/~pszgmh/monparsing.pdf
[2] Philip Wadler;
"How to Replace Failure by a List of Successes:
A method for exception handling, backtracking, and pattern matching
in lazy functional languages"
https://rkrishnan.org/files/wadler-1985.pdf
Defines:
-- lit
-- empty
-- fail
-- alt
-- seq
-- rep
-- rep1
-- alts
-- seqs
-- lits
[3] Stefanie Schirmer; DomCode;
"Parsers All the Way Down?"
https://www.youtube.com/watch?v=oU2418-8_KI
[4] Graham Hutton
"Higher Order Functions for Parsing"
http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.63.3555&rep=rep1&type=pdf
-- succeed
-- fail
-- satisfy
-- literal
-- alt (singly-typed ; inclusive)
-- then
-- using
-- many
-- some
-- number
-- word
-- string
-- xthen
-- thenx
-- nibble
-- any
-- symbol
-- offside
-- into
[5] gallais
on list-comprehensions in Agda:
http://stackoverflow.com/questions/31394404/agda-forming-all-pairs-x-y-x-in-xs-y-in-ys
https://gist.github.com/gallais/2e31c020937198a85529
[6] Nils Anders Danielsson
"Total Parser Combinators" (in Agda)
http://www.cse.chalmers.se/~nad/publications/danielsson-parser-combinators.pdf
[7] Agda Sized Types
http://agda.readthedocs.io/en/latest/language/sized-types.html?highlight=sized%20types
[8] Alexandre Agular, Bassel Mannaa
Regular Expressions in Agda
http://www.cse.chalmers.se/~bassel/report.pdf
-}