Permalink
Browse files

Add Text.Parsing.Parser.Language and Token modules.

This commit adds the `Text.Parsing.Parser.Language` and
`Text.Parsing.Parser.Token` modules.  These are straight ports of the
modules of the same names in Haskell's `parsec` library.

This commit also adds the `(<??>)` combinator, which acts like a flipped
`(<?>)`.  It is convenient to use with monadic parsers:

```purescript
foo :: Parser String Unit
foo = "some message" <??> do
    string "foo"
    bar
    pure unit
```
  • Loading branch information...
cdepillabout committed Jan 21, 2016
1 parent 60f4abb commit 4187411e912d5dd55ca5ef3f1c5cd35a75c194ef
Showing with 1,219 additions and 11 deletions.
  1. +3 −1 bower.json
  2. +4 −1 src/Text/Parsing/Parser/Combinators.purs
  3. +125 −0 src/Text/Parsing/Parser/Language.purs
  4. +745 −0 src/Text/Parsing/Parser/Token.purs
  5. +342 −9 test/Main.purs
@@ -29,7 +29,9 @@
"purescript-lists": "^0.7.0",
"purescript-maybe": "^0.3.0",
"purescript-strings": "~0.7.0",
"purescript-transformers": "^0.8.1"
"purescript-transformers": "^0.8.1",
"purescript-unicode": "git://github.com/purescript-contrib/purescript-unicode#unicode",
"purescript-integers": "^0.2.0"
},
"devDependencies": {
"purescript-console": "^0.1.0",
@@ -38,6 +38,10 @@ import Text.Parsing.Parser
(<?>) :: forall m s a. (Monad m) => ParserT s m a -> String -> ParserT s m a
(<?>) parser label = parser <|> fail expectation
  where
    -- Failure message used by the fallback branch of `<|>`.
    expectation = "Expected " ++ label

-- | Variant of `(<?>)` with its arguments swapped: the message comes first
-- | and the parser second, so the label can be written in front of a `do`
-- | block.
(<??>) :: forall m s a. (Monad m) => String -> ParserT s m a -> ParserT s m a
(<??>) msg p = p <?> msg

-- | Wrap a parser with opening and closing markers.
-- |
-- | For example:
@@ -196,4 +200,3 @@ many1Till p end = do
x <- p
xs <- manyTill p end
return (x:xs)

@@ -0,0 +1,125 @@

module Text.Parsing.Parser.Language
-- ( haskellDef, haskell
-- , mondrianDef, mondrian
-- , emptyDef
-- , haskellStyle
-- , javaStyle
-- )
where

import Prelude

import Control.Alt

import Text.Parsing.Parser
import Text.Parsing.Parser.String
import Text.Parsing.Parser.Token

-----------------------------------------------------------
-- Styles: haskellStyle, javaStyle
-----------------------------------------------------------

-- | Minimal token definition for Haskell-style languages.
-- |
-- | Built by updating the record inside `emptyDef`: comments are delimited
-- | by `{-` / `-}` (with `nestedComments` enabled) and `--`, identifiers
-- | start with a `letter` and continue with `alphaNum`, `_` or `'`, and
-- | `caseSensitive` is `true`.  No reserved words or reserved operators are
-- | defined.
haskellStyle :: LanguageDef
haskellStyle = LanguageDef base
    { commentStart = "{-"
    , commentEnd = "-}"
    , commentLine = "--"
    , nestedComments = true
    , identStart = letter
    , identLetter = alphaNum <|> oneOf ['_', '\'']
    , opStart = operatorChar
    , opLetter = operatorChar
    , reservedOpNames = []
    , reservedNames = []
    , caseSensitive = true
    }
  where
    -- The plain record underneath `emptyDef`, used as the starting point.
    base = unGenLanguageDef emptyDef

    -- A single operator character; used for both the first and the
    -- following characters of an operator.
    operatorChar :: forall m . (Monad m) => ParserT String m Char
    operatorChar = oneOf [':', '!', '#', '$', '%', '&', '*', '+', '.', '/', '<', '=', '>', '?', '@', '\\', '^', '|', '-', '~']

-- | Minimal token definition for Java-style languages.
-- |
-- | Built by updating the record inside `emptyDef`: comments are delimited
-- | by `/*` / `*/` (with `nestedComments` enabled) and `//`, identifiers
-- | start with a `letter` and continue with `alphaNum`, `_` or `'`, and
-- | `caseSensitive` is set to `false`.  The operator parsers are not
-- | overridden, so they are the ones from `emptyDef`.  No reserved words or
-- | reserved operators are defined.
javaStyle :: LanguageDef
javaStyle = LanguageDef base
    { commentStart = "/*"
    , commentEnd = "*/"
    , commentLine = "//"
    , nestedComments = true
    , identStart = letter
    , identLetter = alphaNum <|> oneOf ['_', '\'']
    , reservedOpNames = []
    , reservedNames = []
    , caseSensitive = false
    }
  where
    -- The plain record underneath `emptyDef`, used as the starting point.
    base = unGenLanguageDef emptyDef

-----------------------------------------------------------
-- minimal language definition
--------------------------------------------------------

-- | The most minimal token definition.  It is recommended to use this
-- | definition as the basis for other definitions.
-- |
-- | `emptyDef` has no reserved names or reserved operators, is case
-- | sensitive, and uses the empty string for every comment delimiter.
-- | Identifiers start with a `letter` or `_` and continue with `alphaNum`,
-- | `_` or `'`; operators are built from the usual symbol characters.
emptyDef :: LanguageDef
emptyDef = LanguageDef
    { caseSensitive: true
    , reservedNames: []
    , reservedOpNames: []
    , commentStart: ""
    , commentEnd: ""
    , commentLine: ""
    , nestedComments: true
    , identStart: letter <|> char '_'
    , identLetter: alphaNum <|> oneOf ['_', '\'']
    , opStart: operatorChar
    , opLetter: operatorChar
    }
  where
    -- A single operator character; used for both the first and the
    -- following characters of an operator.
    operatorChar :: forall m . (Monad m) => ParserT String m Char
    operatorChar = oneOf [':', '!', '#', '$', '%', '&', '*', '+', '.', '/', '<', '=', '>', '?', '@', '\\', '^', '|', '-', '~']

-----------------------------------------------------------
-- Haskell
-----------------------------------------------------------

-- | A lexer for the Haskell language, obtained by passing `haskellDef` to
-- | `makeTokenParser`.

haskell :: TokenParser
haskell = makeTokenParser haskellDef

-- | The language definition for the Haskell language.
-- |
-- | Takes `haskell98Def`, additionally allows `#` as an identifier letter,
-- | and appends a few more reserved names.
haskellDef :: LanguageDef
haskellDef =
    case haskell98Def of
        LanguageDef base -> LanguageDef base
            { reservedNames = base.reservedNames <> extraReservedNames
            , identLetter = base.identLetter <|> char '#'
            }
  where
    -- Names reserved on top of those from `haskell98Def`.  "import" and
    -- "primitive" already appear in `haskell98Def`; they are repeated here
    -- as well, so the resulting list contains them twice.
    extraReservedNames :: Array String
    extraReservedNames =
        [ "foreign", "import", "export", "primitive"
        , "_ccall_", "_casm_"
        , "forall"
        ]

-- | The language definition for the language Haskell98.
-- |
-- | Takes `haskellStyle` and fills in its reserved operators and reserved
-- | names.
haskell98Def :: LanguageDef
haskell98Def = LanguageDef style
    { reservedNames = keywords
    , reservedOpNames = reservedOperators
    }
  where
    -- The plain record underneath `haskellStyle`, used as the starting point.
    style = unGenLanguageDef haskellStyle

    reservedOperators :: Array String
    reservedOperators = ["::", "..", "=", "\\", "|", "<-", "->", "@", "~", "=>"]

    keywords :: Array String
    keywords =
        [ "let", "in", "case", "of", "if", "then", "else"
        , "data", "type"
        , "class", "default", "deriving", "do", "import"
        , "infix", "infixl", "infixr", "instance", "module"
        , "newtype", "where"
        , "primitive"
        -- "as","qualified","hiding"
        ]
Oops, something went wrong.

0 comments on commit 4187411

Please sign in to comment.