Skip to content
Browse files

broken commit: parser annotation enhancement

  • Loading branch information...
1 parent 734ab84 commit c989029c229e8bb164bab5bf71b987b213046e86 Arjun Guha committed Mar 12, 2009
Showing with 128 additions and 62 deletions.
  1. +62 −14 src/WebBits/JavaScript/Lexer.hs
  2. +66 −48 src/WebBits/JavaScript/Parser.hs
View
76 src/WebBits/JavaScript/Lexer.hs
@@ -1,33 +1,83 @@
-{- This isn't a lexer in the sense that it provides a JavaScript token-stream.
- - This module provides character-parsers for various JavaScript tokens.
- -}
module WebBits.JavaScript.Lexer(lexeme,identifier,reserved,operator,reservedOp,charLiteral,
stringLiteral,natural,integer,float,naturalOrFloat,
decimal,hexadecimal,octal,symbol,whiteSpace,parens,
braces,brackets,squares,semi,comma,colon,dot,
identifierStart) where
import Prelude hiding (lex)
+import Control.Monad
+import qualified Data.List as L
import Text.ParserCombinators.Parsec
import qualified Text.ParserCombinators.Parsec.Token as T
-
+import Text.ParserCombinators.Parsec.Char
identifierStart = (letter <|> oneOf "$_")
+
+lineComment :: CharParser st String
+lineComment = do
+ try $ string "//"
+ manyTill anyChar (char '\n' <|> (eof >> return ' '))
+
+blockComment :: CharParser st String
+blockComment = do
+ try $ string "/*"
+ manyTill anyChar (try $ string "*/")
+
+comment :: CharParser st String
+comment = lineComment <|> blockComment
+
+comments :: CharParser st [String]
+comments = do
+ spaces
+ (liftM2 (:) comment comments) <|> (return [])
+
+-- |Parse whitespace and returns the last comment in the block of whitespace,
+-- if any.
+whiteSpace :: CharParser st (Maybe String)
+whiteSpace = do
+ r <- comments
+ case r of
+ [] -> return Nothing
+ xs -> return (Just $ L.last r)
+
+reservedWords :: [String]
+reservedWords =
+ ["break", "case", "catch", "const", "continue", "debugger",
+ "default", "delete", "do", "else", "enum", "false", "finally",
+ "for", "function", "if", "instanceof", "in", "let", "new",
+ "null", "return", "switch", "this", "throw", "true", "try",
+ "typeof", "var", "void", "while", "with"]
+-- reserved
+
+operatorRest :: CharParser st Char
+operatorRest = oneOf "=<>|&+"
+
+identifierRest :: CharParser st Char
+identifierRest = alphaNum <|> oneOf "$_" -- identifier rest
+
+reserved :: String -> CharParser st (String,Maybe String)
+reserved word = do
+ try $ string word >> notFollowedBy identifierRest
+ c <- whiteSpace
+ return (word,c)
+
+reservedOp :: String -> CharParser st (String,Maybe String)
+reservedOp op = do
+ try $ string op >> notFollowedBy operatorRest
+ c <- whiteSpace
+ return (op,c)
+
javascriptDef =
T.LanguageDef "/*"
"*/"
"//"
False -- no nested comments
{- Adapted from syntax/regexps.ss in Dave's code. -}
identifierStart
- (alphaNum <|> oneOf "$_") -- identifier rest
+ identifierRest
(oneOf "{}<>()~.,?:|&^=!+-*/%!") -- operator start
- (oneOf "=<>|&+") -- operator rest
- ["break", "case", "catch", "const", "continue", "debugger",
- "default", "delete", "do", "else", "enum", "false", "finally",
- "for", "function", "if", "instanceof", "in", "let", "new",
- "null", "return", "switch", "this", "throw", "true", "try",
- "typeof", "var", "void", "while", "with"]
+ operatorRest
+ reservedWords
["|=", "^=", "&=", "<<=", ">>=", ">>>=", "+=", "-=", "*=", "/=",
"%=", "=", ";", ",", "?", ":", "||", "&&", "|", "^", "&",
"===", "==", "=", "!==", "!=", "<<", "<=", "<", ">>>", ">>",
@@ -39,9 +89,7 @@ lex = T.makeTokenParser javascriptDef
-- everything but commaSep and semiSep
identifier = T.identifier lex
-reserved = T.reserved lex
operator = T.operator lex
-reservedOp = T.reservedOp lex
charLiteral = T.charLiteral lex
stringLiteral = T.stringLiteral lex
natural = T.natural lex
@@ -52,7 +100,7 @@ decimal = T.decimal lex
hexadecimal = T.hexadecimal lex
octal = T.octal lex
symbol = T.symbol lex
-whiteSpace = T.whiteSpace lex
+-- whiteSpace = T.whiteSpace lex
parens = T.parens lex
braces = T.braces lex
squares = T.squares lex
View
114 src/WebBits/JavaScript/Parser.hs
@@ -25,14 +25,43 @@ import Data.Char(chr)
import Data.Char
-- We parameterize the parse tree over source-locations.
-type ParsedStatement = Statement SourcePos
-type ParsedExpression = Expression SourcePos
+type ParserAnnotation = (SourcePos,Maybe String)
+type ParsedStatement = Statement ParserAnnotation
+type ParsedExpression = Expression ParserAnnotation
-- These parsers can store some arbitrary state
type StatementParser state = CharParser state ParsedStatement
type ExpressionParser state = CharParser state ParsedExpression
+withPos cstr p = do
+ pos <- getPosition
+ e <- p
+ return $ cstr (pos,Nothing) e
+
+stmtConstr1 :: String -- ^reserved word that denotes the statement
+ -> (ParserAnnotation -> a -> ParsedStatement) -- constructor
+ -> CharParser st a -- ^argument parser
+ -> StatementParser st
+stmtConstr1 word constr arg = do
+ pos <- getPosition
+ (_,comment) <- reserved word
+ a <- arg
+ return (constr (pos,comment) a)
+
+stmtConstr2 :: String -- ^reserved word that denotes the statement
+ -> (ParserAnnotation -> a0 -> a1 -> ParsedStatement) -- constructor
+ -> CharParser st a0 -- ^argument parser
+ -> CharParser st a1 -- ^argument parser
+ -> StatementParser st
+stmtConstr2 word constr arg0 arg1 = do
+ pos <- getPosition
+ (_,comment) <- reserved word
+ a0 <- arg0
+ a1 <- arg1
+ return (constr (pos,comment) a0 a1)
+
+
identifier =
liftM2 Id getPosition Lexer.identifier
@@ -50,76 +79,69 @@ identifier =
parseIfStmt:: StatementParser st
parseIfStmt = do
pos <- getPosition
- reserved "if"
+ (_,c) <- reserved "if"
+
test <- parseParenExpr <?> "parenthesized test-expression in if statement"
consequent <- parseStatement <?> "true-branch of if statement"
optional semi -- TODO: in spec?
((do reserved "else"
alternate <- parseStatement
- return (IfStmt pos test consequent alternate))
- <|> return (IfSingleStmt pos test consequent))
+ return (IfStmt (pos,c) test consequent alternate))
+ <|> return (IfSingleStmt (pos,c) test consequent))
parseSwitchStmt:: StatementParser st
-parseSwitchStmt =
+parseSwitchStmt = do
let parseDefault = do
pos <- getPosition
- reserved "default"
+ (_,c) <- reserved "default"
colon
statements <- many parseStatement
- return (CaseDefault pos statements)
- parseCase = do
- pos <- getPosition
- reserved "case"
- condition <- parseListExpr
- colon
- actions <- many parseStatement
- return (CaseClause pos condition actions)
- in do pos <- getPosition
- reserved "switch"
- test <- parseParenExpr
- clauses <- braces $ many $ parseDefault <|> parseCase
- return (SwitchStmt pos test clauses)
+ return (CaseDefault (pos,c) statements)
+ let parseCase = do
+ pos <- getPosition
+ (_,c) <- reserved "case"
+ condition <- parseListExpr
+ colon
+ actions <- many parseStatement
+ return (CaseClause (pos,c) condition actions)
+ let clauses =
+ (liftM2 (:) parseCase clauses) <|>
+ (parseDefault >>= \final -> return [final]) <|>
+ (return [])
+ stmtConstr2 "switch" SwitchStmt parseParenExpr (braces clauses)
parseWhileStmt:: StatementParser st
-parseWhileStmt = do
- pos <- getPosition
- reserved "while"
- test <- parseParenExpr <?> "parenthesized test-expression in while loop"
- body <- parseStatement
- return (WhileStmt pos test body)
+parseWhileStmt = stmtConstr2 "while" WhileStmt
+ (parseParenExpr <?> "parenthesized test-expression in while loop")
+ parseStatement
parseDoWhileStmt:: StatementParser st
-parseDoWhileStmt = do
- pos <- getPosition
- reserved "do"
- body <- parseBlockStmt
- reserved "while" <?> "while at the end of a do block"
- test <- parseParenExpr <?> "parenthesized test-expression in do loop"
- optional semi
- return (DoWhileStmt pos body test)
+parseDoWhileStmt = stmtConstr2 "do" DoWhileStmt
+ parseBlockStmt
+ (reserved "while" >> parseParenExpr)
parseContinueStmt:: StatementParser st
parseContinueStmt = do
pos <- getPosition
- reserved "continue"
+ (_,c) <- reserved "continue"
pos' <- getPosition
-- Ensure that the identifier is on the same line as 'continue.'
id <- (if (sourceLine pos == sourceLine pos')
then (liftM Just identifier) <|> (return Nothing)
else return Nothing)
- return (ContinueStmt pos id)
+ return (ContinueStmt (pos,c) id)
parseBreakStmt:: StatementParser st
parseBreakStmt = do
pos <- getPosition
- reserved "break"
+ (_,c) <- reserved "break"
pos' <- getPosition
-- Ensure that the identifier is on the same line as 'break.'
id <- (if (sourceLine pos == sourceLine pos')
then (liftM Just identifier) <|> (return Nothing)
else return Nothing)
optional semi
- return (BreakStmt pos id)
+ return (BreakStmt (pos,c) id)
parseBlockStmt:: StatementParser st
parseBlockStmt = do
@@ -450,11 +472,6 @@ parseNumLit = do
--}}}
-------------------------------------------------------------------------------
--- Position Helper
-------------------------------------------------------------------------------
-
-withPos cstr p = do { pos <- getPosition; e <- p; return $ cstr pos e }
-------------------------------------------------------------------------------
-- Compound Expression Parsers
@@ -512,7 +529,7 @@ parseSimpleExprForNew Nothing = do
--}}}
makeInfixExpr str constr = Infix parser AssocLeft where
- parser:: CharParser st (Expression SourcePos -> Expression SourcePos -> Expression SourcePos)
+ parser:: CharParser st (ParsedExpression -> ParsedExpression -> ParsedExpression)
parser = do
pos <- getPosition
reservedOp str
@@ -640,26 +657,27 @@ parseListExpr =
--}}}
-parseScript:: CharParser state (JavaScript SourcePos)
+parseScript:: CharParser state (JavaScript ParserAnnotation)
parseScript = do
whiteSpace
liftM2 Script getPosition (parseStatement `sepBy` whiteSpace)
-parseJavaScriptFromFile :: MonadIO m => String -> m [Statement SourcePos]
+parseJavaScriptFromFile :: MonadIO m => String -> m [ParsedStatement]
parseJavaScriptFromFile filename = do
chars <- liftIO $ readFile filename
case parse parseScript filename chars of
Left err -> fail (show err)
Right (Script _ stmts) -> return stmts
-parseScriptFromString:: String -> String -> Either ParseError (JavaScript SourcePos)
+parseScriptFromString :: String -> String
+ -> Either ParseError (JavaScript ParserAnnotation)
parseScriptFromString src script = parse parseScript src script
emptyParsedJavaScript =
Script (error "Parser.emptyParsedJavaScript--no annotation") []
-parseString :: String -> [Statement SourcePos]
+parseString :: String -> [ParsedStatement]
parseString str = case parse parseScript "" str of
Left err -> error (show err)
Right (Script _ stmts) -> stmts

0 comments on commit c989029

Please sign in to comment.
Something went wrong with that request. Please try again.