Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Yeah, we want a lexer in our codebase.

  • Loading branch information...
commit f6ac1807db8023881d8c09d59fedc8cd98ebe48a 1 parent ed66d8a
@trevorc authored
Showing with 120 additions and 72 deletions.
  1. +2 −1  Chawk/AST.hs
  2. +66 −0 Chawk/Lexer.hs
  3. +52 −71 Chawk/Parse.hs
View
3  Chawk/AST.hs
@@ -28,9 +28,10 @@ data Statement
= Break
| Continue
| Next
+ | Delete Name [Expression]
| Exit Expression
| Return Expression
- | Delete Name
+ | ExpressionStatement Expression
| Print
{ format :: Maybe Expression
, fields :: [Expression]
View
66 Chawk/Lexer.hs
@@ -0,0 +1,66 @@
+module Chawk.Lexer where
+
+import qualified Chawk.AST as AST
+
+import Control.Applicative hiding (many, some, (<|>))
+import Control.Monad
+import Data.Function
+import Data.List
+import Data.Ord
+import Text.ParserCombinators.Parsec
+import qualified Data.Map as M
+
+
+whiteSpace :: CharParser st ()
+whiteSpace = void $ many1 $ (many1 $ oneOf " \t\r\f\v") <|> string "\\\n"
+
+lexeme :: CharParser st a -> CharParser st a
+lexeme p = try $ p <* whiteSpace
+
+symbol :: String -> CharParser st ()
+symbol = void . lexeme . string
+
+word = liftA2 (:) identChar $ many $ identChar <|> digit
+ where identChar = char '_' <|> letter
+
+keywords :: [String]
+keywords =
+ [ "atan2", "index", "match", "sprintf", "substr", "close", "int"
+ , "rand", "sqrt", "system", "cos", "length", "sin", "srand"
+ , "tolower", "exp", "log", "split", "sub", "toupper", "gsub"
+ ]
+
+operator :: String -> CharParser st ()
+operator s = do
+ res <- anyOperator
+ guard $ res == s
+
+anyOperator :: CharParser st String
+anyOperator = choice $ map string $ operators
+ where operators = reverse $ sortBy (comparing length)
+ [ "+=", "-=", "*=", "/=", "%=", "^=", "||",
+ "&&", "==", "<=", ">=", "!=", "++", "++",
+ "--", ">>", "!", ">", "<", "|", "?", ":",
+ "~", "$", "=", ",", ";" ]
+
+identifier = lexeme $ do
+ str <- word
+ guard $ str `notElem` keywords
+ return str
+
+keyword :: String -> CharParser st ()
+keyword name = lexeme $ do
+ str <- word
+ guard $ str == name
+
+(<<>>) = between `on` symbol
+
+braces = "{" <<>> "}"
+brackets = "[" <<>> "]"
+parens = "(" <<>> ")"
+
+commaSep :: CharParser st a -> CharParser st [a]
+commaSep = (`sepBy` symbol ",")
+
+newline :: CharParser st ()
+newline = void $ lexeme $ string "\n"
View
123 Chawk/Parse.hs
@@ -3,21 +3,24 @@ module Chawk.Parse
) where
+import Chawk.Lexer
import Chawk.Util
import qualified Chawk.AST as AST
-import Control.Applicative hiding (many, some, (<|>))
+import Control.Applicative hiding (many, optional, some, (<|>))
import Control.Monad
import Data.Either
-import Text.ParserCombinators.Parsec
+import Data.Maybe
+import Text.ParserCombinators.Parsec hiding (newline)
import qualified Data.Map as M
-import qualified Text.ParserCombinators.Parsec.Language as Language
-import qualified Text.ParserCombinators.Parsec.Token as P
--- Taken from latest Control.Monad
-void :: Functor f => f a -> f ()
-void = fmap (const ())
+data ParseState = ParseState
+ { globals :: M.Map String AST.Name
+ , locals :: M.Map String AST.Name
+ }
+
+type AwkParser = CharParser ParseState
parseEither :: CharParser st a
-> CharParser st b
@@ -25,80 +28,58 @@ parseEither :: CharParser st a
l `parseEither` r = (Left <$> l) <|> (Right <$> r)
-type AwkParser = CharParser ParseState
-
-data ParseState = ParseState
- { globals :: M.Map String AST.Name
- , locals :: M.Map String AST.Name
- }
-
initialState :: ParseState
initialState = ParseState M.empty M.empty
parseProgram :: String -> String -> Either ParseError AST.Program
-parseProgram fname contents = runParser program initialState fname contents
-
-awkLanguage :: P.TokenParser st
-awkLanguage = P.makeTokenParser $ Language.emptyDef
- { P.commentLine = "#"
- , P.nestedComments = False
- , P.opStart = opParser
- , P.opLetter = opParser
- , P.reservedNames =
- [ "atan2", "index", "match", "sprintf", "substr", "close", "int",
- "rand", "sqrt", "system", "cos", "length", "sin", "srand",
- "tolower", "exp", "log", "split", "sub", "toupper", "gsub"
- ]
- , P.reservedOpNames =
- [ "+=", "-=", "*=", "/=", "%=", "^=", "||", "&&", "==",
- "<=", ">=", "!=", "++", "++", "--", ">>", "!", ">",
- "<", "|", "?", ":", "~", "$", "="
- ]
- } where
- opParser = oneOf ",;+-*%^!><|?:~$=/"
-
-identifier = P.identifier awkLanguage
-reserved = P.reserved awkLanguage
-operator = P.operator awkLanguage
-reservedOp = P.reservedOp awkLanguage
-charLiteral = P.charLiteral awkLanguage
-stringLiteral = P.stringLiteral awkLanguage
-natural = P.natural awkLanguage
-integer = P.integer awkLanguage
-float = P.float awkLanguage
-naturalOrFloat = P.naturalOrFloat awkLanguage
-decimal = P.decimal awkLanguage
-hexadecimal = P.hexadecimal awkLanguage
-octal = P.octal awkLanguage
-symbol = P.symbol awkLanguage
-lexeme = P.lexeme awkLanguage
-whiteSpace = P.whiteSpace awkLanguage
-parens = P.parens awkLanguage
-braces = P.braces awkLanguage
-brackets = P.brackets awkLanguage
-squares = P.squares awkLanguage
-semi = P.semi awkLanguage
-comma = P.comma awkLanguage
-colon = P.colon awkLanguage
-dot = P.dot awkLanguage
-semiSep = P.semiSep awkLanguage
-semiSep1 = P.semiSep1 awkLanguage
-commaSep = P.commaSep awkLanguage
-commaSep1 = P.commaSep1 awkLanguage
+parseProgram fname contents =
+ runParser program initialState fname contents
fromState :: (ParseState -> a) -> AwkParser a
fromState = (<$> getState)
-statement :: AwkParser AST.Statement
-statement = undefined
+expression :: AwkParser AST.Expression
+expression = mzero
+
+expressionStatement :: AwkParser AST.Statement
+expressionStatement = AST.ExpressionStatement <$> expression
+
+keywordStatement :: String -> AST.Statement -> AwkParser AST.Statement
+keywordStatement s st = keyword s *> pure st
+
+name :: AwkParser AST.Name
+name = empty
+
+deleteStatement :: AwkParser AST.Statement
+deleteStatement = keyword "delete" *>
+ (AST.Delete
+ <$> name
+ <*> brackets (commaSep expression))
+
+printStatement :: AwkParser AST.Statement
+printStatement = empty
+
+simpleStatement :: AwkParser AST.Statement
+simpleStatement = deleteStatement
+ <|> printStatement
+ <|> keywordStatement "break" AST.Break
+ <|> keywordStatement "next" AST.Next
+ <|> keywordStatement "continue" AST.Continue
+ <|> expressionStatement
+
+complexStatement :: AwkParser AST.Statement
+complexStatement = empty
action :: AwkParser AST.Action
-action = braces $ sepEndBy statement statementSep where
- statementSep = void (symbol ";") <|> void newline
+action = braces $ concat <$> sepEndBy statementGroup statementSep
+ where statementSep = newline <|> operator ";"
+ statementGroup =
+ (++) <$> many complexStatement <*>
+ (maybeToList <$> optionMaybe simpleStatement)
parseFunction :: AwkParser AST.Function
parseFunction = do
- reserved "function"
+ keyword "function"
name <- identifier
params <- parens $ commaSep $ AST.LocalName <$> identifier
body <- action
@@ -109,14 +90,14 @@ parseFunction = do
}
parseRule :: AwkParser AST.Rule
-parseRule = undefined
+parseRule = empty
programPart :: AwkParser (Either AST.Function AST.Rule)
-programPart = parseFunction `parseEither` parseRule
+programPart = (parseFunction `parseEither` parseRule) <* many newline
program :: AwkParser AST.Program
program = do
- (functions, rules) <- liftM partitionEithers $ many programPart
+ (functions, rules) <- partitionEithers <$> many programPart
let functionMap = M.fromList $
extractFst AST.functionName <$> functions
glbs <- fromState globals
Please sign in to comment.
Something went wrong with that request. Please try again.